xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision c306cdb0)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "cpu.h"
23 #include "s390x-internal.h"
24 #include "tcg_s390x.h"
25 #include "exec/helper-proto.h"
26 #include "exec/exec-all.h"
27 #include "exec/cpu_ldst.h"
28 #include "qemu/int128.h"
29 #include "qemu/atomic128.h"
30 #include "trace.h"
31 
32 #if !defined(CONFIG_USER_ONLY)
33 #include "hw/s390x/storage-keys.h"
34 #include "hw/boards.h"
35 #endif
36 
37 /*****************************************************************************/
38 /* Softmmu support */
39 
40 /* #define DEBUG_HELPER */
41 #ifdef DEBUG_HELPER
42 #define HELPER_LOG(x...) qemu_log(x)
43 #else
44 #define HELPER_LOG(x...)
45 #endif
46 
47 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
48 {
49     uint16_t pkm = env->cregs[3] >> 16;
50 
51     if (env->psw.mask & PSW_MASK_PSTATE) {
52         /* The PSW key has range 0..15; it is valid if its bit is 1 in the PKM */
53         return pkm & (0x8000 >> psw_key);
54     }
55     return true;
56 }
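
/*
 * Worked example (illustrative values): in problem state with
 * env->cregs[3] = 0x12340000, the PKM is 0x1234, so PSW keys 3, 6, 10,
 * 11 and 13 are considered valid and all other keys are rejected. In
 * supervisor state any key is accepted.
 */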
57 
58 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
59                                    uint64_t src, uint32_t len)
60 {
61     if (!len || src == dest) {
62         return false;
63     }
64     /* Take care of wrapping at the end of address space. */
65     if (unlikely(wrap_address(env, src + len - 1) < src)) {
66         return dest > src || dest <= wrap_address(env, src + len - 1);
67     }
68     return dest > src && dest <= src + len - 1;
69 }
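
/*
 * For illustration: a copy of len = 2 with dest == src + 1 is a
 * destructive overlap (the second byte read would already have been
 * overwritten by the first byte written), while dest == src - 1 or any
 * non-overlapping ranges are not. The wrap check covers ranges that run
 * past the end of the address space.
 */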
70 
71 /* Trigger a SPECIFICATION exception if an address or a length is not
72    naturally aligned.  */
73 static inline void check_alignment(CPUS390XState *env, uint64_t v,
74                                    int wordsize, uintptr_t ra)
75 {
76     if (v % wordsize) {
77         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
78     }
79 }
80 
81 /* Load a value from memory according to its size.  */
82 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
83                                            int wordsize, uintptr_t ra)
84 {
85     switch (wordsize) {
86     case 1:
87         return cpu_ldub_data_ra(env, addr, ra);
88     case 2:
89         return cpu_lduw_data_ra(env, addr, ra);
90     default:
91         abort();
92     }
93 }
94 
95 /* Store a value to memory according to its size.  */
96 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
97                                       uint64_t value, int wordsize,
98                                       uintptr_t ra)
99 {
100     switch (wordsize) {
101     case 1:
102         cpu_stb_data_ra(env, addr, value, ra);
103         break;
104     case 2:
105         cpu_stw_data_ra(env, addr, value, ra);
106         break;
107     default:
108         abort();
109     }
110 }
111 
112 /* An access covers at most 4096 bytes and therefore at most two pages. */
113 typedef struct S390Access {
114     target_ulong vaddr1;
115     target_ulong vaddr2;
116     char *haddr1;
117     char *haddr2;
118     uint16_t size1;
119     uint16_t size2;
120     /*
121      * If we can't access the host page directly, we have to perform the
122      * access via the ld/st helpers. These are internal details, so we
123      * store the mmu idx here instead of passing it around in the helpers.
124      * Maybe we can get rid of ld/st access one day, once we can handle
125      * TLB_NOTDIRTY differently. We don't expect these special accesses to
126      * trigger exceptions: only TLB_NOTDIRTY on LAP pages could trigger a
127      * new MMU translation, and it is very unlikely that the mapping
128      * changes in between such that we would actually fault.
129      */
130     int mmu_idx;
131 } S390Access;
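
/*
 * Sizing example (illustrative, assuming 4k pages): a 0x100-byte access
 * starting at vaddr 0xfff80 is split into size1 = 0x80 on the first
 * page and size2 = 0x80 on the following page; an access that fits in
 * one page has size2 = 0 and leaves vaddr2/haddr2 unused.
 */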
132 
133 /*
134  * With nonfault=1, return the PGM_ exception that would have been injected
135  * into the guest; return 0 if no exception was detected.
136  *
137  * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
138  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
139  */
140 static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
141                              MMUAccessType access_type, int mmu_idx,
142                              bool nonfault, void **phost, uintptr_t ra)
143 {
144     int flags;
145 
146 #if defined(CONFIG_USER_ONLY)
147     flags = page_get_flags(addr);
148     if (!(flags & (access_type == MMU_DATA_LOAD ? PAGE_READ : PAGE_WRITE_ORG))) {
149         env->__excp_addr = addr;
150         flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
151         if (nonfault) {
152             return flags;
153         }
154         tcg_s390_program_interrupt(env, flags, ra);
155     }
156     *phost = g2h(env_cpu(env), addr);
157 #else
158     /*
159      * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
160      * to detect if there was an exception during tlb_fill().
161      */
162     env->tlb_fill_exc = 0;
163     flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
164                                ra);
165     if (env->tlb_fill_exc) {
166         return env->tlb_fill_exc;
167     }
168 
169     if (unlikely(flags & TLB_WATCHPOINT)) {
170         /* S390 does not presently use transaction attributes. */
171         cpu_check_watchpoint(env_cpu(env), addr, size,
172                              MEMTXATTRS_UNSPECIFIED,
173                              (access_type == MMU_DATA_STORE
174                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
175     }
176 #endif
177     return 0;
178 }
179 
180 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
181                              bool nonfault, vaddr vaddr1, int size,
182                              MMUAccessType access_type,
183                              int mmu_idx, uintptr_t ra)
184 {
185     void *haddr1, *haddr2 = NULL;
186     int size1, size2, exc;
187     vaddr vaddr2 = 0;
188 
189     assert(size > 0 && size <= 4096);
190 
191     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
192     size2 = size - size1;
193 
194     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
195                             &haddr1, ra);
196     if (exc) {
197         return exc;
198     }
199     if (unlikely(size2)) {
200         /* The access crosses page boundaries. */
201         vaddr2 = wrap_address(env, vaddr1 + size1);
202         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
203                                 nonfault, &haddr2, ra);
204         if (exc) {
205             return exc;
206         }
207     }
208 
209     *access = (S390Access) {
210         .vaddr1 = vaddr1,
211         .vaddr2 = vaddr2,
212         .haddr1 = haddr1,
213         .haddr2 = haddr2,
214         .size1 = size1,
215         .size2 = size2,
216         .mmu_idx = mmu_idx
217     };
218     return 0;
219 }
220 
221 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
222                                  MMUAccessType access_type, int mmu_idx,
223                                  uintptr_t ra)
224 {
225     S390Access ret;
226     int exc = access_prepare_nf(&ret, env, false, vaddr, size,
227                                 access_type, mmu_idx, ra);
228     assert(!exc);
229     return ret;
230 }
231 
232 /* Helper to handle memset on a single page. */
233 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
234                              uint8_t byte, uint16_t size, int mmu_idx,
235                              uintptr_t ra)
236 {
237 #ifdef CONFIG_USER_ONLY
238     g_assert(haddr);
239     memset(haddr, byte, size);
240 #else
241     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
242     int i;
243 
244     if (likely(haddr)) {
245         memset(haddr, byte, size);
246     } else {
247         /*
248          * Do a single access and test if we can then get access to the
249          * page. This is especially relevant to speed up TLB_NOTDIRTY.
250          */
251         g_assert(size > 0);
252         cpu_stb_mmu(env, vaddr, byte, oi, ra);
253         haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
254         if (likely(haddr)) {
255             memset(haddr + 1, byte, size - 1);
256         } else {
257             for (i = 1; i < size; i++) {
258                 cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
259             }
260         }
261     }
262 #endif
263 }
264 
265 static void access_memset(CPUS390XState *env, S390Access *desta,
266                           uint8_t byte, uintptr_t ra)
267 {
268 
269     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
270                      desta->mmu_idx, ra);
271     if (likely(!desta->size2)) {
272         return;
273     }
274     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
275                      desta->mmu_idx, ra);
276 }
277 
278 static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
279                                   int offset, int mmu_idx, uintptr_t ra)
280 {
281 #ifdef CONFIG_USER_ONLY
282     return ldub_p(*haddr + offset);
283 #else
284     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
285     uint8_t byte;
286 
287     if (likely(*haddr)) {
288         return ldub_p(*haddr + offset);
289     }
290     /*
291      * Do a single access and test if we can then get access to the
292      * page. This is especially relevant to speed up TLB_NOTDIRTY.
293      */
294     byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
295     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
296     return byte;
297 #endif
298 }
299 
300 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
301                                int offset, uintptr_t ra)
302 {
303     if (offset < access->size1) {
304         return do_access_get_byte(env, access->vaddr1, &access->haddr1,
305                                   offset, access->mmu_idx, ra);
306     }
307     return do_access_get_byte(env, access->vaddr2, &access->haddr2,
308                               offset - access->size1, access->mmu_idx, ra);
309 }
310 
311 static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
312                                int offset, uint8_t byte, int mmu_idx,
313                                uintptr_t ra)
314 {
315 #ifdef CONFIG_USER_ONLY
316     stb_p(*haddr + offset, byte);
317 #else
318     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
319 
320     if (likely(*haddr)) {
321         stb_p(*haddr + offset, byte);
322         return;
323     }
324     /*
325      * Do a single access and test if we can then get access to the
326      * page. This is especially relevant to speed up TLB_NOTDIRTY.
327      */
328     cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
329     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
330 #endif
331 }
332 
333 static void access_set_byte(CPUS390XState *env, S390Access *access,
334                             int offset, uint8_t byte, uintptr_t ra)
335 {
336     if (offset < access->size1) {
337         do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
338                            access->mmu_idx, ra);
339     } else {
340         do_access_set_byte(env, access->vaddr2, &access->haddr2,
341                            offset - access->size1, byte, access->mmu_idx, ra);
342     }
343 }
344 
345 /*
346  * Move data with the same semantics as memmove() in case ranges don't overlap
347  * or src > dest. Undefined behavior on destructive overlaps.
348  */
349 static void access_memmove(CPUS390XState *env, S390Access *desta,
350                            S390Access *srca, uintptr_t ra)
351 {
352     int diff;
353 
354     g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
355 
356     /* Fall back to slow access in case we don't have access to all host pages */
357     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
358                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
359         int i;
360 
361         for (i = 0; i < desta->size1 + desta->size2; i++) {
362             uint8_t byte = access_get_byte(env, srca, i, ra);
363 
364             access_set_byte(env, desta, i, byte, ra);
365         }
366         return;
367     }
368 
369     if (srca->size1 == desta->size1) {
370         memmove(desta->haddr1, srca->haddr1, srca->size1);
371         if (unlikely(srca->size2)) {
372             memmove(desta->haddr2, srca->haddr2, srca->size2);
373         }
374     } else if (srca->size1 < desta->size1) {
375         diff = desta->size1 - srca->size1;
376         memmove(desta->haddr1, srca->haddr1, srca->size1);
377         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
378         if (likely(desta->size2)) {
379             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
380         }
381     } else {
382         diff = srca->size1 - desta->size1;
383         memmove(desta->haddr1, srca->haddr1, desta->size1);
384         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
385         if (likely(srca->size2)) {
386             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
387         }
388     }
389 }
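
/*
 * Shape of the fast path above, with illustrative sizes: for srca split
 * 10/6 and desta split 12/4, diff = 2; the first memmove() copies 10
 * bytes into desta->haddr1, the second tops it up with 2 bytes from
 * srca->haddr2, and the third moves the remaining 4 bytes to
 * desta->haddr2.
 */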
390 
391 static int mmu_idx_from_as(uint8_t as)
392 {
393     switch (as) {
394     case AS_PRIMARY:
395         return MMU_PRIMARY_IDX;
396     case AS_SECONDARY:
397         return MMU_SECONDARY_IDX;
398     case AS_HOME:
399         return MMU_HOME_IDX;
400     default:
401         /* FIXME AS_ACCREG */
402         g_assert_not_reached();
403     }
404 }
405 
406 /* and on array */
407 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
408                              uint64_t src, uintptr_t ra)
409 {
410     const int mmu_idx = cpu_mmu_index(env, false);
411     S390Access srca1, srca2, desta;
412     uint32_t i;
413     uint8_t c = 0;
414 
415     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
416                __func__, l, dest, src);
417 
418     /* NC always processes one more byte than specified - maximum is 256 */
419     l++;
420 
421     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
422     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
423     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
424     for (i = 0; i < l; i++) {
425         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
426                           access_get_byte(env, &srca2, i, ra);
427 
428         c |= x;
429         access_set_byte(env, &desta, i, x, ra);
430     }
431     return c != 0;
432 }
433 
434 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
435                     uint64_t src)
436 {
437     return do_helper_nc(env, l, dest, src, GETPC());
438 }
439 
440 /* xor on array */
441 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
442                              uint64_t src, uintptr_t ra)
443 {
444     const int mmu_idx = cpu_mmu_index(env, false);
445     S390Access srca1, srca2, desta;
446     uint32_t i;
447     uint8_t c = 0;
448 
449     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
450                __func__, l, dest, src);
451 
452     /* XC always processes one more byte than specified - maximum is 256 */
453     l++;
454 
455     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
456     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
457     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
458 
459     /* xor with itself is the same as memset(0) */
460     if (src == dest) {
461         access_memset(env, &desta, 0, ra);
462         return 0;
463     }
464 
465     for (i = 0; i < l; i++) {
466         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
467                           access_get_byte(env, &srca2, i, ra);
468 
469         c |= x;
470         access_set_byte(env, &desta, i, x, ra);
471     }
472     return c != 0;
473 }
474 
475 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
476                     uint64_t src)
477 {
478     return do_helper_xc(env, l, dest, src, GETPC());
479 }
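
/*
 * Usage note: the classic clearing idiom "XC 0(8,R1),0(R1)" hits the
 * src == dest fast path above, zeroing 8 bytes via access_memset() and
 * yielding CC 0.
 */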
480 
481 /* or on array */
482 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
483                              uint64_t src, uintptr_t ra)
484 {
485     const int mmu_idx = cpu_mmu_index(env, false);
486     S390Access srca1, srca2, desta;
487     uint32_t i;
488     uint8_t c = 0;
489 
490     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
491                __func__, l, dest, src);
492 
493     /* OC always processes one more byte than specified - maximum is 256 */
494     l++;
495 
496     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
497     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
498     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
499     for (i = 0; i < l; i++) {
500         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
501                           access_get_byte(env, &srca2, i, ra);
502 
503         c |= x;
504         access_set_byte(env, &desta, i, x, ra);
505     }
506     return c != 0;
507 }
508 
509 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
510                     uint64_t src)
511 {
512     return do_helper_oc(env, l, dest, src, GETPC());
513 }
514 
515 /* memmove */
516 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
517                               uint64_t src, uintptr_t ra)
518 {
519     const int mmu_idx = cpu_mmu_index(env, false);
520     S390Access srca, desta;
521     uint32_t i;
522 
523     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
524                __func__, l, dest, src);
525 
526     /* MVC always copies one more byte than specified - maximum is 256 */
527     l++;
528 
529     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
530     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
531 
532     /*
533      * "When the operands overlap, the result is obtained as if the operands
534      * were processed one byte at a time". Only non-destructive overlaps
535      * behave like memmove().
536      */
537     if (dest == src + 1) {
538         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
539     } else if (!is_destructive_overlap(env, dest, src, l)) {
540         access_memmove(env, &desta, &srca, ra);
541     } else {
542         for (i = 0; i < l; i++) {
543             uint8_t byte = access_get_byte(env, &srca, i, ra);
544 
545             access_set_byte(env, &desta, i, byte, ra);
546         }
547     }
548 
549     return env->cc_op;
550 }
551 
552 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
553 {
554     do_helper_mvc(env, l, dest, src, GETPC());
555 }
556 
557 /* move inverse  */
558 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
559 {
560     const int mmu_idx = cpu_mmu_index(env, false);
561     S390Access srca, desta;
562     uintptr_t ra = GETPC();
563     int i;
564 
565     /* MVCIN always copies one more byte than specified - maximum is 256 */
566     l++;
567 
568     src = wrap_address(env, src - l + 1);
569     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
570     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
571     for (i = 0; i < l; i++) {
572         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
573 
574         access_set_byte(env, &desta, i, x, ra);
575     }
576 }
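
/*
 * Example with made-up operands: for bytes 0x11 0x22 0x33 in storage,
 * a 3-byte MVCIN whose second-operand address designates the 0x33 byte
 * stores 0x33 0x22 0x11 at the destination, i.e. the copy reverses the
 * byte order.
 */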
577 
578 /* move numerics  */
579 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
580 {
581     const int mmu_idx = cpu_mmu_index(env, false);
582     S390Access srca1, srca2, desta;
583     uintptr_t ra = GETPC();
584     int i;
585 
586     /* MVN always copies one more byte than specified - maximum is 256 */
587     l++;
588 
589     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
590     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
591     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
592     for (i = 0; i < l; i++) {
593         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
594                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
595 
596         access_set_byte(env, &desta, i, x, ra);
597     }
598 }
599 
600 /* move with offset  */
601 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
602 {
603     const int mmu_idx = cpu_mmu_index(env, false);
604     /* MVO always processes one more byte than specified - maximum is 16 */
605     const int len_dest = (l >> 4) + 1;
606     const int len_src = (l & 0xf) + 1;
607     uintptr_t ra = GETPC();
608     uint8_t byte_dest, byte_src;
609     S390Access srca, desta;
610     int i, j;
611 
612     srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
613     desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
614 
615     /* Handle rightmost byte */
616     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
617     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
618     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
619     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
620 
621     /* Process remaining bytes from right to left */
622     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
623         byte_dest = byte_src >> 4;
624         if (j >= 0) {
625             byte_src = access_get_byte(env, &srca, j, ra);
626         } else {
627             byte_src = 0;
628         }
629         byte_dest |= byte_src << 4;
630         access_set_byte(env, &desta, i, byte_dest, ra);
631     }
632 }
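
/*
 * Worked example (illustrative operands): MVO with a 4-byte destination
 * 0x77 0x88 0x99 0xAA and a 3-byte source 0x12 0x34 0x56 produces
 * 0x01 0x23 0x45 0x6A; the source digits land one nibble to the left
 * and only the rightmost destination nibble (the sign position) is
 * preserved.
 */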
633 
634 /* move zones  */
635 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
636 {
637     const int mmu_idx = cpu_mmu_index(env, false);
638     S390Access srca1, srca2, desta;
639     uintptr_t ra = GETPC();
640     int i;
641 
642     /* MVZ always copies one more byte than specified - maximum is 256 */
643     l++;
644 
645     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
646     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
647     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
648     for (i = 0; i < l; i++) {
649         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
650                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
651 
652         access_set_byte(env, &desta, i, x, ra);
653     }
654 }
655 
656 /* compare unsigned byte arrays */
657 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
658                               uint64_t s2, uintptr_t ra)
659 {
660     uint32_t i;
661     uint32_t cc = 0;
662 
663     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
664                __func__, l, s1, s2);
665 
666     for (i = 0; i <= l; i++) {
667         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
668         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
669         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
670         if (x < y) {
671             cc = 1;
672             break;
673         } else if (x > y) {
674             cc = 2;
675             break;
676         }
677     }
678 
679     HELPER_LOG("\n");
680     return cc;
681 }
682 
683 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
684 {
685     return do_helper_clc(env, l, s1, s2, GETPC());
686 }
687 
688 /* compare logical under mask */
689 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
690                      uint64_t addr)
691 {
692     uintptr_t ra = GETPC();
693     uint32_t cc = 0;
694 
695     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
696                mask, addr);
697 
698     while (mask) {
699         if (mask & 8) {
700             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
701             uint8_t r = extract32(r1, 24, 8);
702             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
703                        addr);
704             if (r < d) {
705                 cc = 1;
706                 break;
707             } else if (r > d) {
708                 cc = 2;
709                 break;
710             }
711             addr++;
712         }
713         mask = (mask << 1) & 0xf;
714         r1 <<= 8;
715     }
716 
717     HELPER_LOG("\n");
718     return cc;
719 }
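
/*
 * Example (illustrative): with mask = 5 (binary 0101), CLM compares
 * byte 1 (bits 8-15) and byte 3 (bits 24-31) of r1 against two
 * consecutive bytes at addr. The mask selects bytes of r1 from most- to
 * least-significant, and only selected bytes consume storage operands.
 */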
720 
721 static inline uint64_t get_address(CPUS390XState *env, int reg)
722 {
723     return wrap_address(env, env->regs[reg]);
724 }
725 
726 /*
727  * Store the address to the given register, zeroing out unused leftmost
728  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
729  */
730 static inline void set_address_zero(CPUS390XState *env, int reg,
731                                     uint64_t address)
732 {
733     if (env->psw.mask & PSW_MASK_64) {
734         env->regs[reg] = address;
735     } else {
736         if (!(env->psw.mask & PSW_MASK_32)) {
737             address &= 0x00ffffff;
738         } else {
739             address &= 0x7fffffff;
740         }
741         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
742     }
743 }
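
/*
 * For illustration: in 24-bit mode this stores address & 0x00ffffff
 * into bits 32-63 of the register (bits 0-31 are untouched); in 31-bit
 * mode it stores address & 0x7fffffff; in 64-bit mode the full address
 * is stored.
 */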
744 
745 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
746 {
747     if (env->psw.mask & PSW_MASK_64) {
748         /* 64-Bit mode */
749         env->regs[reg] = address;
750     } else {
751         if (!(env->psw.mask & PSW_MASK_32)) {
752             /* 24-Bit mode. According to the PoO it is implementation
753             dependent if bits 32-39 remain unchanged or are set to
754             zeros.  Choose the former so that the function can also be
755             used for TRT.  */
756             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
757         } else {
758             /* 31-Bit mode. According to the PoO it is implementation
759             dependent if bit 32 remains unchanged or is set to zero.
760             Choose the latter so that the function can also be used for
761             TRT.  */
762             address &= 0x7fffffff;
763             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
764         }
765     }
766 }
767 
768 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
769 {
770     if (!(env->psw.mask & PSW_MASK_64)) {
771         return (uint32_t)length;
772     }
773     return length;
774 }
775 
776 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
777 {
778     if (!(env->psw.mask & PSW_MASK_64)) {
779         /* 24-Bit and 31-Bit mode */
780         length &= 0x7fffffff;
781     }
782     return length;
783 }
784 
785 static inline uint64_t get_length(CPUS390XState *env, int reg)
786 {
787     return wrap_length31(env, env->regs[reg]);
788 }
789 
790 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
791 {
792     if (env->psw.mask & PSW_MASK_64) {
793         /* 64-Bit mode */
794         env->regs[reg] = length;
795     } else {
796         /* 24-Bit and 31-Bit mode */
797         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
798     }
799 }
800 
801 /* search string (c is byte to search, r2 is string, r1 end of string) */
802 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
803 {
804     uintptr_t ra = GETPC();
805     uint64_t end, str;
806     uint32_t len;
807     uint8_t v, c = env->regs[0];
808 
809     /* Bits 32-55 of R0 must all be zero.  */
810     if (env->regs[0] & 0xffffff00u) {
811         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
812     }
813 
814     str = get_address(env, r2);
815     end = get_address(env, r1);
816 
817     /* Lest we fail to service interrupts in a timely manner, limit the
818        amount of work we're willing to do.  For now, let's cap at 8k.  */
819     for (len = 0; len < 0x2000; ++len) {
820         if (str + len == end) {
821             /* Character not found.  R1 & R2 are unmodified.  */
822             env->cc_op = 2;
823             return;
824         }
825         v = cpu_ldub_data_ra(env, str + len, ra);
826         if (v == c) {
827             /* Character found.  Set R1 to the location; R2 is unmodified.  */
828             env->cc_op = 1;
829             set_address(env, r1, str + len);
830             return;
831         }
832     }
833 
834     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
835     env->cc_op = 3;
836     set_address(env, r2, str + len);
837 }
838 
839 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
840 {
841     uintptr_t ra = GETPC();
842     uint32_t len;
843     uint16_t v, c = env->regs[0];
844     uint64_t end, str, adj_end;
845 
846     /* Bits 32-47 of R0 must be zero.  */
847     if (env->regs[0] & 0xffff0000u) {
848         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
849     }
850 
851     str = get_address(env, r2);
852     end = get_address(env, r1);
853 
854     /* If the LSB of the two addresses differ, use one extra byte.  */
855     adj_end = end + ((str ^ end) & 1);
856 
857     /* Lest we fail to service interrupts in a timely manner, limit the
858        amount of work we're willing to do.  For now, let's cap at 8k.  */
859     for (len = 0; len < 0x2000; len += 2) {
860         if (str + len == adj_end) {
861             /* End of input found.  */
862             env->cc_op = 2;
863             return;
864         }
865         v = cpu_lduw_data_ra(env, str + len, ra);
866         if (v == c) {
867             /* Character found.  Set R1 to the location; R2 is unmodified.  */
868             env->cc_op = 1;
869             set_address(env, r1, str + len);
870             return;
871         }
872     }
873 
874     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
875     env->cc_op = 3;
876     set_address(env, r2, str + len);
877 }
878 
879 /* unsigned string compare (c is string terminator) */
880 uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
881 {
882     uintptr_t ra = GETPC();
883     uint32_t len;
884 
885     c = c & 0xff;
886     s1 = wrap_address(env, s1);
887     s2 = wrap_address(env, s2);
888 
889     /* Lest we fail to service interrupts in a timely manner, limit the
890        amount of work we're willing to do.  For now, let's cap at 8k.  */
891     for (len = 0; len < 0x2000; ++len) {
892         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
893         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
894         if (v1 == v2) {
895             if (v1 == c) {
896                 /* Equal.  CC=0, and don't advance the registers.  */
897                 env->cc_op = 0;
898                 env->retxl = s2;
899                 return s1;
900             }
901         } else {
902             /* Unequal.  CC={1,2}, and advance the registers.  Note that
903                the terminator need not be zero, but the string that contains
904                the terminator is by definition "low".  */
905             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
906             env->retxl = s2 + len;
907             return s1 + len;
908         }
909     }
910 
911     /* CPU-determined bytes equal; advance the registers.  */
912     env->cc_op = 3;
913     env->retxl = s2 + len;
914     return s1 + len;
915 }
916 
917 /* move page */
918 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
919 {
920     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
921     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
922     const int mmu_idx = cpu_mmu_index(env, false);
923     const bool f = extract64(r0, 11, 1);
924     const bool s = extract64(r0, 10, 1);
925     const bool cco = extract64(r0, 8, 1);
926     uintptr_t ra = GETPC();
927     S390Access srca, desta;
928     int exc;
929 
930     if ((f && s) || extract64(r0, 12, 4)) {
931         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
932     }
933 
934     /*
935      * We always manually handle exceptions such that we can properly store
936      * r1/r2 to the lowcore on page-translation exceptions.
937      *
938      * TODO: Access key handling
939      */
940     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
941                             MMU_DATA_LOAD, mmu_idx, ra);
942     if (exc) {
943         if (cco) {
944             return 2;
945         }
946         goto inject_exc;
947     }
948     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
949                             MMU_DATA_STORE, mmu_idx, ra);
950     if (exc) {
951         if (cco && exc != PGM_PROTECTION) {
952             return 1;
953         }
954         goto inject_exc;
955     }
956     access_memmove(env, &desta, &srca, ra);
957     return 0; /* data moved */
958 inject_exc:
959 #if !defined(CONFIG_USER_ONLY)
960     if (exc != PGM_ADDRESSING) {
961         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
962                  env->tlb_fill_tec);
963     }
964     if (exc == PGM_PAGE_TRANS) {
965         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
966                  r1 << 4 | r2);
967     }
968 #endif
969     tcg_s390_program_interrupt(env, exc, ra);
970 }
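
/*
 * Condition-code summary of the helper above: 0 when the page has been
 * moved, 1 when CCO is set and the destination page is inaccessible
 * (except for protection), 2 when CCO is set and the source page is
 * inaccessible; any other failure injects a program interrupt.
 */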
971 
972 /* string copy */
973 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
974 {
975     const int mmu_idx = cpu_mmu_index(env, false);
976     const uint64_t d = get_address(env, r1);
977     const uint64_t s = get_address(env, r2);
978     const uint8_t c = env->regs[0];
979     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
980     S390Access srca, desta;
981     uintptr_t ra = GETPC();
982     int i;
983 
984     if (env->regs[0] & 0xffffff00ull) {
985         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
986     }
987 
988     /*
989      * Our access should not exceed single pages, as we must not report access
990      * exceptions exceeding the actually copied range (which we don't know at
991      * this point). We might over-indicate watchpoints within the pages
992      * (if we ever care, we have to limit processing to a single byte).
993      */
994     srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
995     desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
996     for (i = 0; i < len; i++) {
997         const uint8_t v = access_get_byte(env, &srca, i, ra);
998 
999         access_set_byte(env, &desta, i, v, ra);
1000         if (v == c) {
1001             set_address_zero(env, r1, d + i);
1002             return 1;
1003         }
1004     }
1005     set_address_zero(env, r1, d + len);
1006     set_address_zero(env, r2, s + len);
1007     return 3;
1008 }
1009 
1010 /* load access registers r1 to r3 from memory at a2 */
1011 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1012 {
1013     uintptr_t ra = GETPC();
1014     int i;
1015 
1016     if (a2 & 0x3) {
1017         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1018     }
1019 
1020     for (i = r1;; i = (i + 1) % 16) {
1021         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1022         a2 += 4;
1023 
1024         if (i == r3) {
1025             break;
1026         }
1027     }
1028 }
1029 
1030 /* store access registers r1 to r3 in memory at a2 */
1031 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1032 {
1033     uintptr_t ra = GETPC();
1034     int i;
1035 
1036     if (a2 & 0x3) {
1037         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1038     }
1039 
1040     for (i = r1;; i = (i + 1) % 16) {
1041         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1042         a2 += 4;
1043 
1044         if (i == r3) {
1045             break;
1046         }
1047     }
1048 }
1049 
1050 /* move long helper */
1051 static inline uint32_t do_mvcl(CPUS390XState *env,
1052                                uint64_t *dest, uint64_t *destlen,
1053                                uint64_t *src, uint64_t *srclen,
1054                                uint16_t pad, int wordsize, uintptr_t ra)
1055 {
1056     const int mmu_idx = cpu_mmu_index(env, false);
1057     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1058     S390Access srca, desta;
1059     int i, cc;
1060 
1061     if (*destlen == *srclen) {
1062         cc = 0;
1063     } else if (*destlen < *srclen) {
1064         cc = 1;
1065     } else {
1066         cc = 2;
1067     }
1068 
1069     if (!*destlen) {
1070         return cc;
1071     }
1072 
1073     /*
1074      * Only perform one type of operation (move/pad) at a time.
1075      * Stay within single pages.
1076      */
1077     if (*srclen) {
1078         /* Copy the src array */
1079         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1080         *destlen -= len;
1081         *srclen -= len;
1082         srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1083         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1084         access_memmove(env, &desta, &srca, ra);
1085         *src = wrap_address(env, *src + len);
1086         *dest = wrap_address(env, *dest + len);
1087     } else if (wordsize == 1) {
1088         /* Pad the remaining area */
1089         *destlen -= len;
1090         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1091         access_memset(env, &desta, pad, ra);
1092         *dest = wrap_address(env, *dest + len);
1093     } else {
1094         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1095 
1096         /* The remaining length selects the padding byte. */
1097         for (i = 0; i < len; (*destlen)--, i++) {
1098             if (*destlen & 1) {
1099                 access_set_byte(env, &desta, i, pad, ra);
1100             } else {
1101                 access_set_byte(env, &desta, i, pad >> 8, ra);
1102             }
1103         }
1104         *dest = wrap_address(env, *dest + len);
1105     }
1106 
1107     return *destlen ? 3 : cc;
1108 }
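
/*
 * Padding example for the wordsize == 2 (MVCLU) branch above: with an
 * illustrative pad of 0xabcd and an even remaining *destlen, the bytes
 * written are 0xab 0xcd 0xab 0xcd ...; the parity of the remaining
 * length selects the pad half so that the two-byte pad character stays
 * aligned to the end of the destination.
 */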
1109 
1110 /* move long */
1111 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1112 {
1113     const int mmu_idx = cpu_mmu_index(env, false);
1114     uintptr_t ra = GETPC();
1115     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1116     uint64_t dest = get_address(env, r1);
1117     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1118     uint64_t src = get_address(env, r2);
1119     uint8_t pad = env->regs[r2 + 1] >> 24;
1120     CPUState *cs = env_cpu(env);
1121     S390Access srca, desta;
1122     uint32_t cc, cur_len;
1123 
1124     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1125         cc = 3;
1126     } else if (srclen == destlen) {
1127         cc = 0;
1128     } else if (destlen < srclen) {
1129         cc = 1;
1130     } else {
1131         cc = 2;
1132     }
1133 
1134     /* We might have to zero-out some bits even if there was no action. */
1135     if (unlikely(!destlen || cc == 3)) {
1136         set_address_zero(env, r2, src);
1137         set_address_zero(env, r1, dest);
1138         return cc;
1139     } else if (!srclen) {
1140         set_address_zero(env, r2, src);
1141     }
1142 
1143     /*
1144      * Only perform one type of operation (move/pad) in one step.
1145      * Stay within single pages.
1146      */
1147     while (destlen) {
1148         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1149         if (!srclen) {
1150             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1151                                    ra);
1152             access_memset(env, &desta, pad, ra);
1153         } else {
1154             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1155 
1156             srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
1157                                   ra);
1158             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1159                                    ra);
1160             access_memmove(env, &desta, &srca, ra);
1161             src = wrap_address(env, src + cur_len);
1162             srclen -= cur_len;
1163             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1164             set_address_zero(env, r2, src);
1165         }
1166         dest = wrap_address(env, dest + cur_len);
1167         destlen -= cur_len;
1168         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1169         set_address_zero(env, r1, dest);
1170 
1171         /*
1172          * MVCL is interruptible. Return to the main loop if requested after
1173          * writing back all state to registers. If no interrupt will get
1174          * injected, we'll end up back in this handler and continue processing
1175          * the remaining parts.
1176          */
1177         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1178             cpu_loop_exit_restore(cs, ra);
1179         }
1180     }
1181     return cc;
1182 }
1183 
1184 /* move long extended */
1185 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1186                        uint32_t r3)
1187 {
1188     uintptr_t ra = GETPC();
1189     uint64_t destlen = get_length(env, r1 + 1);
1190     uint64_t dest = get_address(env, r1);
1191     uint64_t srclen = get_length(env, r3 + 1);
1192     uint64_t src = get_address(env, r3);
1193     uint8_t pad = a2;
1194     uint32_t cc;
1195 
1196     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1197 
1198     set_length(env, r1 + 1, destlen);
1199     set_length(env, r3 + 1, srclen);
1200     set_address(env, r1, dest);
1201     set_address(env, r3, src);
1202 
1203     return cc;
1204 }
1205 
1206 /* move long unicode */
1207 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1208                        uint32_t r3)
1209 {
1210     uintptr_t ra = GETPC();
1211     uint64_t destlen = get_length(env, r1 + 1);
1212     uint64_t dest = get_address(env, r1);
1213     uint64_t srclen = get_length(env, r3 + 1);
1214     uint64_t src = get_address(env, r3);
1215     uint16_t pad = a2;
1216     uint32_t cc;
1217 
1218     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1219 
1220     set_length(env, r1 + 1, destlen);
1221     set_length(env, r3 + 1, srclen);
1222     set_address(env, r1, dest);
1223     set_address(env, r3, src);
1224 
1225     return cc;
1226 }
1227 
1228 /* compare logical long helper */
1229 static inline uint32_t do_clcl(CPUS390XState *env,
1230                                uint64_t *src1, uint64_t *src1len,
1231                                uint64_t *src3, uint64_t *src3len,
1232                                uint16_t pad, uint64_t limit,
1233                                int wordsize, uintptr_t ra)
1234 {
1235     uint64_t len = MAX(*src1len, *src3len);
1236     uint32_t cc = 0;
1237 
1238     check_alignment(env, *src1len | *src3len, wordsize, ra);
1239 
1240     if (!len) {
1241         return cc;
1242     }
1243 
1244     /* Lest we fail to service interrupts in a timely manner, limit the
1245        amount of work we're willing to do.  */
1246     if (len > limit) {
1247         len = limit;
1248         cc = 3;
1249     }
1250 
1251     for (; len; len -= wordsize) {
1252         uint16_t v1 = pad;
1253         uint16_t v3 = pad;
1254 
1255         if (*src1len) {
1256             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1257         }
1258         if (*src3len) {
1259             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1260         }
1261 
1262         if (v1 != v3) {
1263             cc = (v1 < v3) ? 1 : 2;
1264             break;
1265         }
1266 
1267         if (*src1len) {
1268             *src1 += wordsize;
1269             *src1len -= wordsize;
1270         }
1271         if (*src3len) {
1272             *src3 += wordsize;
1273             *src3len -= wordsize;
1274         }
1275     }
1276 
1277     return cc;
1278 }
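
/*
 * Example (illustrative): comparing "ABC" (length 3) with "AB"
 * (length 2) and pad = 'C' yields CC 0; the exhausted operand is
 * extended with the padding character until both lengths run out.
 */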
1279 
1280 
1281 /* compare logical long */
1282 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1283 {
1284     uintptr_t ra = GETPC();
1285     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1286     uint64_t src1 = get_address(env, r1);
1287     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1288     uint64_t src3 = get_address(env, r2);
1289     uint8_t pad = env->regs[r2 + 1] >> 24;
1290     uint32_t cc;
1291 
1292     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1293 
1294     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1295     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1296     set_address(env, r1, src1);
1297     set_address(env, r2, src3);
1298 
1299     return cc;
1300 }
1301 
1302 /* compare logical long extended memcompare insn with padding */
1303 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1304                        uint32_t r3)
1305 {
1306     uintptr_t ra = GETPC();
1307     uint64_t src1len = get_length(env, r1 + 1);
1308     uint64_t src1 = get_address(env, r1);
1309     uint64_t src3len = get_length(env, r3 + 1);
1310     uint64_t src3 = get_address(env, r3);
1311     uint8_t pad = a2;
1312     uint32_t cc;
1313 
1314     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1315 
1316     set_length(env, r1 + 1, src1len);
1317     set_length(env, r3 + 1, src3len);
1318     set_address(env, r1, src1);
1319     set_address(env, r3, src3);
1320 
1321     return cc;
1322 }
1323 
1324 /* compare logical long unicode memcompare insn with padding */
1325 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1326                        uint32_t r3)
1327 {
1328     uintptr_t ra = GETPC();
1329     uint64_t src1len = get_length(env, r1 + 1);
1330     uint64_t src1 = get_address(env, r1);
1331     uint64_t src3len = get_length(env, r3 + 1);
1332     uint64_t src3 = get_address(env, r3);
1333     uint16_t pad = a2;
1334     uint32_t cc;
1335 
1336     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1337 
1338     set_length(env, r1 + 1, src1len);
1339     set_length(env, r3 + 1, src3len);
1340     set_address(env, r1, src1);
1341     set_address(env, r3, src3);
1342 
1343     return cc;
1344 }
1345 
1346 /* checksum */
1347 uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1348                       uint64_t src, uint64_t src_len)
1349 {
1350     uintptr_t ra = GETPC();
1351     uint64_t max_len, len;
1352     uint64_t cksm = (uint32_t)r1;
1353 
1354     /* Lest we fail to service interrupts in a timely manner, limit the
1355        amount of work we're willing to do.  For now, let's cap at 8k.  */
1356     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1357 
1358     /* Process full words as available.  */
1359     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1360         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1361     }
1362 
1363     switch (max_len - len) {
1364     case 1:
1365         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1366         len += 1;
1367         break;
1368     case 2:
1369         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1370         len += 2;
1371         break;
1372     case 3:
1373         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1374         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1375         len += 3;
1376         break;
1377     }
1378 
1379     /* Fold the carry from the checksum.  Note that we can see carry-out
1380        during folding more than once (but probably not more than twice).  */
1381     while (cksm > 0xffffffffull) {
1382         cksm = (uint32_t)cksm + (cksm >> 32);
1383     }
1384 
1385     /* Indicate whether or not we've processed everything.  */
1386     env->cc_op = (len == src_len ? 0 : 3);
1387 
1388     /* Return both cksm and processed length.  */
1389     env->retxl = cksm;
1390     return len;
1391 }
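
/*
 * Folding example: summing the illustrative words 0xffffffff and
 * 0x00000001 gives 0x100000000; one folding step, (uint32_t)cksm +
 * (cksm >> 32), reduces this to 0x00000001, the 32-bit end-around-carry
 * sum that CKSM accumulates.
 */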
1392 
1393 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1394 {
1395     uintptr_t ra = GETPC();
1396     int len_dest = len >> 4;
1397     int len_src = len & 0xf;
1398     uint8_t b;
1399 
1400     dest += len_dest;
1401     src += len_src;
1402 
1403     /* The last byte is special: it only flips the nibbles */
1404     b = cpu_ldub_data_ra(env, src, ra);
1405     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1406     src--;
1407     len_src--;
1408 
1409     /* now pack every value */
1410     while (len_dest > 0) {
1411         b = 0;
1412 
1413         if (len_src >= 0) {
1414             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1415             src--;
1416             len_src--;
1417         }
1418         if (len_src >= 0) {
1419             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1420             src--;
1421             len_src--;
1422         }
1423 
1424         len_dest--;
1425         dest--;
1426         cpu_stb_data_ra(env, dest, b, ra);
1427     }
1428 }
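
/*
 * Worked example (illustrative): packing the zoned value 0xf1 0xf2 0xf3
 * into a 2-byte field (len encoded as 0x12) yields 0x12 0x3f; the
 * rightmost byte merely swaps its nibbles, then the remaining digits
 * are packed two per byte from right to left.
 */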
1429 
1430 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1431                            uint32_t srclen, int ssize, uintptr_t ra)
1432 {
1433     int i;
1434     /* The destination operand is always 16 bytes long.  */
1435     const int destlen = 16;
1436 
1437     /* The operands are processed from right to left.  */
1438     src += srclen - 1;
1439     dest += destlen - 1;
1440 
1441     for (i = 0; i < destlen; i++) {
1442         uint8_t b = 0;
1443 
1444         /* Start with a positive sign */
1445         if (i == 0) {
1446             b = 0xc;
1447         } else if (srclen >= ssize) {
1448             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1449             src -= ssize;
1450             srclen -= ssize;
1451         }
1452 
1453         if (srclen >= ssize) {
1454             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1455             src -= ssize;
1456             srclen -= ssize;
1457         }
1458 
1459         cpu_stb_data_ra(env, dest, b, ra);
1460         dest--;
1461     }
1462 }
1463 
1464 
1465 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1466                  uint32_t srclen)
1467 {
1468     do_pkau(env, dest, src, srclen, 1, GETPC());
1469 }
1470 
1471 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1472                  uint32_t srclen)
1473 {
1474     do_pkau(env, dest, src, srclen, 2, GETPC());
1475 }
1476 
1477 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1478                   uint64_t src)
1479 {
1480     uintptr_t ra = GETPC();
1481     int len_dest = len >> 4;
1482     int len_src = len & 0xf;
1483     uint8_t b;
1484     int second_nibble = 0;
1485 
1486     dest += len_dest;
1487     src += len_src;
1488 
1489     /* The last byte is special: it only flips the nibbles */
1490     b = cpu_ldub_data_ra(env, src, ra);
1491     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1492     src--;
1493     len_src--;
1494 
1495     /* now pad every nibble with 0xf0 */
1496 
1497     while (len_dest > 0) {
1498         uint8_t cur_byte = 0;
1499 
1500         if (len_src >= 0) {
1501             cur_byte = cpu_ldub_data_ra(env, src, ra);
1502         }
1503 
1504         len_dest--;
1505         dest--;
1506 
1507         /* only advance one nibble at a time */
1508         if (second_nibble) {
1509             cur_byte >>= 4;
1510             len_src--;
1511             src--;
1512         }
1513         second_nibble = !second_nibble;
1514 
1515         /* digit */
1516         cur_byte = (cur_byte & 0xf);
1517         /* zone bits */
1518         cur_byte |= 0xf0;
1519 
1520         cpu_stb_data_ra(env, dest, cur_byte, ra);
1521     }
1522 }
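
/*
 * Worked example (illustrative): unpacking 0x12 0x3f into a 3-byte
 * field (len encoded as 0x21) yields the zoned value 0xf1 0xf2 0xf3;
 * the rightmost byte has its nibbles swapped, and every remaining digit
 * receives an 0xf zone, proceeding right to left.
 */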
1523 
1524 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1525                                  uint32_t destlen, int dsize, uint64_t src,
1526                                  uintptr_t ra)
1527 {
1528     int i;
1529     uint32_t cc;
1530     uint8_t b;
1531     /* The source operand is always 16 bytes long.  */
1532     const int srclen = 16;
1533 
1534     /* The operands are processed from right to left.  */
1535     src += srclen - 1;
1536     dest += destlen - dsize;
1537 
1538     /* Check for the sign.  */
1539     b = cpu_ldub_data_ra(env, src, ra);
1540     src--;
1541     switch (b & 0xf) {
1542     case 0xa:
1543     case 0xc:
1544     case 0xe ... 0xf:
1545         cc = 0;  /* plus */
1546         break;
1547     case 0xb:
1548     case 0xd:
1549         cc = 1;  /* minus */
1550         break;
1551     default:
1552     case 0x0 ... 0x9:
1553         cc = 3;  /* invalid */
1554         break;
1555     }
1556 
1557     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1558     for (i = 0; i < destlen; i += dsize) {
1559         if (i == (31 * dsize)) {
1560             /* If length is 32/64 bytes, the leftmost byte is 0. */
1561             b = 0;
1562         } else if (i % (2 * dsize)) {
1563             b = cpu_ldub_data_ra(env, src, ra);
1564             src--;
1565         } else {
1566             b >>= 4;
1567         }
1568         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1569         dest -= dsize;
1570     }
1571 
1572     return cc;
1573 }
1574 
1575 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1576                        uint64_t src)
1577 {
1578     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1579 }
1580 
1581 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1582                        uint64_t src)
1583 {
1584     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1585 }
1586 
1587 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1588 {
1589     uintptr_t ra = GETPC();
1590     uint32_t cc = 0;
1591     int i;
1592 
1593     for (i = 0; i < destlen; i++) {
1594         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1595         /* digit */
1596         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1597 
1598         if (i == (destlen - 1)) {
1599             /* sign */
1600             cc |= (b & 0xf) < 0xa ? 1 : 0;
1601         } else {
1602             /* digit */
1603             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1604         }
1605     }
1606 
1607     return cc;
1608 }
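
/*
 * Examples (illustrative): TP on the 2-byte packed value 0x12 0x3c
 * returns CC 0 (digits and sign valid); 0x12 0x34 returns CC 1 (4 is
 * not a valid sign) and 0xa2 0x3c returns CC 2 (0xa is not a valid
 * digit).
 */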
1609 
1610 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1611                              uint64_t trans, uintptr_t ra)
1612 {
1613     uint32_t i;
1614 
1615     for (i = 0; i <= len; i++) {
1616         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1617         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1618         cpu_stb_data_ra(env, array + i, new_byte, ra);
1619     }
1620 
1621     return env->cc_op;
1622 }
1623 
1624 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1625                 uint64_t trans)
1626 {
1627     do_helper_tr(env, len, array, trans, GETPC());
1628 }
1629 
1630 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1631                      uint64_t len, uint64_t trans)
1632 {
1633     uintptr_t ra = GETPC();
1634     uint8_t end = env->regs[0] & 0xff;
1635     uint64_t l = len;
1636     uint64_t i;
1637     uint32_t cc = 0;
1638 
1639     if (!(env->psw.mask & PSW_MASK_64)) {
1640         array &= 0x7fffffff;
1641         l = (uint32_t)l;
1642     }
1643 
1644     /* Lest we fail to service interrupts in a timely manner, limit the
1645        amount of work we're willing to do.  For now, let's cap at 8k.  */
1646     if (l > 0x2000) {
1647         l = 0x2000;
1648         cc = 3;
1649     }
1650 
1651     for (i = 0; i < l; i++) {
1652         uint8_t byte, new_byte;
1653 
1654         byte = cpu_ldub_data_ra(env, array + i, ra);
1655 
1656         if (byte == end) {
1657             cc = 1;
1658             break;
1659         }
1660 
1661         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1662         cpu_stb_data_ra(env, array + i, new_byte, ra);
1663     }
1664 
1665     env->cc_op = cc;
1666     env->retxl = len - i;
1667     return array + i;
1668 }
1669 
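/* Translate and Test (TRT/TRTR) core.  Scan LEN+1 bytes at ARRAY in
   direction INC; at the first byte whose function byte in TRANS is
   nonzero, store its address into register 1 and the function byte
   into the low byte of register 2, returning cc 2 if it was the last
   byte and cc 1 otherwise.  Return cc 0 if all function bytes are
   zero.  */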
1670 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1671                                      uint64_t array, uint64_t trans,
1672                                      int inc, uintptr_t ra)
1673 {
1674     int i;
1675 
1676     for (i = 0; i <= len; i++) {
1677         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1678         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1679 
1680         if (sbyte != 0) {
1681             set_address(env, 1, array + i * inc);
1682             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1683             return (i == len) ? 2 : 1;
1684         }
1685     }
1686 
1687     return 0;
1688 }
1689 
1690 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1691                                   uint64_t array, uint64_t trans,
1692                                   uintptr_t ra)
1693 {
1694     return do_helper_trt(env, len, array, trans, 1, ra);
1695 }
1696 
1697 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1698                      uint64_t trans)
1699 {
1700     return do_helper_trt(env, len, array, trans, 1, GETPC());
1701 }
1702 
1703 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1704                                    uint64_t array, uint64_t trans,
1705                                    uintptr_t ra)
1706 {
1707     return do_helper_trt(env, len, array, trans, -1, ra);
1708 }
1709 
1710 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1711                       uint64_t trans)
1712 {
1713     return do_helper_trt(env, len, array, trans, -1, GETPC());
1714 }
1715 
1716 /* Translate one/two to one/two (TROO, TROT, TRTO, TRTT).  R1 holds the
     destination address, R1+1 the length, and R2 the source address; the
     translation table address comes from register 1, and TST is the test
     character that terminates the operation with cc 1.  */
1717 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1718                       uint32_t tst, uint32_t sizes)
1719 {
1720     uintptr_t ra = GETPC();
1721     int dsize = (sizes & 1) ? 1 : 2;
1722     int ssize = (sizes & 2) ? 1 : 2;
1723     uint64_t tbl = get_address(env, 1);
1724     uint64_t dst = get_address(env, r1);
1725     uint64_t len = get_length(env, r1 + 1);
1726     uint64_t src = get_address(env, r2);
1727     uint32_t cc = 3;
1728     int i;
1729 
1730     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1731        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1732        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1733     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1734         tbl &= -4096;
1735     } else {
1736         tbl &= -8;
1737     }
1738 
1739     check_alignment(env, len, ssize, ra);
1740 
1741     /* Lest we fail to service interrupts in a timely manner, limit the
1742        amount of work we're willing to do.  */
1743     for (i = 0; i < 0x2000; i++) {
1744         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1745         uint64_t tble = tbl + (sval * dsize);
1746         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1747         if (dval == tst) {
1748             cc = 1;
1749             break;
1750         }
1751         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1752 
1753         len -= ssize;
1754         src += ssize;
1755         dst += dsize;
1756 
1757         if (len == 0) {
1758             cc = 0;
1759             break;
1760         }
1761     }
1762 
1763     set_address(env, r1, dst);
1764     set_length(env, r1 + 1, len);
1765     set_address(env, r2, src);
1766 
1767     return cc;
1768 }
1769 
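/* Compare Double and Swap (CDSG), serial version.  Compare the 16-byte
   value at ADDR with the R1:R1+1 pair and, if equal, store the R3:R3+1
   pair in its place; the old value is returned in R1:R1+1 and the cc
   is 0 on a match, 1 otherwise.  */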
1770 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1771                   uint32_t r1, uint32_t r3)
1772 {
1773     uintptr_t ra = GETPC();
1774     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1775     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1776     Int128 oldv;
1777     uint64_t oldh, oldl;
1778     bool fail;
1779 
1780     check_alignment(env, addr, 16, ra);
1781 
1782     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1783     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1784 
1785     oldv = int128_make128(oldl, oldh);
1786     fail = !int128_eq(oldv, cmpv);
1787     if (fail) {
1788         newv = oldv;
1789     }
1790 
1791     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1792     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1793 
1794     env->cc_op = fail;
1795     env->regs[r1] = int128_gethi(oldv);
1796     env->regs[r1 + 1] = int128_getlo(oldv);
1797 }
1798 
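/* Parallel (multi-threaded) variant of CDSG above, implemented with a
   single host 128-bit compare-and-swap so that other vCPUs observe the
   update atomically.  */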
1799 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1800                            uint32_t r1, uint32_t r3)
1801 {
1802     uintptr_t ra = GETPC();
1803     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1804     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1805     int mem_idx;
1806     MemOpIdx oi;
1807     Int128 oldv;
1808     bool fail;
1809 
1810     assert(HAVE_CMPXCHG128);
1811 
1812     mem_idx = cpu_mmu_index(env, false);
1813     oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1814     oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1815     fail = !int128_eq(oldv, cmpv);
1816 
1817     env->cc_op = fail;
1818     env->regs[r1] = int128_gethi(oldv);
1819     env->regs[r1 + 1] = int128_getlo(oldv);
1820 }
1821 
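/* Compare and Swap and Store (CSST).  The function code FC selects the
   compare-and-swap width at A1 (4, 8 or 16 bytes, compared against
   R3), the storage characteristic SC the store width at A2 (1 << SC
   bytes); both the swap value and the store value come from the
   parameter list addressed by register 1.  The store happens only if
   the compare succeeds (cc 0); e.g. fc 1 with sc 3 swaps a doubleword
   at A1 and, on success, stores the doubleword from pl + 16 to A2.
   When the host lacks the required atomic primitives, the parallel
   variant restarts in a serial context via cpu_loop_exit_atomic.  */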
1822 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1823                         uint64_t a2, bool parallel)
1824 {
1825     uint32_t mem_idx = cpu_mmu_index(env, false);
1826     uintptr_t ra = GETPC();
1827     uint32_t fc = extract32(env->regs[0], 0, 8);
1828     uint32_t sc = extract32(env->regs[0], 8, 8);
1829     uint64_t pl = get_address(env, 1) & -16;
1830     uint64_t svh, svl;
1831     uint32_t cc;
1832 
1833     /* Sanity check the function code and storage characteristic.  */
1834     if (fc > 1 || sc > 3) {
1835         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1836             goto spec_exception;
1837         }
1838         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1839             goto spec_exception;
1840         }
1841     }
1842 
1843     /* Sanity check the alignments.  */
1844     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1845         goto spec_exception;
1846     }
1847 
1848     /* Sanity check writability of the store address.  */
1849     probe_write(env, a2, 1 << sc, mem_idx, ra);
1850 
1851     /*
1852      * Note that the compare-and-swap is atomic, and the store is atomic,
1853      * but the complete operation is not.  Therefore we do not need to
1854      * assert serial context in order to implement this.  That said,
1855      * restart early if we can't support either operation that is supposed
1856      * to be atomic.
1857      */
1858     if (parallel) {
1859         uint32_t max = 2;
1860 #ifdef CONFIG_ATOMIC64
1861         max = 3;
1862 #endif
1863         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1864             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1865             cpu_loop_exit_atomic(env_cpu(env), ra);
1866         }
1867     }
1868 
1869     /* All loads happen before all stores.  For simplicity, load the entire
1870        store value area from the parameter list.  */
1871     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1872     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1873 
1874     switch (fc) {
1875     case 0:
1876         {
1877             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1878             uint32_t cv = env->regs[r3];
1879             uint32_t ov;
1880 
1881             if (parallel) {
1882 #ifdef CONFIG_USER_ONLY
1883                 uint32_t *haddr = g2h(env_cpu(env), a1);
1884                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1885 #else
1886                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1887                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1888 #endif
1889             } else {
1890                 ov = cpu_ldl_data_ra(env, a1, ra);
1891                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1892             }
1893             cc = (ov != cv);
1894             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1895         }
1896         break;
1897 
1898     case 1:
1899         {
1900             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1901             uint64_t cv = env->regs[r3];
1902             uint64_t ov;
1903 
1904             if (parallel) {
1905 #ifdef CONFIG_ATOMIC64
1906                 MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1907                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1908 #else
1909                 /* Note that we asserted !parallel above.  */
1910                 g_assert_not_reached();
1911 #endif
1912             } else {
1913                 ov = cpu_ldq_data_ra(env, a1, ra);
1914                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1915             }
1916             cc = (ov != cv);
1917             env->regs[r3] = ov;
1918         }
1919         break;
1920 
1921     case 2:
1922         {
1923             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1924             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1925             Int128 nv = int128_make128(nvl, nvh);
1926             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1927             Int128 ov;
1928 
1929             if (!parallel) {
1930                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1931                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1932 
1933                 ov = int128_make128(ol, oh);
1934                 cc = !int128_eq(ov, cv);
1935                 if (cc) {
1936                     nv = ov;
1937                 }
1938 
1939                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1940                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1941             } else if (HAVE_CMPXCHG128) {
1942                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1943                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1944                 cc = !int128_eq(ov, cv);
1945             } else {
1946                 /* Note that we asserted !parallel above.  */
1947                 g_assert_not_reached();
1948             }
1949 
1950             env->regs[r3 + 0] = int128_gethi(ov);
1951             env->regs[r3 + 1] = int128_getlo(ov);
1952         }
1953         break;
1954 
1955     default:
1956         g_assert_not_reached();
1957     }
1958 
1959     /* Store only if the comparison succeeded.  Note that above we use a pair
1960        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1961        from the most-significant bits of svh.  */
1962     if (cc == 0) {
1963         switch (sc) {
1964         case 0:
1965             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1966             break;
1967         case 1:
1968             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1969             break;
1970         case 2:
1971             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1972             break;
1973         case 3:
1974             cpu_stq_data_ra(env, a2, svh, ra);
1975             break;
1976         case 4:
1977             if (!parallel) {
1978                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1979                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1980             } else if (HAVE_ATOMIC128) {
1981                 MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1982                 Int128 sv = int128_make128(svl, svh);
1983                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1984             } else {
1985                 /* Note that we asserted !parallel above.  */
1986                 g_assert_not_reached();
1987             }
1988             break;
1989         default:
1990             g_assert_not_reached();
1991         }
1992     }
1993 
1994     return cc;
1995 
1996  spec_exception:
1997     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1998 }
1999 
2000 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2001 {
2002     return do_csst(env, r3, a1, a2, false);
2003 }
2004 
2005 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2006                                uint64_t a2)
2007 {
2008     return do_csst(env, r3, a1, a2, true);
2009 }
2010 
2011 #if !defined(CONFIG_USER_ONLY)
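/* Load Control (LCTLG, 64-bit).  Load control registers r1 through r3
   (wrapping modulo 16) from doubleword-aligned storage at a2.  A
   change to cr9-cr11 retriggers PER watchpoint recomputation, and the
   TLB is always flushed since translation controls may have changed;
   the 32-bit LCTL below behaves the same but only replaces bits 32-63
   of each register.  */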
2012 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2013 {
2014     uintptr_t ra = GETPC();
2015     bool PERchanged = false;
2016     uint64_t src = a2;
2017     uint32_t i;
2018 
2019     if (src & 0x7) {
2020         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2021     }
2022 
2023     for (i = r1;; i = (i + 1) % 16) {
2024         uint64_t val = cpu_ldq_data_ra(env, src, ra);
2025         if (env->cregs[i] != val && i >= 9 && i <= 11) {
2026             PERchanged = true;
2027         }
2028         env->cregs[i] = val;
2029         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2030                    i, src, val);
2031         src += sizeof(uint64_t);
2032 
2033         if (i == r3) {
2034             break;
2035         }
2036     }
2037 
2038     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2039         s390_cpu_recompute_watchpoints(env_cpu(env));
2040     }
2041 
2042     tlb_flush(env_cpu(env));
2043 }
2044 
2045 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2046 {
2047     uintptr_t ra = GETPC();
2048     bool PERchanged = false;
2049     uint64_t src = a2;
2050     uint32_t i;
2051 
2052     if (src & 0x3) {
2053         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2054     }
2055 
2056     for (i = r1;; i = (i + 1) % 16) {
2057         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2058         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2059             PERchanged = true;
2060         }
2061         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2062         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2063         src += sizeof(uint32_t);
2064 
2065         if (i == r3) {
2066             break;
2067         }
2068     }
2069 
2070     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2071         s390_cpu_recompute_watchpoints(env_cpu(env));
2072     }
2073 
2074     tlb_flush(env_cpu(env));
2075 }
2076 
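/* Store Control (STCTG, 64-bit; STCTL below is the 32-bit form).
   Store control registers r1 through r3 (wrapping modulo 16) to
   suitably aligned storage at a2, raising a specification exception
   for a misaligned address.  */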
2077 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2078 {
2079     uintptr_t ra = GETPC();
2080     uint64_t dest = a2;
2081     uint32_t i;
2082 
2083     if (dest & 0x7) {
2084         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2085     }
2086 
2087     for (i = r1;; i = (i + 1) % 16) {
2088         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2089         dest += sizeof(uint64_t);
2090 
2091         if (i == r3) {
2092             break;
2093         }
2094     }
2095 }
2096 
2097 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2098 {
2099     uintptr_t ra = GETPC();
2100     uint64_t dest = a2;
2101     uint32_t i;
2102 
2103     if (dest & 0x3) {
2104         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2105     }
2106 
2107     for (i = r1;; i = (i + 1) % 16) {
2108         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2109         dest += sizeof(uint32_t);
2110 
2111         if (i == r3) {
2112             break;
2113         }
2114     }
2115 }
2116 
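/* Test Block (TB).  Zero out the 4K page containing the given real
   address, eight bytes at a time, and return cc 0; defective storage
   is not modelled, so the block is always usable.  */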
2117 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2118 {
2119     uintptr_t ra = GETPC();
2120     int i;
2121 
2122     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2123 
2124     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2125         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2126     }
2127 
2128     return 0;
2129 }
2130 
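/* Test Protection (TPROT).  Probe the first operand address and return
   cc 0 (fetch and store permitted), cc 1 (fetch only), cc 2 (neither)
   or cc 3 (translation not available); the access key supplied with a2
   is among the cases not yet handled (see the TODO below).  */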
2131 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2132 {
2133     S390CPU *cpu = env_archcpu(env);
2134     CPUState *cs = env_cpu(env);
2135 
2136     /*
2137      * TODO: we currently don't handle all access protection types
2138      * (including access-list and key-controlled) as well as AR mode.
2139      */
2140     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2141         /* Fetching permitted; storing permitted */
2142         return 0;
2143     }
2144 
2145     if (env->int_pgm_code == PGM_PROTECTION) {
2146         /* retry if reading is possible */
2147         cs->exception_index = -1;
2148         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2149             /* Fetching permitted; storing not permitted */
2150             return 1;
2151         }
2152     }
2153 
2154     switch (env->int_pgm_code) {
2155     case PGM_PROTECTION:
2156         /* Fetching not permitted; storing not permitted */
2157         cs->exception_index = -1;
2158         return 2;
2159     case PGM_ADDRESSING:
2160     case PGM_TRANS_SPEC:
2161         /* exceptions forwarded to the guest */
2162         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2163         return 0;
2164     }
2165 
2166     /* Translation not available */
2167     cs->exception_index = -1;
2168     return 3;
2169 }
2170 
2171 /* insert storage key extended */
2172 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2173 {
2174     static S390SKeysState *ss;
2175     static S390SKeysClass *skeyclass;
2176     uint64_t addr = wrap_address(env, r2);
2177     uint8_t key;
2178     int rc;
2179 
2180     addr = mmu_real2abs(env, addr);
2181     if (!mmu_absolute_addr_valid(addr, false)) {
2182         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2183     }
2184 
2185     if (unlikely(!ss)) {
2186         ss = s390_get_skeys_device();
2187         skeyclass = S390_SKEYS_GET_CLASS(ss);
2188         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2189             tlb_flush_all_cpus_synced(env_cpu(env));
2190         }
2191     }
2192 
2193     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2194     if (rc) {
2195         trace_get_skeys_nonzero(rc);
2196         return 0;
2197     }
2198     return key;
2199 }
2200 
2201 /* set storage key extended */
2202 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2203 {
2204     static S390SKeysState *ss;
2205     static S390SKeysClass *skeyclass;
2206     uint64_t addr = wrap_address(env, r2);
2207     uint8_t key;
2208     int rc;
2209 
2210     addr = mmu_real2abs(env, addr);
2211     if (!mmu_absolute_addr_valid(addr, false)) {
2212         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2213     }
2214 
2215     if (unlikely(!ss)) {
2216         ss = s390_get_skeys_device();
2217         skeyclass = S390_SKEYS_GET_CLASS(ss);
2218         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2219             tlb_flush_all_cpus_synced(env_cpu(env));
2220         }
2221     }
2222 
2223     key = r1 & 0xfe;
2224     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2225     if (rc) {
2226         trace_set_skeys_nonzero(rc);
2227     }
2228     /*
2229      * As we can only flush by virtual address and not all the entries
2230      * that point to a physical address, we have to flush the whole TLB.
2231      */
2232     tlb_flush_all_cpus_synced(env_cpu(env));
2233 }
2234 
2235 /* reset reference bit extended */
2236 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2237 {
2238     uint64_t addr = wrap_address(env, r2);
2239     static S390SKeysState *ss;
2240     static S390SKeysClass *skeyclass;
2241     uint8_t re, key;
2242     int rc;
2243 
2244     addr = mmu_real2abs(env, addr);
2245     if (!mmu_absolute_addr_valid(addr, false)) {
2246         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2247     }
2248 
2249     if (unlikely(!ss)) {
2250         ss = s390_get_skeys_device();
2251         skeyclass = S390_SKEYS_GET_CLASS(ss);
2252         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2253             tlb_flush_all_cpus_synced(env_cpu(env));
2254         }
2255     }
2256 
2257     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2258     if (rc) {
2259         trace_get_skeys_nonzero(rc);
2260         return 0;
2261     }
2262 
2263     re = key & (SK_R | SK_C);
2264     key &= ~SK_R;
2265 
2266     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2267     if (rc) {
2268         trace_set_skeys_nonzero(rc);
2269         return 0;
2270     }
2271     /*
2272      * As we can only flush by virtual address and not all the entries
2273      * that point to a physical address, we have to flush the whole TLB.
2274      */
2275     tlb_flush_all_cpus_synced(env_cpu(env));
2276 
2277     /*
2278      * cc
2279      *
2280      * 0  Reference bit zero; change bit zero
2281      * 1  Reference bit zero; change bit one
2282      * 2  Reference bit one; change bit zero
2283      * 3  Reference bit one; change bit one
2284      */
2285 
2286     return re >> 1;
2287 }
2288 
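/* Move to Secondary (MVCS).  Copy up to 256 bytes from the primary
   address space (a2) into the secondary address space (a1); a longer
   requested length is capped at 256 and flagged with cc 3.  */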
2289 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2290 {
2291     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2292     S390Access srca, desta;
2293     uintptr_t ra = GETPC();
2294     int cc = 0;
2295 
2296     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2297                __func__, l, a1, a2);
2298 
2299     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2300         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2301         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2302     }
2303 
2304     l = wrap_length32(env, l);
2305     if (l > 256) {
2306         /* max 256 */
2307         l = 256;
2308         cc = 3;
2309     } else if (!l) {
2310         return cc;
2311     }
2312 
2313     /* TODO: Access key handling */
2314     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2315     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2316     access_memmove(env, &desta, &srca, ra);
2317     return cc;
2318 }
2319 
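/* Move to Primary (MVCP).  The converse of MVCS above: copy up to 256
   bytes from the secondary address space (a2) into the primary address
   space (a1), with the same length capping.  */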
2320 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2321 {
2322     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2323     S390Access srca, desta;
2324     uintptr_t ra = GETPC();
2325     int cc = 0;
2326 
2327     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2328                __func__, l, a1, a2);
2329 
2330     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2331         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2332         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2333     }
2334 
2335     l = wrap_length32(env, l);
2336     if (l > 256) {
2337         /* max 256 */
2338         l = 256;
2339         cc = 3;
2340     } else if (!l) {
2341         return cc;
2342     }
2343 
2344     /* TODO: Access key handling */
2345     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2346     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2347     access_memmove(env, &desta, &srca, ra);
2348     return cc;
2349 }
2350 
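/* Invalidate DAT Table Entry (IDTE).  Unless r2 bit 0x800 requests a
   flush-only operation, mark up to 2048 region or segment table
   entries as invalid; in either case flush the TLB, locally when m4
   bit 0 is set, otherwise on all CPUs.  */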
2351 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2352 {
2353     CPUState *cs = env_cpu(env);
2354     const uintptr_t ra = GETPC();
2355     uint64_t table, entry, raddr;
2356     uint16_t entries, i, index = 0;
2357 
2358     if (r2 & 0xff000) {
2359         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2360     }
2361 
2362     if (!(r2 & 0x800)) {
2363         /* invalidation-and-clearing operation */
2364         table = r1 & ASCE_ORIGIN;
2365         entries = (r2 & 0x7ff) + 1;
2366 
2367         switch (r1 & ASCE_TYPE_MASK) {
2368         case ASCE_TYPE_REGION1:
2369             index = (r2 >> 53) & 0x7ff;
2370             break;
2371         case ASCE_TYPE_REGION2:
2372             index = (r2 >> 42) & 0x7ff;
2373             break;
2374         case ASCE_TYPE_REGION3:
2375             index = (r2 >> 31) & 0x7ff;
2376             break;
2377         case ASCE_TYPE_SEGMENT:
2378             index = (r2 >> 20) & 0x7ff;
2379             break;
2380         }
2381         for (i = 0; i < entries; i++) {
2382             /* addresses are not wrapped in 24/31-bit mode but the table index is */
2383             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2384             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2385             if (!(entry & REGION_ENTRY_I)) {
2386                 /* we are allowed to not store if already invalid */
2387                 entry |= REGION_ENTRY_I;
2388                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2389             }
2390         }
2391     }
2392 
2393     /* We simply flush the complete tlb, therefore we can ignore r3. */
2394     if (m4 & 1) {
2395         tlb_flush(cs);
2396     } else {
2397         tlb_flush_all_cpus_synced(cs);
2398     }
2399 }
2400 
2401 /* invalidate pte */
2402 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2403                   uint32_t m4)
2404 {
2405     CPUState *cs = env_cpu(env);
2406     const uintptr_t ra = GETPC();
2407     uint64_t page = vaddr & TARGET_PAGE_MASK;
2408     uint64_t pte_addr, pte;
2409 
2410     /* Compute the page table entry address */
2411     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2412     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2413 
2414     /* Mark the page table entry as invalid */
2415     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2416     pte |= PAGE_ENTRY_I;
2417     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2418 
2419     /* XXX we exploit the fact that Linux passes the exact virtual
2420        address here - it's not obliged to! */
2421     if (m4 & 1) {
2422         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2423             tlb_flush_page(cs, page);
2424             /* XXX 31-bit hack */
2425             tlb_flush_page(cs, page ^ 0x80000000);
2426         } else {
2427             /* looks like we don't have a valid virtual address */
2428             tlb_flush(cs);
2429         }
2430     } else {
2431         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2432             tlb_flush_page_all_cpus_synced(cs, page);
2433             /* XXX 31-bit hack */
2434             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2435         } else {
2436             /* looks like we don't have a valid virtual address */
2437             tlb_flush_all_cpus_synced(cs);
2438         }
2439     }
2440 }
2441 
2442 /* flush local tlb */
2443 void HELPER(ptlb)(CPUS390XState *env)
2444 {
2445     tlb_flush(env_cpu(env));
2446 }
2447 
2448 /* flush global tlb */
2449 void HELPER(purge)(CPUS390XState *env)
2450 {
2451     tlb_flush_all_cpus_synced(env_cpu(env));
2452 }
2453 
2454 /* load real address: translate ADDR and return the real address with
     cc 0, or the translation exception code ORed with 0x80000000 and
     cc 3 on failure */
2455 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2456 {
2457     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2458     uint64_t ret, tec;
2459     int flags, exc, cc;
2460 
2461     /* XXX incomplete - has more corner cases */
2462     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2463         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2464     }
2465 
2466     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2467     if (exc) {
2468         cc = 3;
2469         ret = exc | 0x80000000;
2470     } else {
2471         cc = 0;
2472         ret |= addr & ~TARGET_PAGE_MASK;
2473     }
2474 
2475     env->cc_op = cc;
2476     return ret;
2477 }
2478 #endif
2479 
2480 /* load pair from quadword */
2481 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2482 {
2483     uintptr_t ra = GETPC();
2484     uint64_t hi, lo;
2485 
2486     check_alignment(env, addr, 16, ra);
2487     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2488     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2489 
2490     env->retxl = lo;
2491     return hi;
2492 }
2493 
2494 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2495 {
2496     uintptr_t ra = GETPC();
2497     uint64_t hi, lo;
2498     int mem_idx;
2499     MemOpIdx oi;
2500     Int128 v;
2501 
2502     assert(HAVE_ATOMIC128);
2503 
2504     mem_idx = cpu_mmu_index(env, false);
2505     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2506     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2507     hi = int128_gethi(v);
2508     lo = int128_getlo(v);
2509 
2510     env->retxl = lo;
2511     return hi;
2512 }
2513 
2514 /* store pair to quadword */
2515 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2516                   uint64_t low, uint64_t high)
2517 {
2518     uintptr_t ra = GETPC();
2519 
2520     check_alignment(env, addr, 16, ra);
2521     cpu_stq_data_ra(env, addr + 0, high, ra);
2522     cpu_stq_data_ra(env, addr + 8, low, ra);
2523 }
2524 
2525 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2526                            uint64_t low, uint64_t high)
2527 {
2528     uintptr_t ra = GETPC();
2529     int mem_idx;
2530     MemOpIdx oi;
2531     Int128 v;
2532 
2533     assert(HAVE_ATOMIC128);
2534 
2535     mem_idx = cpu_mmu_index(env, false);
2536     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2537     v = int128_make128(low, high);
2538     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2539 }
2540 
2541 /* Execute instruction.  This instruction executes an insn modified with
2542    the contents of r1.  It does not change the executed instruction in memory;
2543    it does not change the program counter.
2544 
2545    Perform this by recording the modified instruction in env->ex_value.
2546    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2547 */
2548 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2549 {
2550     uint64_t insn = cpu_lduw_code(env, addr);
2551     uint8_t opc = insn >> 8;
2552 
2553     /* OR in the contents of R1[56:63].  */
2554     insn |= r1 & 0xff;
2555 
2556     /* Load the rest of the instruction.  */
2557     insn <<= 48;
2558     switch (get_ilen(opc)) {
2559     case 2:
2560         break;
2561     case 4:
2562         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2563         break;
2564     case 6:
2565         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2566         break;
2567     default:
2568         g_assert_not_reached();
2569     }
2570 
2571     /* The very most common cases can be sped up by avoiding a new TB.  */
2572     if ((opc & 0xf0) == 0xd0) {
2573         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2574                                       uint64_t, uintptr_t);
2575         static const dx_helper dx[16] = {
2576             [0x0] = do_helper_trt_bkwd,
2577             [0x2] = do_helper_mvc,
2578             [0x4] = do_helper_nc,
2579             [0x5] = do_helper_clc,
2580             [0x6] = do_helper_oc,
2581             [0x7] = do_helper_xc,
2582             [0xc] = do_helper_tr,
2583             [0xd] = do_helper_trt_fwd,
2584         };
2585         dx_helper helper = dx[opc & 0xf];
2586 
2587         if (helper) {
2588             uint32_t l = extract64(insn, 48, 8);
2589             uint32_t b1 = extract64(insn, 44, 4);
2590             uint32_t d1 = extract64(insn, 32, 12);
2591             uint32_t b2 = extract64(insn, 28, 4);
2592             uint32_t d2 = extract64(insn, 16, 12);
2593             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2594             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2595 
2596             env->cc_op = helper(env, l, a1, a2, 0);
2597             env->psw.addr += ilen;
2598             return;
2599         }
2600     } else if (opc == 0x0a) {
2601         env->int_svc_code = extract64(insn, 48, 8);
2602         env->int_svc_ilen = ilen;
2603         helper_exception(env, EXCP_SVC);
2604         g_assert_not_reached();
2605     }
2606 
2607     /* Record the insn we want to execute as well as the ilen to use
2608        during the execution of the target insn.  This will also ensure
2609        that ex_value is non-zero, which flags that we are in a state
2610        that requires such execution.  */
2611     env->ex_value = insn | ilen;
2612 }
2613 
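/* Move with Optional Specifications (MVCOS).  Copy up to 4K bytes from
   src to dest, with the access key and address space for each operand
   taken from the OAC fields in register 0 and falling back to the PSW
   values when the corresponding validity bit is clear; lengths above
   4K are capped and flagged with cc 3.  */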
2614 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2615                        uint64_t len)
2616 {
2617     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2618     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2619     const uint64_t r0 = env->regs[0];
2620     const uintptr_t ra = GETPC();
2621     uint8_t dest_key, dest_as, dest_k, dest_a;
2622     uint8_t src_key, src_as, src_k, src_a;
2623     uint64_t val;
2624     int cc = 0;
2625 
2626     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2627                __func__, dest, src, len);
2628 
2629     if (!(env->psw.mask & PSW_MASK_DAT)) {
2630         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2631     }
2632 
2633     /* OAC (operand access control) for the first operand -> dest */
2634     val = (r0 & 0xffff0000ULL) >> 16;
2635     dest_key = (val >> 12) & 0xf;
2636     dest_as = (val >> 6) & 0x3;
2637     dest_k = (val >> 1) & 0x1;
2638     dest_a = val & 0x1;
2639 
2640     /* OAC (operand access control) for the second operand -> src */
2641     val = (r0 & 0x0000ffffULL);
2642     src_key = (val >> 12) & 0xf;
2643     src_as = (val >> 6) & 0x3;
2644     src_k = (val >> 1) & 0x1;
2645     src_a = val & 0x1;
2646 
2647     if (!dest_k) {
2648         dest_key = psw_key;
2649     }
2650     if (!src_k) {
2651         src_key = psw_key;
2652     }
2653     if (!dest_a) {
2654         dest_as = psw_as;
2655     }
2656     if (!src_a) {
2657         src_as = psw_as;
2658     }
2659 
2660     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2661         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2662     }
2663     if (!(env->cregs[0] & CR0_SECONDARY) &&
2664         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2665         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2666     }
2667     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2668         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2669     }
2670 
2671     len = wrap_length32(env, len);
2672     if (len > 4096) {
2673         cc = 3;
2674         len = 4096;
2675     }
2676 
2677     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2678     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2679         (env->psw.mask & PSW_MASK_PSTATE)) {
2680         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2681                       __func__);
2682         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2683     }
2684 
2685     /* FIXME: Access using correct keys and AR-mode */
2686     if (len) {
2687         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2688                                          mmu_idx_from_as(src_as), ra);
2689         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2690                                           mmu_idx_from_as(dest_as), ra);
2691 
2692         access_memmove(env, &desta, &srca, ra);
2693     }
2694 
2695     return cc;
2696 }
2697 
2698 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2699    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2700    value >= 0 indicates failure, and the CC value to be returned.  */
2701 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2702                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2703                                  uint32_t *ochar, uint32_t *olen);
2704 
2705 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2706    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2707    indicates failure, and the CC value to be returned.  */
2708 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2709                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2710                                  uint32_t *olen);
2711 
2712 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2713                        bool enh_check, uintptr_t ra,
2714                        uint32_t *ochar, uint32_t *olen)
2715 {
2716     uint8_t s0, s1, s2, s3;
2717     uint32_t c, l;
2718 
2719     if (ilen < 1) {
2720         return 0;
2721     }
2722     s0 = cpu_ldub_data_ra(env, addr, ra);
2723     if (s0 <= 0x7f) {
2724         /* one byte character */
2725         l = 1;
2726         c = s0;
2727     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2728         /* invalid character */
2729         return 2;
2730     } else if (s0 <= 0xdf) {
2731         /* two byte character */
2732         l = 2;
2733         if (ilen < 2) {
2734             return 0;
2735         }
2736         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2737         c = s0 & 0x1f;
2738         c = (c << 6) | (s1 & 0x3f);
2739         if (enh_check && (s1 & 0xc0) != 0x80) {
2740             return 2;
2741         }
2742     } else if (s0 <= 0xef) {
2743         /* three byte character */
2744         l = 3;
2745         if (ilen < 3) {
2746             return 0;
2747         }
2748         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2749         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2750         c = s0 & 0x0f;
2751         c = (c << 6) | (s1 & 0x3f);
2752         c = (c << 6) | (s2 & 0x3f);
2753         /* Fold the byte-by-byte range descriptions in the PoO into
2754            tests against the complete value.  It disallows encodings
2755            that could be smaller, and the UTF-16 surrogates.  */
2756         if (enh_check
2757             && ((s1 & 0xc0) != 0x80
2758                 || (s2 & 0xc0) != 0x80
2759                 || c < 0x800
2760                 || (c >= 0xd800 && c <= 0xdfff))) {
2761             return 2;
2762         }
2763     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2764         /* four byte character */
2765         l = 4;
2766         if (ilen < 4) {
2767             return 0;
2768         }
2769         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2770         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2771         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2772         c = s0 & 0x07;
2773         c = (c << 6) | (s1 & 0x3f);
2774         c = (c << 6) | (s2 & 0x3f);
2775         c = (c << 6) | (s3 & 0x3f);
2776         /* See above.  */
2777         if (enh_check
2778             && ((s1 & 0xc0) != 0x80
2779                 || (s2 & 0xc0) != 0x80
2780                 || (s3 & 0xc0) != 0x80
2781                 || c < 0x010000
2782                 || c > 0x10ffff)) {
2783             return 2;
2784         }
2785     } else {
2786         /* invalid character */
2787         return 2;
2788     }
2789 
2790     *ochar = c;
2791     *olen = l;
2792     return -1;
2793 }
2794 
2795 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2796                         bool enh_check, uintptr_t ra,
2797                         uint32_t *ochar, uint32_t *olen)
2798 {
2799     uint16_t s0, s1;
2800     uint32_t c, l;
2801 
2802     if (ilen < 2) {
2803         return 0;
2804     }
2805     s0 = cpu_lduw_data_ra(env, addr, ra);
2806     if ((s0 & 0xfc00) != 0xd800) {
2807         /* one word character */
2808         l = 2;
2809         c = s0;
2810     } else {
2811         /* two word character */
2812         l = 4;
2813         if (ilen < 4) {
2814             return 0;
2815         }
2816         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2817         c = extract32(s0, 6, 4) + 1;
2818         c = (c << 6) | (s0 & 0x3f);
2819         c = (c << 10) | (s1 & 0x3ff);
2820         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2821             /* invalid surrogate character */
2822             return 2;
2823         }
2824     }
2825 
2826     *ochar = c;
2827     *olen = l;
2828     return -1;
2829 }
2830 
2831 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2832                         bool enh_check, uintptr_t ra,
2833                         uint32_t *ochar, uint32_t *olen)
2834 {
2835     uint32_t c;
2836 
2837     if (ilen < 4) {
2838         return 0;
2839     }
2840     c = cpu_ldl_data_ra(env, addr, ra);
2841     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2842         /* invalid unicode character */
2843         return 2;
2844     }
2845 
2846     *ochar = c;
2847     *olen = 4;
2848     return -1;
2849 }
2850 
2851 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2852                        uintptr_t ra, uint32_t c, uint32_t *olen)
2853 {
2854     uint8_t d[4];
2855     uint32_t l, i;
2856 
2857     if (c <= 0x7f) {
2858         /* one byte character */
2859         l = 1;
2860         d[0] = c;
2861     } else if (c <= 0x7ff) {
2862         /* two byte character */
2863         l = 2;
2864         d[1] = 0x80 | extract32(c, 0, 6);
2865         d[0] = 0xc0 | extract32(c, 6, 5);
2866     } else if (c <= 0xffff) {
2867         /* three byte character */
2868         l = 3;
2869         d[2] = 0x80 | extract32(c, 0, 6);
2870         d[1] = 0x80 | extract32(c, 6, 6);
2871         d[0] = 0xe0 | extract32(c, 12, 4);
2872     } else {
2873         /* four byte character */
2874         l = 4;
2875         d[3] = 0x80 | extract32(c, 0, 6);
2876         d[2] = 0x80 | extract32(c, 6, 6);
2877         d[1] = 0x80 | extract32(c, 12, 6);
2878         d[0] = 0xf0 | extract32(c, 18, 3);
2879     }
2880 
2881     if (ilen < l) {
2882         return 1;
2883     }
2884     for (i = 0; i < l; ++i) {
2885         cpu_stb_data_ra(env, addr + i, d[i], ra);
2886     }
2887 
2888     *olen = l;
2889     return -1;
2890 }
2891 
2892 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2893                         uintptr_t ra, uint32_t c, uint32_t *olen)
2894 {
2895     uint16_t d0, d1;
2896 
2897     if (c <= 0xffff) {
2898         /* one word character */
2899         if (ilen < 2) {
2900             return 1;
2901         }
2902         cpu_stw_data_ra(env, addr, c, ra);
2903         *olen = 2;
2904     } else {
2905         /* two word character */
2906         if (ilen < 4) {
2907             return 1;
2908         }
2909         d1 = 0xdc00 | extract32(c, 0, 10);
2910         d0 = 0xd800 | extract32(c, 10, 6);
2911         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2912         cpu_stw_data_ra(env, addr + 0, d0, ra);
2913         cpu_stw_data_ra(env, addr + 2, d1, ra);
2914         *olen = 4;
2915     }
2916 
2917     return -1;
2918 }
2919 
2920 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2921                         uintptr_t ra, uint32_t c, uint32_t *olen)
2922 {
2923     if (ilen < 4) {
2924         return 1;
2925     }
2926     cpu_stl_data_ra(env, addr, c, ra);
2927     *olen = 4;
2928     return -1;
2929 }
2930 
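/* Common driver for the CUxx helpers below.  R1/R1+1 hold the
   destination address and remaining length, R2/R2+1 the source pair,
   and m3 bit 0 enables the enhanced well-formedness checks in the
   decoder.  Work is capped at 256 characters per invocation (cc 3) so
   that interrupts are serviced in a timely manner.  */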
2931 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2932                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2933                                        decode_unicode_fn decode,
2934                                        encode_unicode_fn encode)
2935 {
2936     uint64_t dst = get_address(env, r1);
2937     uint64_t dlen = get_length(env, r1 + 1);
2938     uint64_t src = get_address(env, r2);
2939     uint64_t slen = get_length(env, r2 + 1);
2940     bool enh_check = m3 & 1;
2941     int cc, i;
2942 
2943     /* Lest we fail to service interrupts in a timely manner, limit the
2944        amount of work we're willing to do.  For now, let's cap at 256.  */
2945     for (i = 0; i < 256; ++i) {
2946         uint32_t c, ilen, olen;
2947 
2948         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2949         if (unlikely(cc >= 0)) {
2950             break;
2951         }
2952         cc = encode(env, dst, dlen, ra, c, &olen);
2953         if (unlikely(cc >= 0)) {
2954             break;
2955         }
2956 
2957         src += ilen;
2958         slen -= ilen;
2959         dst += olen;
2960         dlen -= olen;
2961         cc = 3;
2962     }
2963 
2964     set_address(env, r1, dst);
2965     set_length(env, r1 + 1, dlen);
2966     set_address(env, r2, src);
2967     set_length(env, r2 + 1, slen);
2968 
2969     return cc;
2970 }
2971 
2972 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2973 {
2974     return convert_unicode(env, r1, r2, m3, GETPC(),
2975                            decode_utf8, encode_utf16);
2976 }
2977 
2978 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2979 {
2980     return convert_unicode(env, r1, r2, m3, GETPC(),
2981                            decode_utf8, encode_utf32);
2982 }
2983 
2984 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2985 {
2986     return convert_unicode(env, r1, r2, m3, GETPC(),
2987                            decode_utf16, encode_utf8);
2988 }
2989 
2990 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2991 {
2992     return convert_unicode(env, r1, r2, m3, GETPC(),
2993                            decode_utf16, encode_utf32);
2994 }
2995 
2996 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2997 {
2998     return convert_unicode(env, r1, r2, m3, GETPC(),
2999                            decode_utf32, encode_utf8);
3000 }
3001 
3002 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3003 {
3004     return convert_unicode(env, r1, r2, m3, GETPC(),
3005                            decode_utf32, encode_utf16);
3006 }
3007 
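/* Make sure the whole range [addr, addr + len) is writable, probing it
   page by page so that the correct exception is raised for the first
   inaccessible page.  */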
3008 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3009                         uintptr_t ra)
3010 {
3011     /* test the actual access, not just any access to the page due to LAP */
3012     while (len) {
3013         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3014         const uint64_t curlen = MIN(pagelen, len);
3015 
3016         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3017         addr = wrap_address(env, addr + curlen);
3018         len -= curlen;
3019     }
3020 }
3021 
3022 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3023 {
3024     probe_write_access(env, addr, len, GETPC());
3025 }
3026