xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 62a4db55)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "cpu.h"
23 #include "s390x-internal.h"
24 #include "tcg_s390x.h"
25 #include "exec/helper-proto.h"
26 #include "exec/exec-all.h"
27 #include "exec/cpu_ldst.h"
28 #include "qemu/int128.h"
29 #include "qemu/atomic128.h"
30 #include "tcg/tcg.h"
31 
32 #if !defined(CONFIG_USER_ONLY)
33 #include "hw/s390x/storage-keys.h"
34 #include "hw/boards.h"
35 #endif
36 
37 /*****************************************************************************/
38 /* Softmmu support */
39 
/*
 * Debug logging for the helpers in this file: uncomment the define below to
 * forward HELPER_LOG() to qemu_log(); otherwise HELPER_LOG() expands to
 * nothing.
 */
/* #define DEBUG_HELPER */
#ifndef DEBUG_HELPER
#define HELPER_LOG(x...)
#else
#define HELPER_LOG(x...) qemu_log(x)
#endif
46 
47 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
48 {
49     uint16_t pkm = env->cregs[3] >> 16;
50 
51     if (env->psw.mask & PSW_MASK_PSTATE) {
52         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
53         return pkm & (0x80 >> psw_key);
54     }
55     return true;
56 }
57 
58 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
59                                    uint64_t src, uint32_t len)
60 {
61     if (!len || src == dest) {
62         return false;
63     }
64     /* Take care of wrapping at the end of address space. */
65     if (unlikely(wrap_address(env, src + len - 1) < src)) {
66         return dest > src || dest <= wrap_address(env, src + len - 1);
67     }
68     return dest > src && dest <= src + len - 1;
69 }
70 
71 /* Trigger a SPECIFICATION exception if an address or a length is not
72    naturally aligned.  */
73 static inline void check_alignment(CPUS390XState *env, uint64_t v,
74                                    int wordsize, uintptr_t ra)
75 {
76     if (v % wordsize) {
77         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
78     }
79 }
80 
81 /* Load a value from memory according to its size.  */
82 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
83                                            int wordsize, uintptr_t ra)
84 {
85     switch (wordsize) {
86     case 1:
87         return cpu_ldub_data_ra(env, addr, ra);
88     case 2:
89         return cpu_lduw_data_ra(env, addr, ra);
90     default:
91         abort();
92     }
93 }
94 
95 /* Store a to memory according to its size.  */
96 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
97                                       uint64_t value, int wordsize,
98                                       uintptr_t ra)
99 {
100     switch (wordsize) {
101     case 1:
102         cpu_stb_data_ra(env, addr, value, ra);
103         break;
104     case 2:
105         cpu_stw_data_ra(env, addr, value, ra);
106         break;
107     default:
108         abort();
109     }
110 }
111 
112 /* An access covers at most 4096 bytes and therefore at most two pages. */
113 typedef struct S390Access {
114     target_ulong vaddr1;
115     target_ulong vaddr2;
116     char *haddr1;
117     char *haddr2;
118     uint16_t size1;
119     uint16_t size2;
120     /*
121      * If we can't access the host page directly, we'll have to do I/O access
122      * via ld/st helpers. These are internal details, so we store the
123      * mmu idx to do the access here instead of passing it around in the
124      * helpers. Maybe, one day we can get rid of ld/st access - once we can
125      * handle TLB_NOTDIRTY differently. We don't expect these special accesses
126      * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
127      * pages, we might trigger a new MMU translation - very unlikely that
128      * the mapping changes in between and we would trigger a fault.
129      */
130     int mmu_idx;
131 } S390Access;
132 
133 /*
134  * With nonfault=1, return the PGM_ exception that would have been injected
135  * into the guest; return 0 if no exception was detected.
136  *
137  * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec.
138  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
139  */
140 static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
141                              MMUAccessType access_type, int mmu_idx,
142                              bool nonfault, void **phost, uintptr_t ra)
143 {
144     int flags;
145 
146 #if defined(CONFIG_USER_ONLY)
147     flags = page_get_flags(addr);
148     if (!(flags & (access_type == MMU_DATA_LOAD ?  PAGE_READ : PAGE_WRITE_ORG))) {
149         env->__excp_addr = addr;
150         flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
151         if (nonfault) {
152             return flags;
153         }
154         tcg_s390_program_interrupt(env, flags, ra);
155     }
156     *phost = g2h(env_cpu(env), addr);
157 #else
158     /*
159      * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
160      * to detect if there was an exception during tlb_fill().
161      */
162     env->tlb_fill_exc = 0;
163     flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
164                                ra);
165     if (env->tlb_fill_exc) {
166         return env->tlb_fill_exc;
167     }
168 
169     if (unlikely(flags & TLB_WATCHPOINT)) {
170         /* S390 does not presently use transaction attributes. */
171         cpu_check_watchpoint(env_cpu(env), addr, size,
172                              MEMTXATTRS_UNSPECIFIED,
173                              (access_type == MMU_DATA_STORE
174                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
175     }
176 #endif
177     return 0;
178 }
179 
180 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
181                              bool nonfault, vaddr vaddr1, int size,
182                              MMUAccessType access_type,
183                              int mmu_idx, uintptr_t ra)
184 {
185     void *haddr1, *haddr2 = NULL;
186     int size1, size2, exc;
187     vaddr vaddr2 = 0;
188 
189     assert(size > 0 && size <= 4096);
190 
191     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
192     size2 = size - size1;
193 
194     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
195                             &haddr1, ra);
196     if (exc) {
197         return exc;
198     }
199     if (unlikely(size2)) {
200         /* The access crosses page boundaries. */
201         vaddr2 = wrap_address(env, vaddr1 + size1);
202         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
203                                 nonfault, &haddr2, ra);
204         if (exc) {
205             return exc;
206         }
207     }
208 
209     *access = (S390Access) {
210         .vaddr1 = vaddr1,
211         .vaddr2 = vaddr2,
212         .haddr1 = haddr1,
213         .haddr2 = haddr2,
214         .size1 = size1,
215         .size2 = size2,
216         .mmu_idx = mmu_idx
217     };
218     return 0;
219 }
220 
221 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
222                                  MMUAccessType access_type, int mmu_idx,
223                                  uintptr_t ra)
224 {
225     S390Access ret;
226     int exc = access_prepare_nf(&ret, env, false, vaddr, size,
227                                 access_type, mmu_idx, ra);
228     assert(!exc);
229     return ret;
230 }
231 
232 /* Helper to handle memset on a single page. */
233 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
234                              uint8_t byte, uint16_t size, int mmu_idx,
235                              uintptr_t ra)
236 {
237 #ifdef CONFIG_USER_ONLY
238     g_assert(haddr);
239     memset(haddr, byte, size);
240 #else
241     TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
242     int i;
243 
244     if (likely(haddr)) {
245         memset(haddr, byte, size);
246     } else {
247         /*
248          * Do a single access and test if we can then get access to the
249          * page. This is especially relevant to speed up TLB_NOTDIRTY.
250          */
251         g_assert(size > 0);
252         helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
253         haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
254         if (likely(haddr)) {
255             memset(haddr + 1, byte, size - 1);
256         } else {
257             for (i = 1; i < size; i++) {
258                 helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
259             }
260         }
261     }
262 #endif
263 }
264 
265 static void access_memset(CPUS390XState *env, S390Access *desta,
266                           uint8_t byte, uintptr_t ra)
267 {
268 
269     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
270                      desta->mmu_idx, ra);
271     if (likely(!desta->size2)) {
272         return;
273     }
274     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
275                      desta->mmu_idx, ra);
276 }
277 
278 static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
279                                   int offset, int mmu_idx, uintptr_t ra)
280 {
281 #ifdef CONFIG_USER_ONLY
282     return ldub_p(*haddr + offset);
283 #else
284     TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
285     uint8_t byte;
286 
287     if (likely(*haddr)) {
288         return ldub_p(*haddr + offset);
289     }
290     /*
291      * Do a single access and test if we can then get access to the
292      * page. This is especially relevant to speed up TLB_NOTDIRTY.
293      */
294     byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
295     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
296     return byte;
297 #endif
298 }
299 
300 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
301                                int offset, uintptr_t ra)
302 {
303     if (offset < access->size1) {
304         return do_access_get_byte(env, access->vaddr1, &access->haddr1,
305                                   offset, access->mmu_idx, ra);
306     }
307     return do_access_get_byte(env, access->vaddr2, &access->haddr2,
308                               offset - access->size1, access->mmu_idx, ra);
309 }
310 
311 static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
312                                int offset, uint8_t byte, int mmu_idx,
313                                uintptr_t ra)
314 {
315 #ifdef CONFIG_USER_ONLY
316     stb_p(*haddr + offset, byte);
317 #else
318     TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
319 
320     if (likely(*haddr)) {
321         stb_p(*haddr + offset, byte);
322         return;
323     }
324     /*
325      * Do a single access and test if we can then get access to the
326      * page. This is especially relevant to speed up TLB_NOTDIRTY.
327      */
328     helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
329     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
330 #endif
331 }
332 
333 static void access_set_byte(CPUS390XState *env, S390Access *access,
334                             int offset, uint8_t byte, uintptr_t ra)
335 {
336     if (offset < access->size1) {
337         do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
338                            access->mmu_idx, ra);
339     } else {
340         do_access_set_byte(env, access->vaddr2, &access->haddr2,
341                            offset - access->size1, byte, access->mmu_idx, ra);
342     }
343 }
344 
345 /*
346  * Move data with the same semantics as memmove() in case ranges don't overlap
347  * or src > dest. Undefined behavior on destructive overlaps.
348  */
349 static void access_memmove(CPUS390XState *env, S390Access *desta,
350                            S390Access *srca, uintptr_t ra)
351 {
352     int diff;
353 
354     g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
355 
356     /* Fallback to slow access in case we don't have access to all host pages */
357     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
358                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
359         int i;
360 
361         for (i = 0; i < desta->size1 + desta->size2; i++) {
362             uint8_t byte = access_get_byte(env, srca, i, ra);
363 
364             access_set_byte(env, desta, i, byte, ra);
365         }
366         return;
367     }
368 
369     if (srca->size1 == desta->size1) {
370         memmove(desta->haddr1, srca->haddr1, srca->size1);
371         if (unlikely(srca->size2)) {
372             memmove(desta->haddr2, srca->haddr2, srca->size2);
373         }
374     } else if (srca->size1 < desta->size1) {
375         diff = desta->size1 - srca->size1;
376         memmove(desta->haddr1, srca->haddr1, srca->size1);
377         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
378         if (likely(desta->size2)) {
379             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
380         }
381     } else {
382         diff = srca->size1 - desta->size1;
383         memmove(desta->haddr1, srca->haddr1, desta->size1);
384         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
385         if (likely(srca->size2)) {
386             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
387         }
388     }
389 }
390 
391 static int mmu_idx_from_as(uint8_t as)
392 {
393     switch (as) {
394     case AS_PRIMARY:
395         return MMU_PRIMARY_IDX;
396     case AS_SECONDARY:
397         return MMU_SECONDARY_IDX;
398     case AS_HOME:
399         return MMU_HOME_IDX;
400     default:
401         /* FIXME AS_ACCREG */
402         g_assert_not_reached();
403     }
404 }
405 
406 /* and on array */
407 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
408                              uint64_t src, uintptr_t ra)
409 {
410     const int mmu_idx = cpu_mmu_index(env, false);
411     S390Access srca1, srca2, desta;
412     uint32_t i;
413     uint8_t c = 0;
414 
415     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
416                __func__, l, dest, src);
417 
418     /* NC always processes one more byte than specified - maximum is 256 */
419     l++;
420 
421     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
422     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
423     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
424     for (i = 0; i < l; i++) {
425         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
426                           access_get_byte(env, &srca2, i, ra);
427 
428         c |= x;
429         access_set_byte(env, &desta, i, x, ra);
430     }
431     return c != 0;
432 }
433 
434 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
435                     uint64_t src)
436 {
437     return do_helper_nc(env, l, dest, src, GETPC());
438 }
439 
440 /* xor on array */
441 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
442                              uint64_t src, uintptr_t ra)
443 {
444     const int mmu_idx = cpu_mmu_index(env, false);
445     S390Access srca1, srca2, desta;
446     uint32_t i;
447     uint8_t c = 0;
448 
449     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
450                __func__, l, dest, src);
451 
452     /* XC always processes one more byte than specified - maximum is 256 */
453     l++;
454 
455     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
456     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
457     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
458 
459     /* xor with itself is the same as memset(0) */
460     if (src == dest) {
461         access_memset(env, &desta, 0, ra);
462         return 0;
463     }
464 
465     for (i = 0; i < l; i++) {
466         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
467                           access_get_byte(env, &srca2, i, ra);
468 
469         c |= x;
470         access_set_byte(env, &desta, i, x, ra);
471     }
472     return c != 0;
473 }
474 
475 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
476                     uint64_t src)
477 {
478     return do_helper_xc(env, l, dest, src, GETPC());
479 }
480 
481 /* or on array */
482 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
483                              uint64_t src, uintptr_t ra)
484 {
485     const int mmu_idx = cpu_mmu_index(env, false);
486     S390Access srca1, srca2, desta;
487     uint32_t i;
488     uint8_t c = 0;
489 
490     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
491                __func__, l, dest, src);
492 
493     /* OC always processes one more byte than specified - maximum is 256 */
494     l++;
495 
496     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
497     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
498     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
499     for (i = 0; i < l; i++) {
500         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
501                           access_get_byte(env, &srca2, i, ra);
502 
503         c |= x;
504         access_set_byte(env, &desta, i, x, ra);
505     }
506     return c != 0;
507 }
508 
509 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
510                     uint64_t src)
511 {
512     return do_helper_oc(env, l, dest, src, GETPC());
513 }
514 
515 /* memmove */
516 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
517                               uint64_t src, uintptr_t ra)
518 {
519     const int mmu_idx = cpu_mmu_index(env, false);
520     S390Access srca, desta;
521     uint32_t i;
522 
523     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
524                __func__, l, dest, src);
525 
526     /* MVC always copies one more byte than specified - maximum is 256 */
527     l++;
528 
529     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
530     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
531 
532     /*
533      * "When the operands overlap, the result is obtained as if the operands
534      * were processed one byte at a time". Only non-destructive overlaps
535      * behave like memmove().
536      */
537     if (dest == src + 1) {
538         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
539     } else if (!is_destructive_overlap(env, dest, src, l)) {
540         access_memmove(env, &desta, &srca, ra);
541     } else {
542         for (i = 0; i < l; i++) {
543             uint8_t byte = access_get_byte(env, &srca, i, ra);
544 
545             access_set_byte(env, &desta, i, byte, ra);
546         }
547     }
548 
549     return env->cc_op;
550 }
551 
552 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
553 {
554     do_helper_mvc(env, l, dest, src, GETPC());
555 }
556 
557 /* move inverse  */
558 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
559 {
560     const int mmu_idx = cpu_mmu_index(env, false);
561     S390Access srca, desta;
562     uintptr_t ra = GETPC();
563     int i;
564 
565     /* MVCIN always copies one more byte than specified - maximum is 256 */
566     l++;
567 
568     src = wrap_address(env, src - l + 1);
569     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
570     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
571     for (i = 0; i < l; i++) {
572         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
573 
574         access_set_byte(env, &desta, i, x, ra);
575     }
576 }
577 
578 /* move numerics  */
579 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
580 {
581     const int mmu_idx = cpu_mmu_index(env, false);
582     S390Access srca1, srca2, desta;
583     uintptr_t ra = GETPC();
584     int i;
585 
586     /* MVN always copies one more byte than specified - maximum is 256 */
587     l++;
588 
589     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
590     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
591     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
592     for (i = 0; i < l; i++) {
593         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
594                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
595 
596         access_set_byte(env, &desta, i, x, ra);
597     }
598 }
599 
600 /* move with offset  */
601 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
602 {
603     const int mmu_idx = cpu_mmu_index(env, false);
604     /* MVO always processes one more byte than specified - maximum is 16 */
605     const int len_dest = (l >> 4) + 1;
606     const int len_src = (l & 0xf) + 1;
607     uintptr_t ra = GETPC();
608     uint8_t byte_dest, byte_src;
609     S390Access srca, desta;
610     int i, j;
611 
612     srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
613     desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
614 
615     /* Handle rightmost byte */
616     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
617     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
618     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
619     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
620 
621     /* Process remaining bytes from right to left */
622     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
623         byte_dest = byte_src >> 4;
624         if (j >= 0) {
625             byte_src = access_get_byte(env, &srca, j, ra);
626         } else {
627             byte_src = 0;
628         }
629         byte_dest |= byte_src << 4;
630         access_set_byte(env, &desta, i, byte_dest, ra);
631     }
632 }
633 
634 /* move zones  */
635 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
636 {
637     const int mmu_idx = cpu_mmu_index(env, false);
638     S390Access srca1, srca2, desta;
639     uintptr_t ra = GETPC();
640     int i;
641 
642     /* MVZ always copies one more byte than specified - maximum is 256 */
643     l++;
644 
645     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
646     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
647     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
648     for (i = 0; i < l; i++) {
649         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
650                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
651 
652         access_set_byte(env, &desta, i, x, ra);
653     }
654 }
655 
656 /* compare unsigned byte arrays */
657 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
658                               uint64_t s2, uintptr_t ra)
659 {
660     uint32_t i;
661     uint32_t cc = 0;
662 
663     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
664                __func__, l, s1, s2);
665 
666     for (i = 0; i <= l; i++) {
667         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
668         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
669         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
670         if (x < y) {
671             cc = 1;
672             break;
673         } else if (x > y) {
674             cc = 2;
675             break;
676         }
677     }
678 
679     HELPER_LOG("\n");
680     return cc;
681 }
682 
683 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
684 {
685     return do_helper_clc(env, l, s1, s2, GETPC());
686 }
687 
688 /* compare logical under mask */
689 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
690                      uint64_t addr)
691 {
692     uintptr_t ra = GETPC();
693     uint32_t cc = 0;
694 
695     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
696                mask, addr);
697 
698     while (mask) {
699         if (mask & 8) {
700             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
701             uint8_t r = extract32(r1, 24, 8);
702             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
703                        addr);
704             if (r < d) {
705                 cc = 1;
706                 break;
707             } else if (r > d) {
708                 cc = 2;
709                 break;
710             }
711             addr++;
712         }
713         mask = (mask << 1) & 0xf;
714         r1 <<= 8;
715     }
716 
717     HELPER_LOG("\n");
718     return cc;
719 }
720 
721 static inline uint64_t get_address(CPUS390XState *env, int reg)
722 {
723     return wrap_address(env, env->regs[reg]);
724 }
725 
726 /*
727  * Store the address to the given register, zeroing out unused leftmost
728  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
729  */
730 static inline void set_address_zero(CPUS390XState *env, int reg,
731                                     uint64_t address)
732 {
733     if (env->psw.mask & PSW_MASK_64) {
734         env->regs[reg] = address;
735     } else {
736         if (!(env->psw.mask & PSW_MASK_32)) {
737             address &= 0x00ffffff;
738         } else {
739             address &= 0x7fffffff;
740         }
741         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
742     }
743 }
744 
745 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
746 {
747     if (env->psw.mask & PSW_MASK_64) {
748         /* 64-Bit mode */
749         env->regs[reg] = address;
750     } else {
751         if (!(env->psw.mask & PSW_MASK_32)) {
752             /* 24-Bit mode. According to the PoO it is implementation
753             dependent if bits 32-39 remain unchanged or are set to
754             zeros.  Choose the former so that the function can also be
755             used for TRT.  */
756             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
757         } else {
758             /* 31-Bit mode. According to the PoO it is implementation
759             dependent if bit 32 remains unchanged or is set to zero.
760             Choose the latter so that the function can also be used for
761             TRT.  */
762             address &= 0x7fffffff;
763             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
764         }
765     }
766 }
767 
768 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
769 {
770     if (!(env->psw.mask & PSW_MASK_64)) {
771         return (uint32_t)length;
772     }
773     return length;
774 }
775 
776 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
777 {
778     if (!(env->psw.mask & PSW_MASK_64)) {
779         /* 24-Bit and 31-Bit mode */
780         length &= 0x7fffffff;
781     }
782     return length;
783 }
784 
785 static inline uint64_t get_length(CPUS390XState *env, int reg)
786 {
787     return wrap_length31(env, env->regs[reg]);
788 }
789 
790 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
791 {
792     if (env->psw.mask & PSW_MASK_64) {
793         /* 64-Bit mode */
794         env->regs[reg] = length;
795     } else {
796         /* 24-Bit and 31-Bit mode */
797         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
798     }
799 }
800 
801 /* search string (c is byte to search, r2 is string, r1 end of string) */
802 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
803 {
804     uintptr_t ra = GETPC();
805     uint64_t end, str;
806     uint32_t len;
807     uint8_t v, c = env->regs[0];
808 
809     /* Bits 32-55 must contain all 0.  */
810     if (env->regs[0] & 0xffffff00u) {
811         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
812     }
813 
814     str = get_address(env, r2);
815     end = get_address(env, r1);
816 
817     /* Lest we fail to service interrupts in a timely manner, limit the
818        amount of work we're willing to do.  For now, let's cap at 8k.  */
819     for (len = 0; len < 0x2000; ++len) {
820         if (str + len == end) {
821             /* Character not found.  R1 & R2 are unmodified.  */
822             env->cc_op = 2;
823             return;
824         }
825         v = cpu_ldub_data_ra(env, str + len, ra);
826         if (v == c) {
827             /* Character found.  Set R1 to the location; R2 is unmodified.  */
828             env->cc_op = 1;
829             set_address(env, r1, str + len);
830             return;
831         }
832     }
833 
834     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
835     env->cc_op = 3;
836     set_address(env, r2, str + len);
837 }
838 
839 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
840 {
841     uintptr_t ra = GETPC();
842     uint32_t len;
843     uint16_t v, c = env->regs[0];
844     uint64_t end, str, adj_end;
845 
846     /* Bits 32-47 of R0 must be zero.  */
847     if (env->regs[0] & 0xffff0000u) {
848         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
849     }
850 
851     str = get_address(env, r2);
852     end = get_address(env, r1);
853 
854     /* If the LSB of the two addresses differ, use one extra byte.  */
855     adj_end = end + ((str ^ end) & 1);
856 
857     /* Lest we fail to service interrupts in a timely manner, limit the
858        amount of work we're willing to do.  For now, let's cap at 8k.  */
859     for (len = 0; len < 0x2000; len += 2) {
860         if (str + len == adj_end) {
861             /* End of input found.  */
862             env->cc_op = 2;
863             return;
864         }
865         v = cpu_lduw_data_ra(env, str + len, ra);
866         if (v == c) {
867             /* Character found.  Set R1 to the location; R2 is unmodified.  */
868             env->cc_op = 1;
869             set_address(env, r1, str + len);
870             return;
871         }
872     }
873 
874     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
875     env->cc_op = 3;
876     set_address(env, r2, str + len);
877 }
878 
879 /* unsigned string compare (c is string terminator) */
880 uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
881 {
882     uintptr_t ra = GETPC();
883     uint32_t len;
884 
885     c = c & 0xff;
886     s1 = wrap_address(env, s1);
887     s2 = wrap_address(env, s2);
888 
889     /* Lest we fail to service interrupts in a timely manner, limit the
890        amount of work we're willing to do.  For now, let's cap at 8k.  */
891     for (len = 0; len < 0x2000; ++len) {
892         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
893         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
894         if (v1 == v2) {
895             if (v1 == c) {
896                 /* Equal.  CC=0, and don't advance the registers.  */
897                 env->cc_op = 0;
898                 env->retxl = s2;
899                 return s1;
900             }
901         } else {
902             /* Unequal.  CC={1,2}, and advance the registers.  Note that
903                the terminator need not be zero, but the string that contains
904                the terminator is by definition "low".  */
905             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
906             env->retxl = s2 + len;
907             return s1 + len;
908         }
909     }
910 
911     /* CPU-determined bytes equal; advance the registers.  */
912     env->cc_op = 3;
913     env->retxl = s2 + len;
914     return s1 + len;
915 }
916 
917 /* move page */
918 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
919 {
920     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
921     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
922     const int mmu_idx = cpu_mmu_index(env, false);
923     const bool f = extract64(r0, 11, 1);
924     const bool s = extract64(r0, 10, 1);
925     const bool cco = extract64(r0, 8, 1);
926     uintptr_t ra = GETPC();
927     S390Access srca, desta;
928     int exc;
929 
930     if ((f && s) || extract64(r0, 12, 4)) {
931         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
932     }
933 
934     /*
935      * We always manually handle exceptions such that we can properly store
936      * r1/r2 to the lowcore on page-translation exceptions.
937      *
938      * TODO: Access key handling
939      */
940     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
941                             MMU_DATA_LOAD, mmu_idx, ra);
942     if (exc) {
943         if (cco) {
944             return 2;
945         }
946         goto inject_exc;
947     }
948     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
949                             MMU_DATA_STORE, mmu_idx, ra);
950     if (exc) {
951         if (cco && exc != PGM_PROTECTION) {
952             return 1;
953         }
954         goto inject_exc;
955     }
956     access_memmove(env, &desta, &srca, ra);
957     return 0; /* data moved */
958 inject_exc:
959 #if !defined(CONFIG_USER_ONLY)
960     if (exc != PGM_ADDRESSING) {
961         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
962                  env->tlb_fill_tec);
963     }
964     if (exc == PGM_PAGE_TRANS) {
965         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
966                  r1 << 4 | r2);
967     }
968 #endif
969     tcg_s390_program_interrupt(env, exc, ra);
970 }
971 
972 /* string copy */
973 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
974 {
975     const int mmu_idx = cpu_mmu_index(env, false);
976     const uint64_t d = get_address(env, r1);
977     const uint64_t s = get_address(env, r2);
978     const uint8_t c = env->regs[0];
979     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
980     S390Access srca, desta;
981     uintptr_t ra = GETPC();
982     int i;
983 
984     if (env->regs[0] & 0xffffff00ull) {
985         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
986     }
987 
988     /*
989      * Our access should not exceed single pages, as we must not report access
990      * exceptions exceeding the actually copied range (which we don't know at
991      * this point). We might over-indicate watchpoints within the pages
992      * (if we ever care, we have to limit processing to a single byte).
993      */
994     srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
995     desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
996     for (i = 0; i < len; i++) {
997         const uint8_t v = access_get_byte(env, &srca, i, ra);
998 
999         access_set_byte(env, &desta, i, v, ra);
1000         if (v == c) {
1001             set_address_zero(env, r1, d + i);
1002             return 1;
1003         }
1004     }
1005     set_address_zero(env, r1, d + len);
1006     set_address_zero(env, r2, s + len);
1007     return 3;
1008 }
1009 
1010 /* load access registers r1 to r3 from memory at a2 */
1011 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1012 {
1013     uintptr_t ra = GETPC();
1014     int i;
1015 
1016     if (a2 & 0x3) {
1017         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1018     }
1019 
1020     for (i = r1;; i = (i + 1) % 16) {
1021         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1022         a2 += 4;
1023 
1024         if (i == r3) {
1025             break;
1026         }
1027     }
1028 }
1029 
1030 /* store access registers r1 to r3 in memory at a2 */
1031 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1032 {
1033     uintptr_t ra = GETPC();
1034     int i;
1035 
1036     if (a2 & 0x3) {
1037         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1038     }
1039 
1040     for (i = r1;; i = (i + 1) % 16) {
1041         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1042         a2 += 4;
1043 
1044         if (i == r3) {
1045             break;
1046         }
1047     }
1048 }
1049 
1050 /* move long helper */
1051 static inline uint32_t do_mvcl(CPUS390XState *env,
1052                                uint64_t *dest, uint64_t *destlen,
1053                                uint64_t *src, uint64_t *srclen,
1054                                uint16_t pad, int wordsize, uintptr_t ra)
1055 {
1056     const int mmu_idx = cpu_mmu_index(env, false);
1057     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1058     S390Access srca, desta;
1059     int i, cc;
1060 
1061     if (*destlen == *srclen) {
1062         cc = 0;
1063     } else if (*destlen < *srclen) {
1064         cc = 1;
1065     } else {
1066         cc = 2;
1067     }
1068 
1069     if (!*destlen) {
1070         return cc;
1071     }
1072 
1073     /*
1074      * Only perform one type of type of operation (move/pad) at a time.
1075      * Stay within single pages.
1076      */
1077     if (*srclen) {
1078         /* Copy the src array */
1079         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1080         *destlen -= len;
1081         *srclen -= len;
1082         srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1083         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1084         access_memmove(env, &desta, &srca, ra);
1085         *src = wrap_address(env, *src + len);
1086         *dest = wrap_address(env, *dest + len);
1087     } else if (wordsize == 1) {
1088         /* Pad the remaining area */
1089         *destlen -= len;
1090         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1091         access_memset(env, &desta, pad, ra);
1092         *dest = wrap_address(env, *dest + len);
1093     } else {
1094         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1095 
1096         /* The remaining length selects the padding byte. */
1097         for (i = 0; i < len; (*destlen)--, i++) {
1098             if (*destlen & 1) {
1099                 access_set_byte(env, &desta, i, pad, ra);
1100             } else {
1101                 access_set_byte(env, &desta, i, pad >> 8, ra);
1102             }
1103         }
1104         *dest = wrap_address(env, *dest + len);
1105     }
1106 
1107     return *destlen ? 3 : cc;
1108 }
1109 
1110 /* move long */
1111 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1112 {
1113     const int mmu_idx = cpu_mmu_index(env, false);
1114     uintptr_t ra = GETPC();
1115     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1116     uint64_t dest = get_address(env, r1);
1117     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1118     uint64_t src = get_address(env, r2);
1119     uint8_t pad = env->regs[r2 + 1] >> 24;
1120     CPUState *cs = env_cpu(env);
1121     S390Access srca, desta;
1122     uint32_t cc, cur_len;
1123 
1124     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1125         cc = 3;
1126     } else if (srclen == destlen) {
1127         cc = 0;
1128     } else if (destlen < srclen) {
1129         cc = 1;
1130     } else {
1131         cc = 2;
1132     }
1133 
1134     /* We might have to zero-out some bits even if there was no action. */
1135     if (unlikely(!destlen || cc == 3)) {
1136         set_address_zero(env, r2, src);
1137         set_address_zero(env, r1, dest);
1138         return cc;
1139     } else if (!srclen) {
1140         set_address_zero(env, r2, src);
1141     }
1142 
1143     /*
1144      * Only perform one type of type of operation (move/pad) in one step.
1145      * Stay within single pages.
1146      */
1147     while (destlen) {
1148         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1149         if (!srclen) {
1150             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1151                                    ra);
1152             access_memset(env, &desta, pad, ra);
1153         } else {
1154             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1155 
1156             srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
1157                                   ra);
1158             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1159                                    ra);
1160             access_memmove(env, &desta, &srca, ra);
1161             src = wrap_address(env, src + cur_len);
1162             srclen -= cur_len;
1163             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1164             set_address_zero(env, r2, src);
1165         }
1166         dest = wrap_address(env, dest + cur_len);
1167         destlen -= cur_len;
1168         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1169         set_address_zero(env, r1, dest);
1170 
1171         /*
1172          * MVCL is interruptible. Return to the main loop if requested after
1173          * writing back all state to registers. If no interrupt will get
1174          * injected, we'll end up back in this handler and continue processing
1175          * the remaining parts.
1176          */
1177         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1178             cpu_loop_exit_restore(cs, ra);
1179         }
1180     }
1181     return cc;
1182 }
1183 
1184 /* move long extended */
1185 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1186                        uint32_t r3)
1187 {
1188     uintptr_t ra = GETPC();
1189     uint64_t destlen = get_length(env, r1 + 1);
1190     uint64_t dest = get_address(env, r1);
1191     uint64_t srclen = get_length(env, r3 + 1);
1192     uint64_t src = get_address(env, r3);
1193     uint8_t pad = a2;
1194     uint32_t cc;
1195 
1196     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1197 
1198     set_length(env, r1 + 1, destlen);
1199     set_length(env, r3 + 1, srclen);
1200     set_address(env, r1, dest);
1201     set_address(env, r3, src);
1202 
1203     return cc;
1204 }
1205 
1206 /* move long unicode */
1207 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1208                        uint32_t r3)
1209 {
1210     uintptr_t ra = GETPC();
1211     uint64_t destlen = get_length(env, r1 + 1);
1212     uint64_t dest = get_address(env, r1);
1213     uint64_t srclen = get_length(env, r3 + 1);
1214     uint64_t src = get_address(env, r3);
1215     uint16_t pad = a2;
1216     uint32_t cc;
1217 
1218     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1219 
1220     set_length(env, r1 + 1, destlen);
1221     set_length(env, r3 + 1, srclen);
1222     set_address(env, r1, dest);
1223     set_address(env, r3, src);
1224 
1225     return cc;
1226 }
1227 
1228 /* compare logical long helper */
1229 static inline uint32_t do_clcl(CPUS390XState *env,
1230                                uint64_t *src1, uint64_t *src1len,
1231                                uint64_t *src3, uint64_t *src3len,
1232                                uint16_t pad, uint64_t limit,
1233                                int wordsize, uintptr_t ra)
1234 {
1235     uint64_t len = MAX(*src1len, *src3len);
1236     uint32_t cc = 0;
1237 
1238     check_alignment(env, *src1len | *src3len, wordsize, ra);
1239 
1240     if (!len) {
1241         return cc;
1242     }
1243 
1244     /* Lest we fail to service interrupts in a timely manner, limit the
1245        amount of work we're willing to do.  */
1246     if (len > limit) {
1247         len = limit;
1248         cc = 3;
1249     }
1250 
1251     for (; len; len -= wordsize) {
1252         uint16_t v1 = pad;
1253         uint16_t v3 = pad;
1254 
1255         if (*src1len) {
1256             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1257         }
1258         if (*src3len) {
1259             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1260         }
1261 
1262         if (v1 != v3) {
1263             cc = (v1 < v3) ? 1 : 2;
1264             break;
1265         }
1266 
1267         if (*src1len) {
1268             *src1 += wordsize;
1269             *src1len -= wordsize;
1270         }
1271         if (*src3len) {
1272             *src3 += wordsize;
1273             *src3len -= wordsize;
1274         }
1275     }
1276 
1277     return cc;
1278 }
1279 
1280 
1281 /* compare logical long */
1282 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1283 {
1284     uintptr_t ra = GETPC();
1285     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1286     uint64_t src1 = get_address(env, r1);
1287     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1288     uint64_t src3 = get_address(env, r2);
1289     uint8_t pad = env->regs[r2 + 1] >> 24;
1290     uint32_t cc;
1291 
1292     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1293 
1294     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1295     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1296     set_address(env, r1, src1);
1297     set_address(env, r2, src3);
1298 
1299     return cc;
1300 }
1301 
1302 /* compare logical long extended memcompare insn with padding */
1303 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1304                        uint32_t r3)
1305 {
1306     uintptr_t ra = GETPC();
1307     uint64_t src1len = get_length(env, r1 + 1);
1308     uint64_t src1 = get_address(env, r1);
1309     uint64_t src3len = get_length(env, r3 + 1);
1310     uint64_t src3 = get_address(env, r3);
1311     uint8_t pad = a2;
1312     uint32_t cc;
1313 
1314     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1315 
1316     set_length(env, r1 + 1, src1len);
1317     set_length(env, r3 + 1, src3len);
1318     set_address(env, r1, src1);
1319     set_address(env, r3, src3);
1320 
1321     return cc;
1322 }
1323 
1324 /* compare logical long unicode memcompare insn with padding */
1325 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1326                        uint32_t r3)
1327 {
1328     uintptr_t ra = GETPC();
1329     uint64_t src1len = get_length(env, r1 + 1);
1330     uint64_t src1 = get_address(env, r1);
1331     uint64_t src3len = get_length(env, r3 + 1);
1332     uint64_t src3 = get_address(env, r3);
1333     uint16_t pad = a2;
1334     uint32_t cc = 0;
1335 
1336     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1337 
1338     set_length(env, r1 + 1, src1len);
1339     set_length(env, r3 + 1, src3len);
1340     set_address(env, r1, src1);
1341     set_address(env, r3, src3);
1342 
1343     return cc;
1344 }
1345 
1346 /* checksum */
1347 uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1348                       uint64_t src, uint64_t src_len)
1349 {
1350     uintptr_t ra = GETPC();
1351     uint64_t max_len, len;
1352     uint64_t cksm = (uint32_t)r1;
1353 
1354     /* Lest we fail to service interrupts in a timely manner, limit the
1355        amount of work we're willing to do.  For now, let's cap at 8k.  */
1356     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1357 
1358     /* Process full words as available.  */
1359     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1360         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1361     }
1362 
1363     switch (max_len - len) {
1364     case 1:
1365         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1366         len += 1;
1367         break;
1368     case 2:
1369         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1370         len += 2;
1371         break;
1372     case 3:
1373         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1374         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1375         len += 3;
1376         break;
1377     }
1378 
1379     /* Fold the carry from the checksum.  Note that we can see carry-out
1380        during folding more than once (but probably not more than twice).  */
1381     while (cksm > 0xffffffffull) {
1382         cksm = (uint32_t)cksm + (cksm >> 32);
1383     }
1384 
1385     /* Indicate whether or not we've processed everything.  */
1386     env->cc_op = (len == src_len ? 0 : 3);
1387 
1388     /* Return both cksm and processed length.  */
1389     env->retxl = cksm;
1390     return len;
1391 }
1392 
1393 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1394 {
1395     uintptr_t ra = GETPC();
1396     int len_dest = len >> 4;
1397     int len_src = len & 0xf;
1398     uint8_t b;
1399 
1400     dest += len_dest;
1401     src += len_src;
1402 
1403     /* last byte is special, it only flips the nibbles */
1404     b = cpu_ldub_data_ra(env, src, ra);
1405     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1406     src--;
1407     len_src--;
1408 
1409     /* now pack every value */
1410     while (len_dest > 0) {
1411         b = 0;
1412 
1413         if (len_src >= 0) {
1414             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1415             src--;
1416             len_src--;
1417         }
1418         if (len_src >= 0) {
1419             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1420             src--;
1421             len_src--;
1422         }
1423 
1424         len_dest--;
1425         dest--;
1426         cpu_stb_data_ra(env, dest, b, ra);
1427     }
1428 }
1429 
1430 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1431                            uint32_t srclen, int ssize, uintptr_t ra)
1432 {
1433     int i;
1434     /* The destination operand is always 16 bytes long.  */
1435     const int destlen = 16;
1436 
1437     /* The operands are processed from right to left.  */
1438     src += srclen - 1;
1439     dest += destlen - 1;
1440 
1441     for (i = 0; i < destlen; i++) {
1442         uint8_t b = 0;
1443 
1444         /* Start with a positive sign */
1445         if (i == 0) {
1446             b = 0xc;
1447         } else if (srclen > ssize) {
1448             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1449             src -= ssize;
1450             srclen -= ssize;
1451         }
1452 
1453         if (srclen > ssize) {
1454             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1455             src -= ssize;
1456             srclen -= ssize;
1457         }
1458 
1459         cpu_stb_data_ra(env, dest, b, ra);
1460         dest--;
1461     }
1462 }
1463 
1464 
1465 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1466                  uint32_t srclen)
1467 {
1468     do_pkau(env, dest, src, srclen, 1, GETPC());
1469 }
1470 
1471 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1472                  uint32_t srclen)
1473 {
1474     do_pkau(env, dest, src, srclen, 2, GETPC());
1475 }
1476 
1477 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1478                   uint64_t src)
1479 {
1480     uintptr_t ra = GETPC();
1481     int len_dest = len >> 4;
1482     int len_src = len & 0xf;
1483     uint8_t b;
1484     int second_nibble = 0;
1485 
1486     dest += len_dest;
1487     src += len_src;
1488 
1489     /* last byte is special, it only flips the nibbles */
1490     b = cpu_ldub_data_ra(env, src, ra);
1491     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1492     src--;
1493     len_src--;
1494 
1495     /* now pad every nibble with 0xf0 */
1496 
1497     while (len_dest > 0) {
1498         uint8_t cur_byte = 0;
1499 
1500         if (len_src > 0) {
1501             cur_byte = cpu_ldub_data_ra(env, src, ra);
1502         }
1503 
1504         len_dest--;
1505         dest--;
1506 
1507         /* only advance one nibble at a time */
1508         if (second_nibble) {
1509             cur_byte >>= 4;
1510             len_src--;
1511             src--;
1512         }
1513         second_nibble = !second_nibble;
1514 
1515         /* digit */
1516         cur_byte = (cur_byte & 0xf);
1517         /* zone bits */
1518         cur_byte |= 0xf0;
1519 
1520         cpu_stb_data_ra(env, dest, cur_byte, ra);
1521     }
1522 }
1523 
1524 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1525                                  uint32_t destlen, int dsize, uint64_t src,
1526                                  uintptr_t ra)
1527 {
1528     int i;
1529     uint32_t cc;
1530     uint8_t b;
1531     /* The source operand is always 16 bytes long.  */
1532     const int srclen = 16;
1533 
1534     /* The operands are processed from right to left.  */
1535     src += srclen - 1;
1536     dest += destlen - dsize;
1537 
1538     /* Check for the sign.  */
1539     b = cpu_ldub_data_ra(env, src, ra);
1540     src--;
1541     switch (b & 0xf) {
1542     case 0xa:
1543     case 0xc:
1544     case 0xe ... 0xf:
1545         cc = 0;  /* plus */
1546         break;
1547     case 0xb:
1548     case 0xd:
1549         cc = 1;  /* minus */
1550         break;
1551     default:
1552     case 0x0 ... 0x9:
1553         cc = 3;  /* invalid */
1554         break;
1555     }
1556 
1557     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1558     for (i = 0; i < destlen; i += dsize) {
1559         if (i == (31 * dsize)) {
1560             /* If length is 32/64 bytes, the leftmost byte is 0. */
1561             b = 0;
1562         } else if (i % (2 * dsize)) {
1563             b = cpu_ldub_data_ra(env, src, ra);
1564             src--;
1565         } else {
1566             b >>= 4;
1567         }
1568         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1569         dest -= dsize;
1570     }
1571 
1572     return cc;
1573 }
1574 
1575 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1576                        uint64_t src)
1577 {
1578     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1579 }
1580 
1581 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1582                        uint64_t src)
1583 {
1584     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1585 }
1586 
1587 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1588 {
1589     uintptr_t ra = GETPC();
1590     uint32_t cc = 0;
1591     int i;
1592 
1593     for (i = 0; i < destlen; i++) {
1594         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1595         /* digit */
1596         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1597 
1598         if (i == (destlen - 1)) {
1599             /* sign */
1600             cc |= (b & 0xf) < 0xa ? 1 : 0;
1601         } else {
1602             /* digit */
1603             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1604         }
1605     }
1606 
1607     return cc;
1608 }
1609 
1610 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1611                              uint64_t trans, uintptr_t ra)
1612 {
1613     uint32_t i;
1614 
1615     for (i = 0; i <= len; i++) {
1616         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1617         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1618         cpu_stb_data_ra(env, array + i, new_byte, ra);
1619     }
1620 
1621     return env->cc_op;
1622 }
1623 
1624 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1625                 uint64_t trans)
1626 {
1627     do_helper_tr(env, len, array, trans, GETPC());
1628 }
1629 
1630 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1631                      uint64_t len, uint64_t trans)
1632 {
1633     uintptr_t ra = GETPC();
1634     uint8_t end = env->regs[0] & 0xff;
1635     uint64_t l = len;
1636     uint64_t i;
1637     uint32_t cc = 0;
1638 
1639     if (!(env->psw.mask & PSW_MASK_64)) {
1640         array &= 0x7fffffff;
1641         l = (uint32_t)l;
1642     }
1643 
1644     /* Lest we fail to service interrupts in a timely manner, limit the
1645        amount of work we're willing to do.  For now, let's cap at 8k.  */
1646     if (l > 0x2000) {
1647         l = 0x2000;
1648         cc = 3;
1649     }
1650 
1651     for (i = 0; i < l; i++) {
1652         uint8_t byte, new_byte;
1653 
1654         byte = cpu_ldub_data_ra(env, array + i, ra);
1655 
1656         if (byte == end) {
1657             cc = 1;
1658             break;
1659         }
1660 
1661         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1662         cpu_stb_data_ra(env, array + i, new_byte, ra);
1663     }
1664 
1665     env->cc_op = cc;
1666     env->retxl = len - i;
1667     return array + i;
1668 }
1669 
1670 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1671                                      uint64_t array, uint64_t trans,
1672                                      int inc, uintptr_t ra)
1673 {
1674     int i;
1675 
1676     for (i = 0; i <= len; i++) {
1677         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1678         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1679 
1680         if (sbyte != 0) {
1681             set_address(env, 1, array + i * inc);
1682             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1683             return (i == len) ? 2 : 1;
1684         }
1685     }
1686 
1687     return 0;
1688 }
1689 
1690 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1691                                   uint64_t array, uint64_t trans,
1692                                   uintptr_t ra)
1693 {
1694     return do_helper_trt(env, len, array, trans, 1, ra);
1695 }
1696 
1697 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1698                      uint64_t trans)
1699 {
1700     return do_helper_trt(env, len, array, trans, 1, GETPC());
1701 }
1702 
1703 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1704                                    uint64_t array, uint64_t trans,
1705                                    uintptr_t ra)
1706 {
1707     return do_helper_trt(env, len, array, trans, -1, ra);
1708 }
1709 
1710 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1711                       uint64_t trans)
1712 {
1713     return do_helper_trt(env, len, array, trans, -1, GETPC());
1714 }
1715 
1716 /* Translate one/two to one/two */
1717 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1718                       uint32_t tst, uint32_t sizes)
1719 {
1720     uintptr_t ra = GETPC();
1721     int dsize = (sizes & 1) ? 1 : 2;
1722     int ssize = (sizes & 2) ? 1 : 2;
1723     uint64_t tbl = get_address(env, 1);
1724     uint64_t dst = get_address(env, r1);
1725     uint64_t len = get_length(env, r1 + 1);
1726     uint64_t src = get_address(env, r2);
1727     uint32_t cc = 3;
1728     int i;
1729 
1730     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1731        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1732        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1733     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1734         tbl &= -4096;
1735     } else {
1736         tbl &= -8;
1737     }
1738 
1739     check_alignment(env, len, ssize, ra);
1740 
1741     /* Lest we fail to service interrupts in a timely manner, */
1742     /* limit the amount of work we're willing to do.   */
1743     for (i = 0; i < 0x2000; i++) {
1744         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1745         uint64_t tble = tbl + (sval * dsize);
1746         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1747         if (dval == tst) {
1748             cc = 1;
1749             break;
1750         }
1751         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1752 
1753         len -= ssize;
1754         src += ssize;
1755         dst += dsize;
1756 
1757         if (len == 0) {
1758             cc = 0;
1759             break;
1760         }
1761     }
1762 
1763     set_address(env, r1, dst);
1764     set_length(env, r1 + 1, len);
1765     set_address(env, r2, src);
1766 
1767     return cc;
1768 }
1769 
1770 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1771                   uint32_t r1, uint32_t r3)
1772 {
1773     uintptr_t ra = GETPC();
1774     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1775     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1776     Int128 oldv;
1777     uint64_t oldh, oldl;
1778     bool fail;
1779 
1780     check_alignment(env, addr, 16, ra);
1781 
1782     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1783     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1784 
1785     oldv = int128_make128(oldl, oldh);
1786     fail = !int128_eq(oldv, cmpv);
1787     if (fail) {
1788         newv = oldv;
1789     }
1790 
1791     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1792     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1793 
1794     env->cc_op = fail;
1795     env->regs[r1] = int128_gethi(oldv);
1796     env->regs[r1 + 1] = int128_getlo(oldv);
1797 }
1798 
1799 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1800                            uint32_t r1, uint32_t r3)
1801 {
1802     uintptr_t ra = GETPC();
1803     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1804     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1805     int mem_idx;
1806     TCGMemOpIdx oi;
1807     Int128 oldv;
1808     bool fail;
1809 
1810     assert(HAVE_CMPXCHG128);
1811 
1812     mem_idx = cpu_mmu_index(env, false);
1813     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1814     oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1815     fail = !int128_eq(oldv, cmpv);
1816 
1817     env->cc_op = fail;
1818     env->regs[r1] = int128_gethi(oldv);
1819     env->regs[r1 + 1] = int128_getlo(oldv);
1820 }
1821 
1822 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1823                         uint64_t a2, bool parallel)
1824 {
1825     uint32_t mem_idx = cpu_mmu_index(env, false);
1826     uintptr_t ra = GETPC();
1827     uint32_t fc = extract32(env->regs[0], 0, 8);
1828     uint32_t sc = extract32(env->regs[0], 8, 8);
1829     uint64_t pl = get_address(env, 1) & -16;
1830     uint64_t svh, svl;
1831     uint32_t cc;
1832 
1833     /* Sanity check the function code and storage characteristic.  */
1834     if (fc > 1 || sc > 3) {
1835         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1836             goto spec_exception;
1837         }
1838         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1839             goto spec_exception;
1840         }
1841     }
1842 
1843     /* Sanity check the alignments.  */
1844     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1845         goto spec_exception;
1846     }
1847 
1848     /* Sanity check writability of the store address.  */
1849     probe_write(env, a2, 1 << sc, mem_idx, ra);
1850 
1851     /*
1852      * Note that the compare-and-swap is atomic, and the store is atomic,
1853      * but the complete operation is not.  Therefore we do not need to
1854      * assert serial context in order to implement this.  That said,
1855      * restart early if we can't support either operation that is supposed
1856      * to be atomic.
1857      */
1858     if (parallel) {
1859         uint32_t max = 2;
1860 #ifdef CONFIG_ATOMIC64
1861         max = 3;
1862 #endif
1863         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1864             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1865             cpu_loop_exit_atomic(env_cpu(env), ra);
1866         }
1867     }
1868 
1869     /* All loads happen before all stores.  For simplicity, load the entire
1870        store value area from the parameter list.  */
1871     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1872     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1873 
1874     switch (fc) {
1875     case 0:
1876         {
1877             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1878             uint32_t cv = env->regs[r3];
1879             uint32_t ov;
1880 
1881             if (parallel) {
1882 #ifdef CONFIG_USER_ONLY
1883                 uint32_t *haddr = g2h(env_cpu(env), a1);
1884                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1885 #else
1886                 TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1887                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1888 #endif
1889             } else {
1890                 ov = cpu_ldl_data_ra(env, a1, ra);
1891                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1892             }
1893             cc = (ov != cv);
1894             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1895         }
1896         break;
1897 
1898     case 1:
1899         {
1900             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1901             uint64_t cv = env->regs[r3];
1902             uint64_t ov;
1903 
1904             if (parallel) {
1905 #ifdef CONFIG_ATOMIC64
1906                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1907                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1908 #else
1909                 /* Note that we asserted !parallel above.  */
1910                 g_assert_not_reached();
1911 #endif
1912             } else {
1913                 ov = cpu_ldq_data_ra(env, a1, ra);
1914                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1915             }
1916             cc = (ov != cv);
1917             env->regs[r3] = ov;
1918         }
1919         break;
1920 
1921     case 2:
1922         {
1923             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1924             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1925             Int128 nv = int128_make128(nvl, nvh);
1926             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1927             Int128 ov;
1928 
1929             if (!parallel) {
1930                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1931                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1932 
1933                 ov = int128_make128(ol, oh);
1934                 cc = !int128_eq(ov, cv);
1935                 if (cc) {
1936                     nv = ov;
1937                 }
1938 
1939                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1940                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1941             } else if (HAVE_CMPXCHG128) {
1942                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1943                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1944                 cc = !int128_eq(ov, cv);
1945             } else {
1946                 /* Note that we asserted !parallel above.  */
1947                 g_assert_not_reached();
1948             }
1949 
1950             env->regs[r3 + 0] = int128_gethi(ov);
1951             env->regs[r3 + 1] = int128_getlo(ov);
1952         }
1953         break;
1954 
1955     default:
1956         g_assert_not_reached();
1957     }
1958 
1959     /* Store only if the comparison succeeded.  Note that above we use a pair
1960        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1961        from the most-significant bits of svh.  */
1962     if (cc == 0) {
1963         switch (sc) {
1964         case 0:
1965             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1966             break;
1967         case 1:
1968             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1969             break;
1970         case 2:
1971             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1972             break;
1973         case 3:
1974             cpu_stq_data_ra(env, a2, svh, ra);
1975             break;
1976         case 4:
1977             if (!parallel) {
1978                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1979                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1980             } else if (HAVE_ATOMIC128) {
1981                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1982                 Int128 sv = int128_make128(svl, svh);
1983                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1984             } else {
1985                 /* Note that we asserted !parallel above.  */
1986                 g_assert_not_reached();
1987             }
1988             break;
1989         default:
1990             g_assert_not_reached();
1991         }
1992     }
1993 
1994     return cc;
1995 
1996  spec_exception:
1997     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1998 }
1999 
2000 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2001 {
2002     return do_csst(env, r3, a1, a2, false);
2003 }
2004 
2005 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2006                                uint64_t a2)
2007 {
2008     return do_csst(env, r3, a1, a2, true);
2009 }
2010 
2011 #if !defined(CONFIG_USER_ONLY)
2012 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2013 {
2014     uintptr_t ra = GETPC();
2015     bool PERchanged = false;
2016     uint64_t src = a2;
2017     uint32_t i;
2018 
2019     if (src & 0x7) {
2020         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2021     }
2022 
2023     for (i = r1;; i = (i + 1) % 16) {
2024         uint64_t val = cpu_ldq_data_ra(env, src, ra);
2025         if (env->cregs[i] != val && i >= 9 && i <= 11) {
2026             PERchanged = true;
2027         }
2028         env->cregs[i] = val;
2029         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2030                    i, src, val);
2031         src += sizeof(uint64_t);
2032 
2033         if (i == r3) {
2034             break;
2035         }
2036     }
2037 
2038     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2039         s390_cpu_recompute_watchpoints(env_cpu(env));
2040     }
2041 
2042     tlb_flush(env_cpu(env));
2043 }
2044 
2045 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2046 {
2047     uintptr_t ra = GETPC();
2048     bool PERchanged = false;
2049     uint64_t src = a2;
2050     uint32_t i;
2051 
2052     if (src & 0x3) {
2053         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2054     }
2055 
2056     for (i = r1;; i = (i + 1) % 16) {
2057         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2058         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2059             PERchanged = true;
2060         }
2061         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2062         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2063         src += sizeof(uint32_t);
2064 
2065         if (i == r3) {
2066             break;
2067         }
2068     }
2069 
2070     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2071         s390_cpu_recompute_watchpoints(env_cpu(env));
2072     }
2073 
2074     tlb_flush(env_cpu(env));
2075 }
2076 
2077 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2078 {
2079     uintptr_t ra = GETPC();
2080     uint64_t dest = a2;
2081     uint32_t i;
2082 
2083     if (dest & 0x7) {
2084         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2085     }
2086 
2087     for (i = r1;; i = (i + 1) % 16) {
2088         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2089         dest += sizeof(uint64_t);
2090 
2091         if (i == r3) {
2092             break;
2093         }
2094     }
2095 }
2096 
2097 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2098 {
2099     uintptr_t ra = GETPC();
2100     uint64_t dest = a2;
2101     uint32_t i;
2102 
2103     if (dest & 0x3) {
2104         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2105     }
2106 
2107     for (i = r1;; i = (i + 1) % 16) {
2108         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2109         dest += sizeof(uint32_t);
2110 
2111         if (i == r3) {
2112             break;
2113         }
2114     }
2115 }
2116 
2117 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2118 {
2119     uintptr_t ra = GETPC();
2120     int i;
2121 
2122     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2123 
2124     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2125         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2126     }
2127 
2128     return 0;
2129 }
2130 
2131 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2132 {
2133     S390CPU *cpu = env_archcpu(env);
2134     CPUState *cs = env_cpu(env);
2135 
2136     /*
2137      * TODO: we currently don't handle all access protection types
2138      * (including access-list and key-controlled) as well as AR mode.
2139      */
2140     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2141         /* Fetching permitted; storing permitted */
2142         return 0;
2143     }
2144 
2145     if (env->int_pgm_code == PGM_PROTECTION) {
2146         /* retry if reading is possible */
2147         cs->exception_index = -1;
2148         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2149             /* Fetching permitted; storing not permitted */
2150             return 1;
2151         }
2152     }
2153 
2154     switch (env->int_pgm_code) {
2155     case PGM_PROTECTION:
2156         /* Fetching not permitted; storing not permitted */
2157         cs->exception_index = -1;
2158         return 2;
2159     case PGM_ADDRESSING:
2160     case PGM_TRANS_SPEC:
2161         /* exceptions forwarded to the guest */
2162         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2163         return 0;
2164     }
2165 
2166     /* Translation not available */
2167     cs->exception_index = -1;
2168     return 3;
2169 }
2170 
2171 /* insert storage key extended */
2172 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2173 {
2174     MachineState *ms = MACHINE(qdev_get_machine());
2175     static S390SKeysState *ss;
2176     static S390SKeysClass *skeyclass;
2177     uint64_t addr = wrap_address(env, r2);
2178     uint8_t key;
2179 
2180     if (addr > ms->ram_size) {
2181         return 0;
2182     }
2183 
2184     if (unlikely(!ss)) {
2185         ss = s390_get_skeys_device();
2186         skeyclass = S390_SKEYS_GET_CLASS(ss);
2187     }
2188 
2189     if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
2190         return 0;
2191     }
2192     return key;
2193 }
2194 
2195 /* set storage key extended */
2196 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2197 {
2198     MachineState *ms = MACHINE(qdev_get_machine());
2199     static S390SKeysState *ss;
2200     static S390SKeysClass *skeyclass;
2201     uint64_t addr = wrap_address(env, r2);
2202     uint8_t key;
2203 
2204     if (addr > ms->ram_size) {
2205         return;
2206     }
2207 
2208     if (unlikely(!ss)) {
2209         ss = s390_get_skeys_device();
2210         skeyclass = S390_SKEYS_GET_CLASS(ss);
2211     }
2212 
2213     key = (uint8_t) r1;
2214     skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2215    /*
2216     * As we can only flush by virtual address and not all the entries
2217     * that point to a physical address we have to flush the whole TLB.
2218     */
2219     tlb_flush_all_cpus_synced(env_cpu(env));
2220 }
2221 
2222 /* reset reference bit extended */
2223 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2224 {
2225     MachineState *ms = MACHINE(qdev_get_machine());
2226     static S390SKeysState *ss;
2227     static S390SKeysClass *skeyclass;
2228     uint8_t re, key;
2229 
2230     if (r2 > ms->ram_size) {
2231         return 0;
2232     }
2233 
2234     if (unlikely(!ss)) {
2235         ss = s390_get_skeys_device();
2236         skeyclass = S390_SKEYS_GET_CLASS(ss);
2237     }
2238 
2239     if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2240         return 0;
2241     }
2242 
2243     re = key & (SK_R | SK_C);
2244     key &= ~SK_R;
2245 
2246     if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2247         return 0;
2248     }
2249    /*
2250     * As we can only flush by virtual address and not all the entries
2251     * that point to a physical address we have to flush the whole TLB.
2252     */
2253     tlb_flush_all_cpus_synced(env_cpu(env));
2254 
2255     /*
2256      * cc
2257      *
2258      * 0  Reference bit zero; change bit zero
2259      * 1  Reference bit zero; change bit one
2260      * 2  Reference bit one; change bit zero
2261      * 3  Reference bit one; change bit one
2262      */
2263 
2264     return re >> 1;
2265 }
2266 
2267 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2268 {
2269     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2270     S390Access srca, desta;
2271     uintptr_t ra = GETPC();
2272     int cc = 0;
2273 
2274     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2275                __func__, l, a1, a2);
2276 
2277     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2278         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2279         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2280     }
2281 
2282     l = wrap_length32(env, l);
2283     if (l > 256) {
2284         /* max 256 */
2285         l = 256;
2286         cc = 3;
2287     } else if (!l) {
2288         return cc;
2289     }
2290 
2291     /* TODO: Access key handling */
2292     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2293     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2294     access_memmove(env, &desta, &srca, ra);
2295     return cc;
2296 }
2297 
2298 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2299 {
2300     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2301     S390Access srca, desta;
2302     uintptr_t ra = GETPC();
2303     int cc = 0;
2304 
2305     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2306                __func__, l, a1, a2);
2307 
2308     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2309         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2310         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2311     }
2312 
2313     l = wrap_length32(env, l);
2314     if (l > 256) {
2315         /* max 256 */
2316         l = 256;
2317         cc = 3;
2318     } else if (!l) {
2319         return cc;
2320     }
2321 
2322     /* TODO: Access key handling */
2323     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2324     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2325     access_memmove(env, &desta, &srca, ra);
2326     return cc;
2327 }
2328 
2329 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2330 {
2331     CPUState *cs = env_cpu(env);
2332     const uintptr_t ra = GETPC();
2333     uint64_t table, entry, raddr;
2334     uint16_t entries, i, index = 0;
2335 
2336     if (r2 & 0xff000) {
2337         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2338     }
2339 
2340     if (!(r2 & 0x800)) {
2341         /* invalidation-and-clearing operation */
2342         table = r1 & ASCE_ORIGIN;
2343         entries = (r2 & 0x7ff) + 1;
2344 
2345         switch (r1 & ASCE_TYPE_MASK) {
2346         case ASCE_TYPE_REGION1:
2347             index = (r2 >> 53) & 0x7ff;
2348             break;
2349         case ASCE_TYPE_REGION2:
2350             index = (r2 >> 42) & 0x7ff;
2351             break;
2352         case ASCE_TYPE_REGION3:
2353             index = (r2 >> 31) & 0x7ff;
2354             break;
2355         case ASCE_TYPE_SEGMENT:
2356             index = (r2 >> 20) & 0x7ff;
2357             break;
2358         }
2359         for (i = 0; i < entries; i++) {
2360             /* addresses are not wrapped in 24/31bit mode but table index is */
2361             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2362             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2363             if (!(entry & REGION_ENTRY_I)) {
2364                 /* we are allowed to not store if already invalid */
2365                 entry |= REGION_ENTRY_I;
2366                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2367             }
2368         }
2369     }
2370 
2371     /* We simply flush the complete tlb, therefore we can ignore r3. */
2372     if (m4 & 1) {
2373         tlb_flush(cs);
2374     } else {
2375         tlb_flush_all_cpus_synced(cs);
2376     }
2377 }
2378 
2379 /* invalidate pte */
2380 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2381                   uint32_t m4)
2382 {
2383     CPUState *cs = env_cpu(env);
2384     const uintptr_t ra = GETPC();
2385     uint64_t page = vaddr & TARGET_PAGE_MASK;
2386     uint64_t pte_addr, pte;
2387 
2388     /* Compute the page table entry address */
2389     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2390     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2391 
2392     /* Mark the page table entry as invalid */
2393     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2394     pte |= PAGE_ENTRY_I;
2395     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2396 
2397     /* XXX we exploit the fact that Linux passes the exact virtual
2398        address here - it's not obliged to! */
2399     if (m4 & 1) {
2400         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2401             tlb_flush_page(cs, page);
2402             /* XXX 31-bit hack */
2403             tlb_flush_page(cs, page ^ 0x80000000);
2404         } else {
2405             /* looks like we don't have a valid virtual address */
2406             tlb_flush(cs);
2407         }
2408     } else {
2409         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2410             tlb_flush_page_all_cpus_synced(cs, page);
2411             /* XXX 31-bit hack */
2412             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2413         } else {
2414             /* looks like we don't have a valid virtual address */
2415             tlb_flush_all_cpus_synced(cs);
2416         }
2417     }
2418 }
2419 
2420 /* flush local tlb */
2421 void HELPER(ptlb)(CPUS390XState *env)
2422 {
2423     tlb_flush(env_cpu(env));
2424 }
2425 
2426 /* flush global tlb */
2427 void HELPER(purge)(CPUS390XState *env)
2428 {
2429     tlb_flush_all_cpus_synced(env_cpu(env));
2430 }
2431 
2432 /* load real address */
2433 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2434 {
2435     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2436     uint64_t ret, tec;
2437     int flags, exc, cc;
2438 
2439     /* XXX incomplete - has more corner cases */
2440     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2441         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2442     }
2443 
2444     exc = mmu_translate(env, addr, 0, asc, &ret, &flags, &tec);
2445     if (exc) {
2446         cc = 3;
2447         ret = exc | 0x80000000;
2448     } else {
2449         cc = 0;
2450         ret |= addr & ~TARGET_PAGE_MASK;
2451     }
2452 
2453     env->cc_op = cc;
2454     return ret;
2455 }
2456 #endif
2457 
2458 /* load pair from quadword */
2459 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2460 {
2461     uintptr_t ra = GETPC();
2462     uint64_t hi, lo;
2463 
2464     check_alignment(env, addr, 16, ra);
2465     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2466     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2467 
2468     env->retxl = lo;
2469     return hi;
2470 }
2471 
2472 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2473 {
2474     uintptr_t ra = GETPC();
2475     uint64_t hi, lo;
2476     int mem_idx;
2477     TCGMemOpIdx oi;
2478     Int128 v;
2479 
2480     assert(HAVE_ATOMIC128);
2481 
2482     mem_idx = cpu_mmu_index(env, false);
2483     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2484     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2485     hi = int128_gethi(v);
2486     lo = int128_getlo(v);
2487 
2488     env->retxl = lo;
2489     return hi;
2490 }
2491 
2492 /* store pair to quadword */
2493 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2494                   uint64_t low, uint64_t high)
2495 {
2496     uintptr_t ra = GETPC();
2497 
2498     check_alignment(env, addr, 16, ra);
2499     cpu_stq_data_ra(env, addr + 0, high, ra);
2500     cpu_stq_data_ra(env, addr + 8, low, ra);
2501 }
2502 
2503 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2504                            uint64_t low, uint64_t high)
2505 {
2506     uintptr_t ra = GETPC();
2507     int mem_idx;
2508     TCGMemOpIdx oi;
2509     Int128 v;
2510 
2511     assert(HAVE_ATOMIC128);
2512 
2513     mem_idx = cpu_mmu_index(env, false);
2514     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2515     v = int128_make128(low, high);
2516     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2517 }
2518 
2519 /* Execute instruction.  This instruction executes an insn modified with
2520    the contents of r1.  It does not change the executed instruction in memory;
2521    it does not change the program counter.
2522 
2523    Perform this by recording the modified instruction in env->ex_value.
2524    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2525 */
2526 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2527 {
2528     uint64_t insn = cpu_lduw_code(env, addr);
2529     uint8_t opc = insn >> 8;
2530 
2531     /* Or in the contents of R1[56:63].  */
2532     insn |= r1 & 0xff;
2533 
2534     /* Load the rest of the instruction.  */
2535     insn <<= 48;
2536     switch (get_ilen(opc)) {
2537     case 2:
2538         break;
2539     case 4:
2540         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2541         break;
2542     case 6:
2543         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2544         break;
2545     default:
2546         g_assert_not_reached();
2547     }
2548 
2549     /* The very most common cases can be sped up by avoiding a new TB.  */
2550     if ((opc & 0xf0) == 0xd0) {
2551         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2552                                       uint64_t, uintptr_t);
2553         static const dx_helper dx[16] = {
2554             [0x0] = do_helper_trt_bkwd,
2555             [0x2] = do_helper_mvc,
2556             [0x4] = do_helper_nc,
2557             [0x5] = do_helper_clc,
2558             [0x6] = do_helper_oc,
2559             [0x7] = do_helper_xc,
2560             [0xc] = do_helper_tr,
2561             [0xd] = do_helper_trt_fwd,
2562         };
2563         dx_helper helper = dx[opc & 0xf];
2564 
2565         if (helper) {
2566             uint32_t l = extract64(insn, 48, 8);
2567             uint32_t b1 = extract64(insn, 44, 4);
2568             uint32_t d1 = extract64(insn, 32, 12);
2569             uint32_t b2 = extract64(insn, 28, 4);
2570             uint32_t d2 = extract64(insn, 16, 12);
2571             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2572             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2573 
2574             env->cc_op = helper(env, l, a1, a2, 0);
2575             env->psw.addr += ilen;
2576             return;
2577         }
2578     } else if (opc == 0x0a) {
2579         env->int_svc_code = extract64(insn, 48, 8);
2580         env->int_svc_ilen = ilen;
2581         helper_exception(env, EXCP_SVC);
2582         g_assert_not_reached();
2583     }
2584 
2585     /* Record the insn we want to execute as well as the ilen to use
2586        during the execution of the target insn.  This will also ensure
2587        that ex_value is non-zero, which flags that we are in a state
2588        that requires such execution.  */
2589     env->ex_value = insn | ilen;
2590 }
2591 
2592 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2593                        uint64_t len)
2594 {
2595     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2596     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2597     const uint64_t r0 = env->regs[0];
2598     const uintptr_t ra = GETPC();
2599     uint8_t dest_key, dest_as, dest_k, dest_a;
2600     uint8_t src_key, src_as, src_k, src_a;
2601     uint64_t val;
2602     int cc = 0;
2603 
2604     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2605                __func__, dest, src, len);
2606 
2607     if (!(env->psw.mask & PSW_MASK_DAT)) {
2608         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2609     }
2610 
2611     /* OAC (operand access control) for the first operand -> dest */
2612     val = (r0 & 0xffff0000ULL) >> 16;
2613     dest_key = (val >> 12) & 0xf;
2614     dest_as = (val >> 6) & 0x3;
2615     dest_k = (val >> 1) & 0x1;
2616     dest_a = val & 0x1;
2617 
2618     /* OAC (operand access control) for the second operand -> src */
2619     val = (r0 & 0x0000ffffULL);
2620     src_key = (val >> 12) & 0xf;
2621     src_as = (val >> 6) & 0x3;
2622     src_k = (val >> 1) & 0x1;
2623     src_a = val & 0x1;
2624 
2625     if (!dest_k) {
2626         dest_key = psw_key;
2627     }
2628     if (!src_k) {
2629         src_key = psw_key;
2630     }
2631     if (!dest_a) {
2632         dest_as = psw_as;
2633     }
2634     if (!src_a) {
2635         src_as = psw_as;
2636     }
2637 
2638     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2639         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2640     }
2641     if (!(env->cregs[0] & CR0_SECONDARY) &&
2642         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2643         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2644     }
2645     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2646         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2647     }
2648 
2649     len = wrap_length32(env, len);
2650     if (len > 4096) {
2651         cc = 3;
2652         len = 4096;
2653     }
2654 
2655     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2656     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2657         (env->psw.mask & PSW_MASK_PSTATE)) {
2658         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2659                       __func__);
2660         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2661     }
2662 
2663     /* FIXME: Access using correct keys and AR-mode */
2664     if (len) {
2665         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2666                                          mmu_idx_from_as(src_as), ra);
2667         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2668                                           mmu_idx_from_as(dest_as), ra);
2669 
2670         access_memmove(env, &desta, &srca, ra);
2671     }
2672 
2673     return cc;
2674 }
2675 
2676 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2677    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2678    value >= 0 indicates failure, and the CC value to be returned.  */
2679 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2680                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2681                                  uint32_t *ochar, uint32_t *olen);
2682 
2683 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2684    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2685    indicates failure, and the CC value to be returned.  */
2686 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2687                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2688                                  uint32_t *olen);
2689 
2690 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2691                        bool enh_check, uintptr_t ra,
2692                        uint32_t *ochar, uint32_t *olen)
2693 {
2694     uint8_t s0, s1, s2, s3;
2695     uint32_t c, l;
2696 
2697     if (ilen < 1) {
2698         return 0;
2699     }
2700     s0 = cpu_ldub_data_ra(env, addr, ra);
2701     if (s0 <= 0x7f) {
2702         /* one byte character */
2703         l = 1;
2704         c = s0;
2705     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2706         /* invalid character */
2707         return 2;
2708     } else if (s0 <= 0xdf) {
2709         /* two byte character */
2710         l = 2;
2711         if (ilen < 2) {
2712             return 0;
2713         }
2714         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2715         c = s0 & 0x1f;
2716         c = (c << 6) | (s1 & 0x3f);
2717         if (enh_check && (s1 & 0xc0) != 0x80) {
2718             return 2;
2719         }
2720     } else if (s0 <= 0xef) {
2721         /* three byte character */
2722         l = 3;
2723         if (ilen < 3) {
2724             return 0;
2725         }
2726         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2727         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2728         c = s0 & 0x0f;
2729         c = (c << 6) | (s1 & 0x3f);
2730         c = (c << 6) | (s2 & 0x3f);
2731         /* Fold the byte-by-byte range descriptions in the PoO into
2732            tests against the complete value.  It disallows encodings
2733            that could be smaller, and the UTF-16 surrogates.  */
2734         if (enh_check
2735             && ((s1 & 0xc0) != 0x80
2736                 || (s2 & 0xc0) != 0x80
2737                 || c < 0x1000
2738                 || (c >= 0xd800 && c <= 0xdfff))) {
2739             return 2;
2740         }
2741     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2742         /* four byte character */
2743         l = 4;
2744         if (ilen < 4) {
2745             return 0;
2746         }
2747         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2748         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2749         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2750         c = s0 & 0x07;
2751         c = (c << 6) | (s1 & 0x3f);
2752         c = (c << 6) | (s2 & 0x3f);
2753         c = (c << 6) | (s3 & 0x3f);
2754         /* See above.  */
2755         if (enh_check
2756             && ((s1 & 0xc0) != 0x80
2757                 || (s2 & 0xc0) != 0x80
2758                 || (s3 & 0xc0) != 0x80
2759                 || c < 0x010000
2760                 || c > 0x10ffff)) {
2761             return 2;
2762         }
2763     } else {
2764         /* invalid character */
2765         return 2;
2766     }
2767 
2768     *ochar = c;
2769     *olen = l;
2770     return -1;
2771 }
2772 
2773 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2774                         bool enh_check, uintptr_t ra,
2775                         uint32_t *ochar, uint32_t *olen)
2776 {
2777     uint16_t s0, s1;
2778     uint32_t c, l;
2779 
2780     if (ilen < 2) {
2781         return 0;
2782     }
2783     s0 = cpu_lduw_data_ra(env, addr, ra);
2784     if ((s0 & 0xfc00) != 0xd800) {
2785         /* one word character */
2786         l = 2;
2787         c = s0;
2788     } else {
2789         /* two word character */
2790         l = 4;
2791         if (ilen < 4) {
2792             return 0;
2793         }
2794         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2795         c = extract32(s0, 6, 4) + 1;
2796         c = (c << 6) | (s0 & 0x3f);
2797         c = (c << 10) | (s1 & 0x3ff);
2798         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2799             /* invalid surrogate character */
2800             return 2;
2801         }
2802     }
2803 
2804     *ochar = c;
2805     *olen = l;
2806     return -1;
2807 }
2808 
2809 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2810                         bool enh_check, uintptr_t ra,
2811                         uint32_t *ochar, uint32_t *olen)
2812 {
2813     uint32_t c;
2814 
2815     if (ilen < 4) {
2816         return 0;
2817     }
2818     c = cpu_ldl_data_ra(env, addr, ra);
2819     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2820         /* invalid unicode character */
2821         return 2;
2822     }
2823 
2824     *ochar = c;
2825     *olen = 4;
2826     return -1;
2827 }
2828 
2829 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2830                        uintptr_t ra, uint32_t c, uint32_t *olen)
2831 {
2832     uint8_t d[4];
2833     uint32_t l, i;
2834 
2835     if (c <= 0x7f) {
2836         /* one byte character */
2837         l = 1;
2838         d[0] = c;
2839     } else if (c <= 0x7ff) {
2840         /* two byte character */
2841         l = 2;
2842         d[1] = 0x80 | extract32(c, 0, 6);
2843         d[0] = 0xc0 | extract32(c, 6, 5);
2844     } else if (c <= 0xffff) {
2845         /* three byte character */
2846         l = 3;
2847         d[2] = 0x80 | extract32(c, 0, 6);
2848         d[1] = 0x80 | extract32(c, 6, 6);
2849         d[0] = 0xe0 | extract32(c, 12, 4);
2850     } else {
2851         /* four byte character */
2852         l = 4;
2853         d[3] = 0x80 | extract32(c, 0, 6);
2854         d[2] = 0x80 | extract32(c, 6, 6);
2855         d[1] = 0x80 | extract32(c, 12, 6);
2856         d[0] = 0xf0 | extract32(c, 18, 3);
2857     }
2858 
2859     if (ilen < l) {
2860         return 1;
2861     }
2862     for (i = 0; i < l; ++i) {
2863         cpu_stb_data_ra(env, addr + i, d[i], ra);
2864     }
2865 
2866     *olen = l;
2867     return -1;
2868 }
2869 
2870 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2871                         uintptr_t ra, uint32_t c, uint32_t *olen)
2872 {
2873     uint16_t d0, d1;
2874 
2875     if (c <= 0xffff) {
2876         /* one word character */
2877         if (ilen < 2) {
2878             return 1;
2879         }
2880         cpu_stw_data_ra(env, addr, c, ra);
2881         *olen = 2;
2882     } else {
2883         /* two word character */
2884         if (ilen < 4) {
2885             return 1;
2886         }
2887         d1 = 0xdc00 | extract32(c, 0, 10);
2888         d0 = 0xd800 | extract32(c, 10, 6);
2889         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2890         cpu_stw_data_ra(env, addr + 0, d0, ra);
2891         cpu_stw_data_ra(env, addr + 2, d1, ra);
2892         *olen = 4;
2893     }
2894 
2895     return -1;
2896 }
2897 
2898 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2899                         uintptr_t ra, uint32_t c, uint32_t *olen)
2900 {
2901     if (ilen < 4) {
2902         return 1;
2903     }
2904     cpu_stl_data_ra(env, addr, c, ra);
2905     *olen = 4;
2906     return -1;
2907 }
2908 
2909 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2910                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2911                                        decode_unicode_fn decode,
2912                                        encode_unicode_fn encode)
2913 {
2914     uint64_t dst = get_address(env, r1);
2915     uint64_t dlen = get_length(env, r1 + 1);
2916     uint64_t src = get_address(env, r2);
2917     uint64_t slen = get_length(env, r2 + 1);
2918     bool enh_check = m3 & 1;
2919     int cc, i;
2920 
2921     /* Lest we fail to service interrupts in a timely manner, limit the
2922        amount of work we're willing to do.  For now, let's cap at 256.  */
2923     for (i = 0; i < 256; ++i) {
2924         uint32_t c, ilen, olen;
2925 
2926         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2927         if (unlikely(cc >= 0)) {
2928             break;
2929         }
2930         cc = encode(env, dst, dlen, ra, c, &olen);
2931         if (unlikely(cc >= 0)) {
2932             break;
2933         }
2934 
2935         src += ilen;
2936         slen -= ilen;
2937         dst += olen;
2938         dlen -= olen;
2939         cc = 3;
2940     }
2941 
2942     set_address(env, r1, dst);
2943     set_length(env, r1 + 1, dlen);
2944     set_address(env, r2, src);
2945     set_length(env, r2 + 1, slen);
2946 
2947     return cc;
2948 }
2949 
2950 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2951 {
2952     return convert_unicode(env, r1, r2, m3, GETPC(),
2953                            decode_utf8, encode_utf16);
2954 }
2955 
2956 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2957 {
2958     return convert_unicode(env, r1, r2, m3, GETPC(),
2959                            decode_utf8, encode_utf32);
2960 }
2961 
2962 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2963 {
2964     return convert_unicode(env, r1, r2, m3, GETPC(),
2965                            decode_utf16, encode_utf8);
2966 }
2967 
2968 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2969 {
2970     return convert_unicode(env, r1, r2, m3, GETPC(),
2971                            decode_utf16, encode_utf32);
2972 }
2973 
2974 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2975 {
2976     return convert_unicode(env, r1, r2, m3, GETPC(),
2977                            decode_utf32, encode_utf8);
2978 }
2979 
2980 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2981 {
2982     return convert_unicode(env, r1, r2, m3, GETPC(),
2983                            decode_utf32, encode_utf16);
2984 }
2985 
2986 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2987                         uintptr_t ra)
2988 {
2989     /* test the actual access, not just any access to the page due to LAP */
2990     while (len) {
2991         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2992         const uint64_t curlen = MIN(pagelen, len);
2993 
2994         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2995         addr = wrap_address(env, addr + curlen);
2996         len -= curlen;
2997     }
2998 }
2999 
3000 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3001 {
3002     probe_write_access(env, addr, len, GETPC());
3003 }
3004