xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 1580b897)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "cpu.h"
23 #include "s390x-internal.h"
24 #include "tcg_s390x.h"
25 #include "exec/helper-proto.h"
26 #include "exec/exec-all.h"
27 #include "exec/cpu_ldst.h"
28 #include "qemu/int128.h"
29 #include "qemu/atomic128.h"
30 #include "tcg/tcg.h"
31 
32 #if !defined(CONFIG_USER_ONLY)
33 #include "hw/s390x/storage-keys.h"
34 #include "hw/boards.h"
35 #endif
36 
37 /*****************************************************************************/
38 /* Softmmu support */
39 
/* Uncomment the next line to send HELPER_LOG() output to qemu_log(). */
/* #define DEBUG_HELPER */
#if defined(DEBUG_HELPER)
#define HELPER_LOG(...) qemu_log(__VA_ARGS__)
#else
/* Logging disabled: the arguments are discarded and never evaluated. */
#define HELPER_LOG(...)
#endif
46 
47 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
48 {
49     uint16_t pkm = env->cregs[3] >> 16;
50 
51     if (env->psw.mask & PSW_MASK_PSTATE) {
52         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
53         return pkm & (0x80 >> psw_key);
54     }
55     return true;
56 }
57 
58 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
59                                    uint64_t src, uint32_t len)
60 {
61     if (!len || src == dest) {
62         return false;
63     }
64     /* Take care of wrapping at the end of address space. */
65     if (unlikely(wrap_address(env, src + len - 1) < src)) {
66         return dest > src || dest <= wrap_address(env, src + len - 1);
67     }
68     return dest > src && dest <= src + len - 1;
69 }
70 
71 /* Trigger a SPECIFICATION exception if an address or a length is not
72    naturally aligned.  */
73 static inline void check_alignment(CPUS390XState *env, uint64_t v,
74                                    int wordsize, uintptr_t ra)
75 {
76     if (v % wordsize) {
77         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
78     }
79 }
80 
81 /* Load a value from memory according to its size.  */
82 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
83                                            int wordsize, uintptr_t ra)
84 {
85     switch (wordsize) {
86     case 1:
87         return cpu_ldub_data_ra(env, addr, ra);
88     case 2:
89         return cpu_lduw_data_ra(env, addr, ra);
90     default:
91         abort();
92     }
93 }
94 
95 /* Store a to memory according to its size.  */
96 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
97                                       uint64_t value, int wordsize,
98                                       uintptr_t ra)
99 {
100     switch (wordsize) {
101     case 1:
102         cpu_stb_data_ra(env, addr, value, ra);
103         break;
104     case 2:
105         cpu_stw_data_ra(env, addr, value, ra);
106         break;
107     default:
108         abort();
109     }
110 }
111 
/*
 * Descriptor of a prepared guest memory access.
 *
 * An access covers at most 4096 bytes and therefore at most two pages.
 * The "1" members describe the part on the first page, the "2" members the
 * remainder on the following page (size2 is 0 if there is none).
 */
typedef struct S390Access {
    /* Guest virtual start address of each part. */
    target_ulong vaddr1;
    target_ulong vaddr2;
    /* Host address of each part; NULL if it cannot be accessed directly. */
    char *haddr1;
    char *haddr2;
    /* Number of bytes in each part. */
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe, one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation - very unlikely that
     * the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
132 
/*
 * Probe a single-page guest access of @size bytes at @addr and store the
 * matching host address in *@phost.
 *
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
    int flags;

#if defined(CONFIG_USER_ONLY)
    flags = page_get_flags(addr);
    /* Loads need PAGE_READ; every other access type needs PAGE_WRITE_ORG. */
    if (!(flags & (access_type == MMU_DATA_LOAD ?  PAGE_READ : PAGE_WRITE_ORG))) {
        env->__excp_addr = addr;
        /* A valid page lacking the permission is a protection exception. */
        flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
        if (nonfault) {
            return flags;
        }
        tcg_s390_program_interrupt(env, flags, ra);
    }
    *phost = g2h(env_cpu(env), addr);
#else
    /*
     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
     * to detect if there was an exception during tlb_fill().
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif
    return 0;
}
179 
180 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
181                              bool nonfault, vaddr vaddr1, int size,
182                              MMUAccessType access_type,
183                              int mmu_idx, uintptr_t ra)
184 {
185     void *haddr1, *haddr2 = NULL;
186     int size1, size2, exc;
187     vaddr vaddr2 = 0;
188 
189     assert(size > 0 && size <= 4096);
190 
191     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
192     size2 = size - size1;
193 
194     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
195                             &haddr1, ra);
196     if (exc) {
197         return exc;
198     }
199     if (unlikely(size2)) {
200         /* The access crosses page boundaries. */
201         vaddr2 = wrap_address(env, vaddr1 + size1);
202         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
203                                 nonfault, &haddr2, ra);
204         if (exc) {
205             return exc;
206         }
207     }
208 
209     *access = (S390Access) {
210         .vaddr1 = vaddr1,
211         .vaddr2 = vaddr2,
212         .haddr1 = haddr1,
213         .haddr2 = haddr2,
214         .size1 = size1,
215         .size2 = size2,
216         .mmu_idx = mmu_idx
217     };
218     return 0;
219 }
220 
221 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
222                                  MMUAccessType access_type, int mmu_idx,
223                                  uintptr_t ra)
224 {
225     S390Access ret;
226     int exc = access_prepare_nf(&ret, env, false, vaddr, size,
227                                 access_type, mmu_idx, ra);
228     assert(!exc);
229     return ret;
230 }
231 
232 /* Helper to handle memset on a single page. */
233 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
234                              uint8_t byte, uint16_t size, int mmu_idx,
235                              uintptr_t ra)
236 {
237 #ifdef CONFIG_USER_ONLY
238     g_assert(haddr);
239     memset(haddr, byte, size);
240 #else
241     TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
242     int i;
243 
244     if (likely(haddr)) {
245         memset(haddr, byte, size);
246     } else {
247         /*
248          * Do a single access and test if we can then get access to the
249          * page. This is especially relevant to speed up TLB_NOTDIRTY.
250          */
251         g_assert(size > 0);
252         helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
253         haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
254         if (likely(haddr)) {
255             memset(haddr + 1, byte, size - 1);
256         } else {
257             for (i = 1; i < size; i++) {
258                 helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
259             }
260         }
261     }
262 #endif
263 }
264 
265 static void access_memset(CPUS390XState *env, S390Access *desta,
266                           uint8_t byte, uintptr_t ra)
267 {
268 
269     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
270                      desta->mmu_idx, ra);
271     if (likely(!desta->size2)) {
272         return;
273     }
274     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
275                      desta->mmu_idx, ra);
276 }
277 
278 static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
279                                   int offset, int mmu_idx, uintptr_t ra)
280 {
281 #ifdef CONFIG_USER_ONLY
282     return ldub_p(*haddr + offset);
283 #else
284     TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
285     uint8_t byte;
286 
287     if (likely(*haddr)) {
288         return ldub_p(*haddr + offset);
289     }
290     /*
291      * Do a single access and test if we can then get access to the
292      * page. This is especially relevant to speed up TLB_NOTDIRTY.
293      */
294     byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
295     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
296     return byte;
297 #endif
298 }
299 
300 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
301                                int offset, uintptr_t ra)
302 {
303     if (offset < access->size1) {
304         return do_access_get_byte(env, access->vaddr1, &access->haddr1,
305                                   offset, access->mmu_idx, ra);
306     }
307     return do_access_get_byte(env, access->vaddr2, &access->haddr2,
308                               offset - access->size1, access->mmu_idx, ra);
309 }
310 
311 static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
312                                int offset, uint8_t byte, int mmu_idx,
313                                uintptr_t ra)
314 {
315 #ifdef CONFIG_USER_ONLY
316     stb_p(*haddr + offset, byte);
317 #else
318     TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
319 
320     if (likely(*haddr)) {
321         stb_p(*haddr + offset, byte);
322         return;
323     }
324     /*
325      * Do a single access and test if we can then get access to the
326      * page. This is especially relevant to speed up TLB_NOTDIRTY.
327      */
328     helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
329     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
330 #endif
331 }
332 
333 static void access_set_byte(CPUS390XState *env, S390Access *access,
334                             int offset, uint8_t byte, uintptr_t ra)
335 {
336     if (offset < access->size1) {
337         do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
338                            access->mmu_idx, ra);
339     } else {
340         do_access_set_byte(env, access->vaddr2, &access->haddr2,
341                            offset - access->size1, byte, access->mmu_idx, ra);
342     }
343 }
344 
/*
 * Copy all bytes of the prepared access @srca to the prepared access @desta.
 * Both accesses must have the same total size.
 *
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    if (srca->size1 == desta->size1) {
        /* Both accesses are split at the same offset: copy part by part. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        /*
         * The source is split earlier: the first destination part takes the
         * first source part plus the leading diff bytes of the second one.
         */
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /*
         * The destination is split earlier: the trailing diff bytes of the
         * first source part go to the start of the second destination part.
         */
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
390 
391 static int mmu_idx_from_as(uint8_t as)
392 {
393     switch (as) {
394     case AS_PRIMARY:
395         return MMU_PRIMARY_IDX;
396     case AS_SECONDARY:
397         return MMU_SECONDARY_IDX;
398     case AS_HOME:
399         return MMU_HOME_IDX;
400     default:
401         /* FIXME AS_ACCREG */
402         g_assert_not_reached();
403     }
404 }
405 
406 /* and on array */
407 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
408                              uint64_t src, uintptr_t ra)
409 {
410     const int mmu_idx = cpu_mmu_index(env, false);
411     S390Access srca1, srca2, desta;
412     uint32_t i;
413     uint8_t c = 0;
414 
415     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
416                __func__, l, dest, src);
417 
418     /* NC always processes one more byte than specified - maximum is 256 */
419     l++;
420 
421     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
422     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
423     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
424     for (i = 0; i < l; i++) {
425         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
426                           access_get_byte(env, &srca2, i, ra);
427 
428         c |= x;
429         access_set_byte(env, &desta, i, x, ra);
430     }
431     return c != 0;
432 }
433 
434 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
435                     uint64_t src)
436 {
437     return do_helper_nc(env, l, dest, src, GETPC());
438 }
439 
440 /* xor on array */
441 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
442                              uint64_t src, uintptr_t ra)
443 {
444     const int mmu_idx = cpu_mmu_index(env, false);
445     S390Access srca1, srca2, desta;
446     uint32_t i;
447     uint8_t c = 0;
448 
449     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
450                __func__, l, dest, src);
451 
452     /* XC always processes one more byte than specified - maximum is 256 */
453     l++;
454 
455     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
456     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
457     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
458 
459     /* xor with itself is the same as memset(0) */
460     if (src == dest) {
461         access_memset(env, &desta, 0, ra);
462         return 0;
463     }
464 
465     for (i = 0; i < l; i++) {
466         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
467                           access_get_byte(env, &srca2, i, ra);
468 
469         c |= x;
470         access_set_byte(env, &desta, i, x, ra);
471     }
472     return c != 0;
473 }
474 
475 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
476                     uint64_t src)
477 {
478     return do_helper_xc(env, l, dest, src, GETPC());
479 }
480 
481 /* or on array */
482 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
483                              uint64_t src, uintptr_t ra)
484 {
485     const int mmu_idx = cpu_mmu_index(env, false);
486     S390Access srca1, srca2, desta;
487     uint32_t i;
488     uint8_t c = 0;
489 
490     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
491                __func__, l, dest, src);
492 
493     /* OC always processes one more byte than specified - maximum is 256 */
494     l++;
495 
496     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
497     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
498     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
499     for (i = 0; i < l; i++) {
500         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
501                           access_get_byte(env, &srca2, i, ra);
502 
503         c |= x;
504         access_set_byte(env, &desta, i, x, ra);
505     }
506     return c != 0;
507 }
508 
509 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
510                     uint64_t src)
511 {
512     return do_helper_oc(env, l, dest, src, GETPC());
513 }
514 
515 /* memmove */
516 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
517                               uint64_t src, uintptr_t ra)
518 {
519     const int mmu_idx = cpu_mmu_index(env, false);
520     S390Access srca, desta;
521     uint32_t i;
522 
523     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
524                __func__, l, dest, src);
525 
526     /* MVC always copies one more byte than specified - maximum is 256 */
527     l++;
528 
529     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
530     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
531 
532     /*
533      * "When the operands overlap, the result is obtained as if the operands
534      * were processed one byte at a time". Only non-destructive overlaps
535      * behave like memmove().
536      */
537     if (dest == src + 1) {
538         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
539     } else if (!is_destructive_overlap(env, dest, src, l)) {
540         access_memmove(env, &desta, &srca, ra);
541     } else {
542         for (i = 0; i < l; i++) {
543             uint8_t byte = access_get_byte(env, &srca, i, ra);
544 
545             access_set_byte(env, &desta, i, byte, ra);
546         }
547     }
548 
549     return env->cc_op;
550 }
551 
552 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
553 {
554     do_helper_mvc(env, l, dest, src, GETPC());
555 }
556 
557 /* move inverse  */
558 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
559 {
560     const int mmu_idx = cpu_mmu_index(env, false);
561     S390Access srca, desta;
562     uintptr_t ra = GETPC();
563     int i;
564 
565     /* MVCIN always copies one more byte than specified - maximum is 256 */
566     l++;
567 
568     src = wrap_address(env, src - l + 1);
569     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
570     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
571     for (i = 0; i < l; i++) {
572         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
573 
574         access_set_byte(env, &desta, i, x, ra);
575     }
576 }
577 
578 /* move numerics  */
579 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
580 {
581     const int mmu_idx = cpu_mmu_index(env, false);
582     S390Access srca1, srca2, desta;
583     uintptr_t ra = GETPC();
584     int i;
585 
586     /* MVN always copies one more byte than specified - maximum is 256 */
587     l++;
588 
589     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
590     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
591     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
592     for (i = 0; i < l; i++) {
593         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
594                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
595 
596         access_set_byte(env, &desta, i, x, ra);
597     }
598 }
599 
600 /* move with offset  */
601 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
602 {
603     const int mmu_idx = cpu_mmu_index(env, false);
604     /* MVO always processes one more byte than specified - maximum is 16 */
605     const int len_dest = (l >> 4) + 1;
606     const int len_src = (l & 0xf) + 1;
607     uintptr_t ra = GETPC();
608     uint8_t byte_dest, byte_src;
609     S390Access srca, desta;
610     int i, j;
611 
612     srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
613     desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
614 
615     /* Handle rightmost byte */
616     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
617     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
618     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
619     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
620 
621     /* Process remaining bytes from right to left */
622     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
623         byte_dest = byte_src >> 4;
624         if (j >= 0) {
625             byte_src = access_get_byte(env, &srca, j, ra);
626         } else {
627             byte_src = 0;
628         }
629         byte_dest |= byte_src << 4;
630         access_set_byte(env, &desta, i, byte_dest, ra);
631     }
632 }
633 
634 /* move zones  */
635 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
636 {
637     const int mmu_idx = cpu_mmu_index(env, false);
638     S390Access srca1, srca2, desta;
639     uintptr_t ra = GETPC();
640     int i;
641 
642     /* MVZ always copies one more byte than specified - maximum is 256 */
643     l++;
644 
645     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
646     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
647     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
648     for (i = 0; i < l; i++) {
649         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
650                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
651 
652         access_set_byte(env, &desta, i, x, ra);
653     }
654 }
655 
656 /* compare unsigned byte arrays */
657 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
658                               uint64_t s2, uintptr_t ra)
659 {
660     uint32_t i;
661     uint32_t cc = 0;
662 
663     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
664                __func__, l, s1, s2);
665 
666     for (i = 0; i <= l; i++) {
667         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
668         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
669         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
670         if (x < y) {
671             cc = 1;
672             break;
673         } else if (x > y) {
674             cc = 2;
675             break;
676         }
677     }
678 
679     HELPER_LOG("\n");
680     return cc;
681 }
682 
683 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
684 {
685     return do_helper_clc(env, l, s1, s2, GETPC());
686 }
687 
688 /* compare logical under mask */
689 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
690                      uint64_t addr)
691 {
692     uintptr_t ra = GETPC();
693     uint32_t cc = 0;
694 
695     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
696                mask, addr);
697 
698     while (mask) {
699         if (mask & 8) {
700             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
701             uint8_t r = extract32(r1, 24, 8);
702             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
703                        addr);
704             if (r < d) {
705                 cc = 1;
706                 break;
707             } else if (r > d) {
708                 cc = 2;
709                 break;
710             }
711             addr++;
712         }
713         mask = (mask << 1) & 0xf;
714         r1 <<= 8;
715     }
716 
717     HELPER_LOG("\n");
718     return cc;
719 }
720 
721 static inline uint64_t get_address(CPUS390XState *env, int reg)
722 {
723     return wrap_address(env, env->regs[reg]);
724 }
725 
726 /*
727  * Store the address to the given register, zeroing out unused leftmost
728  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
729  */
730 static inline void set_address_zero(CPUS390XState *env, int reg,
731                                     uint64_t address)
732 {
733     if (env->psw.mask & PSW_MASK_64) {
734         env->regs[reg] = address;
735     } else {
736         if (!(env->psw.mask & PSW_MASK_32)) {
737             address &= 0x00ffffff;
738         } else {
739             address &= 0x7fffffff;
740         }
741         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
742     }
743 }
744 
745 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
746 {
747     if (env->psw.mask & PSW_MASK_64) {
748         /* 64-Bit mode */
749         env->regs[reg] = address;
750     } else {
751         if (!(env->psw.mask & PSW_MASK_32)) {
752             /* 24-Bit mode. According to the PoO it is implementation
753             dependent if bits 32-39 remain unchanged or are set to
754             zeros.  Choose the former so that the function can also be
755             used for TRT.  */
756             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
757         } else {
758             /* 31-Bit mode. According to the PoO it is implementation
759             dependent if bit 32 remains unchanged or is set to zero.
760             Choose the latter so that the function can also be used for
761             TRT.  */
762             address &= 0x7fffffff;
763             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
764         }
765     }
766 }
767 
768 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
769 {
770     if (!(env->psw.mask & PSW_MASK_64)) {
771         return (uint32_t)length;
772     }
773     return length;
774 }
775 
776 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
777 {
778     if (!(env->psw.mask & PSW_MASK_64)) {
779         /* 24-Bit and 31-Bit mode */
780         length &= 0x7fffffff;
781     }
782     return length;
783 }
784 
785 static inline uint64_t get_length(CPUS390XState *env, int reg)
786 {
787     return wrap_length31(env, env->regs[reg]);
788 }
789 
790 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
791 {
792     if (env->psw.mask & PSW_MASK_64) {
793         /* 64-Bit mode */
794         env->regs[reg] = length;
795     } else {
796         /* 24-Bit and 31-Bit mode */
797         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
798     }
799 }
800 
801 /* search string (c is byte to search, r2 is string, r1 end of string) */
802 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
803 {
804     uintptr_t ra = GETPC();
805     uint64_t end, str;
806     uint32_t len;
807     uint8_t v, c = env->regs[0];
808 
809     /* Bits 32-55 must contain all 0.  */
810     if (env->regs[0] & 0xffffff00u) {
811         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
812     }
813 
814     str = get_address(env, r2);
815     end = get_address(env, r1);
816 
817     /* Lest we fail to service interrupts in a timely manner, limit the
818        amount of work we're willing to do.  For now, let's cap at 8k.  */
819     for (len = 0; len < 0x2000; ++len) {
820         if (str + len == end) {
821             /* Character not found.  R1 & R2 are unmodified.  */
822             env->cc_op = 2;
823             return;
824         }
825         v = cpu_ldub_data_ra(env, str + len, ra);
826         if (v == c) {
827             /* Character found.  Set R1 to the location; R2 is unmodified.  */
828             env->cc_op = 1;
829             set_address(env, r1, str + len);
830             return;
831         }
832     }
833 
834     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
835     env->cc_op = 3;
836     set_address(env, r2, str + len);
837 }
838 
839 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
840 {
841     uintptr_t ra = GETPC();
842     uint32_t len;
843     uint16_t v, c = env->regs[0];
844     uint64_t end, str, adj_end;
845 
846     /* Bits 32-47 of R0 must be zero.  */
847     if (env->regs[0] & 0xffff0000u) {
848         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
849     }
850 
851     str = get_address(env, r2);
852     end = get_address(env, r1);
853 
854     /* If the LSB of the two addresses differ, use one extra byte.  */
855     adj_end = end + ((str ^ end) & 1);
856 
857     /* Lest we fail to service interrupts in a timely manner, limit the
858        amount of work we're willing to do.  For now, let's cap at 8k.  */
859     for (len = 0; len < 0x2000; len += 2) {
860         if (str + len == adj_end) {
861             /* End of input found.  */
862             env->cc_op = 2;
863             return;
864         }
865         v = cpu_lduw_data_ra(env, str + len, ra);
866         if (v == c) {
867             /* Character found.  Set R1 to the location; R2 is unmodified.  */
868             env->cc_op = 1;
869             set_address(env, r1, str + len);
870             return;
871         }
872     }
873 
874     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
875     env->cc_op = 3;
876     set_address(env, r2, str + len);
877 }
878 
879 /* unsigned string compare (c is string terminator) */
880 uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
881 {
882     uintptr_t ra = GETPC();
883     uint32_t len;
884 
885     c = c & 0xff;
886     s1 = wrap_address(env, s1);
887     s2 = wrap_address(env, s2);
888 
889     /* Lest we fail to service interrupts in a timely manner, limit the
890        amount of work we're willing to do.  For now, let's cap at 8k.  */
891     for (len = 0; len < 0x2000; ++len) {
892         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
893         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
894         if (v1 == v2) {
895             if (v1 == c) {
896                 /* Equal.  CC=0, and don't advance the registers.  */
897                 env->cc_op = 0;
898                 env->retxl = s2;
899                 return s1;
900             }
901         } else {
902             /* Unequal.  CC={1,2}, and advance the registers.  Note that
903                the terminator need not be zero, but the string that contains
904                the terminator is by definition "low".  */
905             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
906             env->retxl = s2 + len;
907             return s1 + len;
908         }
909     }
910 
911     /* CPU-determined bytes equal; advance the registers.  */
912     env->cc_op = 3;
913     env->retxl = s2 + len;
914     return s1 + len;
915 }
916 
917 /* move page */
918 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
919 {
920     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
921     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
922     const int mmu_idx = cpu_mmu_index(env, false);
923     const bool f = extract64(r0, 11, 1);
924     const bool s = extract64(r0, 10, 1);
925     const bool cco = extract64(r0, 8, 1);
926     uintptr_t ra = GETPC();
927     S390Access srca, desta;
928     int exc;
929 
930     if ((f && s) || extract64(r0, 12, 4)) {
931         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
932     }
933 
934     /*
935      * We always manually handle exceptions such that we can properly store
936      * r1/r2 to the lowcore on page-translation exceptions.
937      *
938      * TODO: Access key handling
939      */
940     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
941                             MMU_DATA_LOAD, mmu_idx, ra);
942     if (exc) {
943         if (cco) {
944             return 2;
945         }
946         goto inject_exc;
947     }
948     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
949                             MMU_DATA_STORE, mmu_idx, ra);
950     if (exc) {
951         if (cco && exc != PGM_PROTECTION) {
952             return 1;
953         }
954         goto inject_exc;
955     }
956     access_memmove(env, &desta, &srca, ra);
957     return 0; /* data moved */
958 inject_exc:
959 #if !defined(CONFIG_USER_ONLY)
960     if (exc != PGM_ADDRESSING) {
961         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
962                  env->tlb_fill_tec);
963     }
964     if (exc == PGM_PAGE_TRANS) {
965         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
966                  r1 << 4 | r2);
967     }
968 #endif
969     tcg_s390_program_interrupt(env, exc, ra);
970 }
971 
972 /* string copy */
973 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
974 {
975     const int mmu_idx = cpu_mmu_index(env, false);
976     const uint64_t d = get_address(env, r1);
977     const uint64_t s = get_address(env, r2);
978     const uint8_t c = env->regs[0];
979     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
980     S390Access srca, desta;
981     uintptr_t ra = GETPC();
982     int i;
983 
984     if (env->regs[0] & 0xffffff00ull) {
985         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
986     }
987 
988     /*
989      * Our access should not exceed single pages, as we must not report access
990      * exceptions exceeding the actually copied range (which we don't know at
991      * this point). We might over-indicate watchpoints within the pages
992      * (if we ever care, we have to limit processing to a single byte).
993      */
994     srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
995     desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
996     for (i = 0; i < len; i++) {
997         const uint8_t v = access_get_byte(env, &srca, i, ra);
998 
999         access_set_byte(env, &desta, i, v, ra);
1000         if (v == c) {
1001             set_address_zero(env, r1, d + i);
1002             return 1;
1003         }
1004     }
1005     set_address_zero(env, r1, d + len);
1006     set_address_zero(env, r2, s + len);
1007     return 3;
1008 }
1009 
1010 /* load access registers r1 to r3 from memory at a2 */
1011 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1012 {
1013     uintptr_t ra = GETPC();
1014     int i;
1015 
1016     if (a2 & 0x3) {
1017         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1018     }
1019 
1020     for (i = r1;; i = (i + 1) % 16) {
1021         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1022         a2 += 4;
1023 
1024         if (i == r3) {
1025             break;
1026         }
1027     }
1028 }
1029 
1030 /* store access registers r1 to r3 in memory at a2 */
1031 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1032 {
1033     uintptr_t ra = GETPC();
1034     int i;
1035 
1036     if (a2 & 0x3) {
1037         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1038     }
1039 
1040     for (i = r1;; i = (i + 1) % 16) {
1041         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1042         a2 += 4;
1043 
1044         if (i == r3) {
1045             break;
1046         }
1047     }
1048 }
1049 
1050 /* move long helper */
1051 static inline uint32_t do_mvcl(CPUS390XState *env,
1052                                uint64_t *dest, uint64_t *destlen,
1053                                uint64_t *src, uint64_t *srclen,
1054                                uint16_t pad, int wordsize, uintptr_t ra)
1055 {
1056     const int mmu_idx = cpu_mmu_index(env, false);
1057     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1058     S390Access srca, desta;
1059     int i, cc;
1060 
1061     if (*destlen == *srclen) {
1062         cc = 0;
1063     } else if (*destlen < *srclen) {
1064         cc = 1;
1065     } else {
1066         cc = 2;
1067     }
1068 
1069     if (!*destlen) {
1070         return cc;
1071     }
1072 
1073     /*
1074      * Only perform one type of type of operation (move/pad) at a time.
1075      * Stay within single pages.
1076      */
1077     if (*srclen) {
1078         /* Copy the src array */
1079         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1080         *destlen -= len;
1081         *srclen -= len;
1082         srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1083         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1084         access_memmove(env, &desta, &srca, ra);
1085         *src = wrap_address(env, *src + len);
1086         *dest = wrap_address(env, *dest + len);
1087     } else if (wordsize == 1) {
1088         /* Pad the remaining area */
1089         *destlen -= len;
1090         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1091         access_memset(env, &desta, pad, ra);
1092         *dest = wrap_address(env, *dest + len);
1093     } else {
1094         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1095 
1096         /* The remaining length selects the padding byte. */
1097         for (i = 0; i < len; (*destlen)--, i++) {
1098             if (*destlen & 1) {
1099                 access_set_byte(env, &desta, i, pad, ra);
1100             } else {
1101                 access_set_byte(env, &desta, i, pad >> 8, ra);
1102             }
1103         }
1104         *dest = wrap_address(env, *dest + len);
1105     }
1106 
1107     return *destlen ? 3 : cc;
1108 }
1109 
1110 /* move long */
1111 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1112 {
1113     const int mmu_idx = cpu_mmu_index(env, false);
1114     uintptr_t ra = GETPC();
1115     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1116     uint64_t dest = get_address(env, r1);
1117     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1118     uint64_t src = get_address(env, r2);
1119     uint8_t pad = env->regs[r2 + 1] >> 24;
1120     CPUState *cs = env_cpu(env);
1121     S390Access srca, desta;
1122     uint32_t cc, cur_len;
1123 
1124     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1125         cc = 3;
1126     } else if (srclen == destlen) {
1127         cc = 0;
1128     } else if (destlen < srclen) {
1129         cc = 1;
1130     } else {
1131         cc = 2;
1132     }
1133 
1134     /* We might have to zero-out some bits even if there was no action. */
1135     if (unlikely(!destlen || cc == 3)) {
1136         set_address_zero(env, r2, src);
1137         set_address_zero(env, r1, dest);
1138         return cc;
1139     } else if (!srclen) {
1140         set_address_zero(env, r2, src);
1141     }
1142 
1143     /*
1144      * Only perform one type of type of operation (move/pad) in one step.
1145      * Stay within single pages.
1146      */
1147     while (destlen) {
1148         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1149         if (!srclen) {
1150             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1151                                    ra);
1152             access_memset(env, &desta, pad, ra);
1153         } else {
1154             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1155 
1156             srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
1157                                   ra);
1158             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1159                                    ra);
1160             access_memmove(env, &desta, &srca, ra);
1161             src = wrap_address(env, src + cur_len);
1162             srclen -= cur_len;
1163             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1164             set_address_zero(env, r2, src);
1165         }
1166         dest = wrap_address(env, dest + cur_len);
1167         destlen -= cur_len;
1168         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1169         set_address_zero(env, r1, dest);
1170 
1171         /*
1172          * MVCL is interruptible. Return to the main loop if requested after
1173          * writing back all state to registers. If no interrupt will get
1174          * injected, we'll end up back in this handler and continue processing
1175          * the remaining parts.
1176          */
1177         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1178             cpu_loop_exit_restore(cs, ra);
1179         }
1180     }
1181     return cc;
1182 }
1183 
1184 /* move long extended */
1185 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1186                        uint32_t r3)
1187 {
1188     uintptr_t ra = GETPC();
1189     uint64_t destlen = get_length(env, r1 + 1);
1190     uint64_t dest = get_address(env, r1);
1191     uint64_t srclen = get_length(env, r3 + 1);
1192     uint64_t src = get_address(env, r3);
1193     uint8_t pad = a2;
1194     uint32_t cc;
1195 
1196     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1197 
1198     set_length(env, r1 + 1, destlen);
1199     set_length(env, r3 + 1, srclen);
1200     set_address(env, r1, dest);
1201     set_address(env, r3, src);
1202 
1203     return cc;
1204 }
1205 
1206 /* move long unicode */
1207 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1208                        uint32_t r3)
1209 {
1210     uintptr_t ra = GETPC();
1211     uint64_t destlen = get_length(env, r1 + 1);
1212     uint64_t dest = get_address(env, r1);
1213     uint64_t srclen = get_length(env, r3 + 1);
1214     uint64_t src = get_address(env, r3);
1215     uint16_t pad = a2;
1216     uint32_t cc;
1217 
1218     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1219 
1220     set_length(env, r1 + 1, destlen);
1221     set_length(env, r3 + 1, srclen);
1222     set_address(env, r1, dest);
1223     set_address(env, r3, src);
1224 
1225     return cc;
1226 }
1227 
1228 /* compare logical long helper */
1229 static inline uint32_t do_clcl(CPUS390XState *env,
1230                                uint64_t *src1, uint64_t *src1len,
1231                                uint64_t *src3, uint64_t *src3len,
1232                                uint16_t pad, uint64_t limit,
1233                                int wordsize, uintptr_t ra)
1234 {
1235     uint64_t len = MAX(*src1len, *src3len);
1236     uint32_t cc = 0;
1237 
1238     check_alignment(env, *src1len | *src3len, wordsize, ra);
1239 
1240     if (!len) {
1241         return cc;
1242     }
1243 
1244     /* Lest we fail to service interrupts in a timely manner, limit the
1245        amount of work we're willing to do.  */
1246     if (len > limit) {
1247         len = limit;
1248         cc = 3;
1249     }
1250 
1251     for (; len; len -= wordsize) {
1252         uint16_t v1 = pad;
1253         uint16_t v3 = pad;
1254 
1255         if (*src1len) {
1256             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1257         }
1258         if (*src3len) {
1259             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1260         }
1261 
1262         if (v1 != v3) {
1263             cc = (v1 < v3) ? 1 : 2;
1264             break;
1265         }
1266 
1267         if (*src1len) {
1268             *src1 += wordsize;
1269             *src1len -= wordsize;
1270         }
1271         if (*src3len) {
1272             *src3 += wordsize;
1273             *src3len -= wordsize;
1274         }
1275     }
1276 
1277     return cc;
1278 }
1279 
1280 
1281 /* compare logical long */
1282 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1283 {
1284     uintptr_t ra = GETPC();
1285     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1286     uint64_t src1 = get_address(env, r1);
1287     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1288     uint64_t src3 = get_address(env, r2);
1289     uint8_t pad = env->regs[r2 + 1] >> 24;
1290     uint32_t cc;
1291 
1292     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1293 
1294     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1295     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1296     set_address(env, r1, src1);
1297     set_address(env, r2, src3);
1298 
1299     return cc;
1300 }
1301 
1302 /* compare logical long extended memcompare insn with padding */
1303 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1304                        uint32_t r3)
1305 {
1306     uintptr_t ra = GETPC();
1307     uint64_t src1len = get_length(env, r1 + 1);
1308     uint64_t src1 = get_address(env, r1);
1309     uint64_t src3len = get_length(env, r3 + 1);
1310     uint64_t src3 = get_address(env, r3);
1311     uint8_t pad = a2;
1312     uint32_t cc;
1313 
1314     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1315 
1316     set_length(env, r1 + 1, src1len);
1317     set_length(env, r3 + 1, src3len);
1318     set_address(env, r1, src1);
1319     set_address(env, r3, src3);
1320 
1321     return cc;
1322 }
1323 
1324 /* compare logical long unicode memcompare insn with padding */
1325 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1326                        uint32_t r3)
1327 {
1328     uintptr_t ra = GETPC();
1329     uint64_t src1len = get_length(env, r1 + 1);
1330     uint64_t src1 = get_address(env, r1);
1331     uint64_t src3len = get_length(env, r3 + 1);
1332     uint64_t src3 = get_address(env, r3);
1333     uint16_t pad = a2;
1334     uint32_t cc = 0;
1335 
1336     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1337 
1338     set_length(env, r1 + 1, src1len);
1339     set_length(env, r3 + 1, src3len);
1340     set_address(env, r1, src1);
1341     set_address(env, r3, src3);
1342 
1343     return cc;
1344 }
1345 
1346 /* checksum */
1347 uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1348                       uint64_t src, uint64_t src_len)
1349 {
1350     uintptr_t ra = GETPC();
1351     uint64_t max_len, len;
1352     uint64_t cksm = (uint32_t)r1;
1353 
1354     /* Lest we fail to service interrupts in a timely manner, limit the
1355        amount of work we're willing to do.  For now, let's cap at 8k.  */
1356     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1357 
1358     /* Process full words as available.  */
1359     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1360         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1361     }
1362 
1363     switch (max_len - len) {
1364     case 1:
1365         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1366         len += 1;
1367         break;
1368     case 2:
1369         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1370         len += 2;
1371         break;
1372     case 3:
1373         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1374         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1375         len += 3;
1376         break;
1377     }
1378 
1379     /* Fold the carry from the checksum.  Note that we can see carry-out
1380        during folding more than once (but probably not more than twice).  */
1381     while (cksm > 0xffffffffull) {
1382         cksm = (uint32_t)cksm + (cksm >> 32);
1383     }
1384 
1385     /* Indicate whether or not we've processed everything.  */
1386     env->cc_op = (len == src_len ? 0 : 3);
1387 
1388     /* Return both cksm and processed length.  */
1389     env->retxl = cksm;
1390     return len;
1391 }
1392 
1393 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1394 {
1395     uintptr_t ra = GETPC();
1396     int len_dest = len >> 4;
1397     int len_src = len & 0xf;
1398     uint8_t b;
1399 
1400     dest += len_dest;
1401     src += len_src;
1402 
1403     /* last byte is special, it only flips the nibbles */
1404     b = cpu_ldub_data_ra(env, src, ra);
1405     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1406     src--;
1407     len_src--;
1408 
1409     /* now pack every value */
1410     while (len_dest > 0) {
1411         b = 0;
1412 
1413         if (len_src >= 0) {
1414             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1415             src--;
1416             len_src--;
1417         }
1418         if (len_src >= 0) {
1419             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1420             src--;
1421             len_src--;
1422         }
1423 
1424         len_dest--;
1425         dest--;
1426         cpu_stb_data_ra(env, dest, b, ra);
1427     }
1428 }
1429 
1430 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1431                            uint32_t srclen, int ssize, uintptr_t ra)
1432 {
1433     int i;
1434     /* The destination operand is always 16 bytes long.  */
1435     const int destlen = 16;
1436 
1437     /* The operands are processed from right to left.  */
1438     src += srclen - 1;
1439     dest += destlen - 1;
1440 
1441     for (i = 0; i < destlen; i++) {
1442         uint8_t b = 0;
1443 
1444         /* Start with a positive sign */
1445         if (i == 0) {
1446             b = 0xc;
1447         } else if (srclen > ssize) {
1448             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1449             src -= ssize;
1450             srclen -= ssize;
1451         }
1452 
1453         if (srclen > ssize) {
1454             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1455             src -= ssize;
1456             srclen -= ssize;
1457         }
1458 
1459         cpu_stb_data_ra(env, dest, b, ra);
1460         dest--;
1461     }
1462 }
1463 
1464 
1465 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1466                  uint32_t srclen)
1467 {
1468     do_pkau(env, dest, src, srclen, 1, GETPC());
1469 }
1470 
1471 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1472                  uint32_t srclen)
1473 {
1474     do_pkau(env, dest, src, srclen, 2, GETPC());
1475 }
1476 
1477 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1478                   uint64_t src)
1479 {
1480     uintptr_t ra = GETPC();
1481     int len_dest = len >> 4;
1482     int len_src = len & 0xf;
1483     uint8_t b;
1484     int second_nibble = 0;
1485 
1486     dest += len_dest;
1487     src += len_src;
1488 
1489     /* last byte is special, it only flips the nibbles */
1490     b = cpu_ldub_data_ra(env, src, ra);
1491     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1492     src--;
1493     len_src--;
1494 
1495     /* now pad every nibble with 0xf0 */
1496 
1497     while (len_dest > 0) {
1498         uint8_t cur_byte = 0;
1499 
1500         if (len_src > 0) {
1501             cur_byte = cpu_ldub_data_ra(env, src, ra);
1502         }
1503 
1504         len_dest--;
1505         dest--;
1506 
1507         /* only advance one nibble at a time */
1508         if (second_nibble) {
1509             cur_byte >>= 4;
1510             len_src--;
1511             src--;
1512         }
1513         second_nibble = !second_nibble;
1514 
1515         /* digit */
1516         cur_byte = (cur_byte & 0xf);
1517         /* zone bits */
1518         cur_byte |= 0xf0;
1519 
1520         cpu_stb_data_ra(env, dest, cur_byte, ra);
1521     }
1522 }
1523 
1524 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1525                                  uint32_t destlen, int dsize, uint64_t src,
1526                                  uintptr_t ra)
1527 {
1528     int i;
1529     uint32_t cc;
1530     uint8_t b;
1531     /* The source operand is always 16 bytes long.  */
1532     const int srclen = 16;
1533 
1534     /* The operands are processed from right to left.  */
1535     src += srclen - 1;
1536     dest += destlen - dsize;
1537 
1538     /* Check for the sign.  */
1539     b = cpu_ldub_data_ra(env, src, ra);
1540     src--;
1541     switch (b & 0xf) {
1542     case 0xa:
1543     case 0xc:
1544     case 0xe ... 0xf:
1545         cc = 0;  /* plus */
1546         break;
1547     case 0xb:
1548     case 0xd:
1549         cc = 1;  /* minus */
1550         break;
1551     default:
1552     case 0x0 ... 0x9:
1553         cc = 3;  /* invalid */
1554         break;
1555     }
1556 
1557     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1558     for (i = 0; i < destlen; i += dsize) {
1559         if (i == (31 * dsize)) {
1560             /* If length is 32/64 bytes, the leftmost byte is 0. */
1561             b = 0;
1562         } else if (i % (2 * dsize)) {
1563             b = cpu_ldub_data_ra(env, src, ra);
1564             src--;
1565         } else {
1566             b >>= 4;
1567         }
1568         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1569         dest -= dsize;
1570     }
1571 
1572     return cc;
1573 }
1574 
1575 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1576                        uint64_t src)
1577 {
1578     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1579 }
1580 
1581 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1582                        uint64_t src)
1583 {
1584     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1585 }
1586 
1587 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1588 {
1589     uintptr_t ra = GETPC();
1590     uint32_t cc = 0;
1591     int i;
1592 
1593     for (i = 0; i < destlen; i++) {
1594         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1595         /* digit */
1596         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1597 
1598         if (i == (destlen - 1)) {
1599             /* sign */
1600             cc |= (b & 0xf) < 0xa ? 1 : 0;
1601         } else {
1602             /* digit */
1603             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1604         }
1605     }
1606 
1607     return cc;
1608 }
1609 
1610 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1611                              uint64_t trans, uintptr_t ra)
1612 {
1613     uint32_t i;
1614 
1615     for (i = 0; i <= len; i++) {
1616         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1617         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1618         cpu_stb_data_ra(env, array + i, new_byte, ra);
1619     }
1620 
1621     return env->cc_op;
1622 }
1623 
1624 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1625                 uint64_t trans)
1626 {
1627     do_helper_tr(env, len, array, trans, GETPC());
1628 }
1629 
1630 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1631                      uint64_t len, uint64_t trans)
1632 {
1633     uintptr_t ra = GETPC();
1634     uint8_t end = env->regs[0] & 0xff;
1635     uint64_t l = len;
1636     uint64_t i;
1637     uint32_t cc = 0;
1638 
1639     if (!(env->psw.mask & PSW_MASK_64)) {
1640         array &= 0x7fffffff;
1641         l = (uint32_t)l;
1642     }
1643 
1644     /* Lest we fail to service interrupts in a timely manner, limit the
1645        amount of work we're willing to do.  For now, let's cap at 8k.  */
1646     if (l > 0x2000) {
1647         l = 0x2000;
1648         cc = 3;
1649     }
1650 
1651     for (i = 0; i < l; i++) {
1652         uint8_t byte, new_byte;
1653 
1654         byte = cpu_ldub_data_ra(env, array + i, ra);
1655 
1656         if (byte == end) {
1657             cc = 1;
1658             break;
1659         }
1660 
1661         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1662         cpu_stb_data_ra(env, array + i, new_byte, ra);
1663     }
1664 
1665     env->cc_op = cc;
1666     env->retxl = len - i;
1667     return array + i;
1668 }
1669 
1670 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1671                                      uint64_t array, uint64_t trans,
1672                                      int inc, uintptr_t ra)
1673 {
1674     int i;
1675 
1676     for (i = 0; i <= len; i++) {
1677         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1678         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1679 
1680         if (sbyte != 0) {
1681             set_address(env, 1, array + i * inc);
1682             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1683             return (i == len) ? 2 : 1;
1684         }
1685     }
1686 
1687     return 0;
1688 }
1689 
1690 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1691                                   uint64_t array, uint64_t trans,
1692                                   uintptr_t ra)
1693 {
1694     return do_helper_trt(env, len, array, trans, 1, ra);
1695 }
1696 
1697 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1698                      uint64_t trans)
1699 {
1700     return do_helper_trt(env, len, array, trans, 1, GETPC());
1701 }
1702 
1703 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1704                                    uint64_t array, uint64_t trans,
1705                                    uintptr_t ra)
1706 {
1707     return do_helper_trt(env, len, array, trans, -1, ra);
1708 }
1709 
1710 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1711                       uint64_t trans)
1712 {
1713     return do_helper_trt(env, len, array, trans, -1, GETPC());
1714 }
1715 
1716 /* Translate one/two to one/two */
1717 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1718                       uint32_t tst, uint32_t sizes)
1719 {
1720     uintptr_t ra = GETPC();
1721     int dsize = (sizes & 1) ? 1 : 2;
1722     int ssize = (sizes & 2) ? 1 : 2;
1723     uint64_t tbl = get_address(env, 1);
1724     uint64_t dst = get_address(env, r1);
1725     uint64_t len = get_length(env, r1 + 1);
1726     uint64_t src = get_address(env, r2);
1727     uint32_t cc = 3;
1728     int i;
1729 
1730     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1731        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1732        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1733     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1734         tbl &= -4096;
1735     } else {
1736         tbl &= -8;
1737     }
1738 
1739     check_alignment(env, len, ssize, ra);
1740 
1741     /* Lest we fail to service interrupts in a timely manner, */
1742     /* limit the amount of work we're willing to do.   */
1743     for (i = 0; i < 0x2000; i++) {
1744         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1745         uint64_t tble = tbl + (sval * dsize);
1746         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1747         if (dval == tst) {
1748             cc = 1;
1749             break;
1750         }
1751         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1752 
1753         len -= ssize;
1754         src += ssize;
1755         dst += dsize;
1756 
1757         if (len == 0) {
1758             cc = 0;
1759             break;
1760         }
1761     }
1762 
1763     set_address(env, r1, dst);
1764     set_length(env, r1 + 1, len);
1765     set_address(env, r2, src);
1766 
1767     return cc;
1768 }
1769 
1770 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1771                   uint32_t r1, uint32_t r3)
1772 {
1773     uintptr_t ra = GETPC();
1774     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1775     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1776     Int128 oldv;
1777     uint64_t oldh, oldl;
1778     bool fail;
1779 
1780     check_alignment(env, addr, 16, ra);
1781 
1782     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1783     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1784 
1785     oldv = int128_make128(oldl, oldh);
1786     fail = !int128_eq(oldv, cmpv);
1787     if (fail) {
1788         newv = oldv;
1789     }
1790 
1791     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1792     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1793 
1794     env->cc_op = fail;
1795     env->regs[r1] = int128_gethi(oldv);
1796     env->regs[r1 + 1] = int128_getlo(oldv);
1797 }
1798 
1799 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1800                            uint32_t r1, uint32_t r3)
1801 {
1802     uintptr_t ra = GETPC();
1803     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1804     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1805     int mem_idx;
1806     TCGMemOpIdx oi;
1807     Int128 oldv;
1808     bool fail;
1809 
1810     assert(HAVE_CMPXCHG128);
1811 
1812     mem_idx = cpu_mmu_index(env, false);
1813     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1814     oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1815     fail = !int128_eq(oldv, cmpv);
1816 
1817     env->cc_op = fail;
1818     env->regs[r1] = int128_gethi(oldv);
1819     env->regs[r1 + 1] = int128_getlo(oldv);
1820 }
1821 
1822 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1823                         uint64_t a2, bool parallel)
1824 {
1825     uint32_t mem_idx = cpu_mmu_index(env, false);
1826     uintptr_t ra = GETPC();
1827     uint32_t fc = extract32(env->regs[0], 0, 8);
1828     uint32_t sc = extract32(env->regs[0], 8, 8);
1829     uint64_t pl = get_address(env, 1) & -16;
1830     uint64_t svh, svl;
1831     uint32_t cc;
1832 
1833     /* Sanity check the function code and storage characteristic.  */
1834     if (fc > 1 || sc > 3) {
1835         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1836             goto spec_exception;
1837         }
1838         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1839             goto spec_exception;
1840         }
1841     }
1842 
1843     /* Sanity check the alignments.  */
1844     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1845         goto spec_exception;
1846     }
1847 
1848     /* Sanity check writability of the store address.  */
1849     probe_write(env, a2, 1 << sc, mem_idx, ra);
1850 
1851     /*
1852      * Note that the compare-and-swap is atomic, and the store is atomic,
1853      * but the complete operation is not.  Therefore we do not need to
1854      * assert serial context in order to implement this.  That said,
1855      * restart early if we can't support either operation that is supposed
1856      * to be atomic.
1857      */
1858     if (parallel) {
1859         uint32_t max = 2;
1860 #ifdef CONFIG_ATOMIC64
1861         max = 3;
1862 #endif
1863         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1864             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1865             cpu_loop_exit_atomic(env_cpu(env), ra);
1866         }
1867     }
1868 
1869     /* All loads happen before all stores.  For simplicity, load the entire
1870        store value area from the parameter list.  */
1871     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1872     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1873 
1874     switch (fc) {
1875     case 0:
1876         {
1877             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1878             uint32_t cv = env->regs[r3];
1879             uint32_t ov;
1880 
1881             if (parallel) {
1882 #ifdef CONFIG_USER_ONLY
1883                 uint32_t *haddr = g2h(env_cpu(env), a1);
1884                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1885 #else
1886                 TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1887                 ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1888 #endif
1889             } else {
1890                 ov = cpu_ldl_data_ra(env, a1, ra);
1891                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1892             }
1893             cc = (ov != cv);
1894             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1895         }
1896         break;
1897 
1898     case 1:
1899         {
1900             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1901             uint64_t cv = env->regs[r3];
1902             uint64_t ov;
1903 
1904             if (parallel) {
1905 #ifdef CONFIG_ATOMIC64
1906 # ifdef CONFIG_USER_ONLY
1907                 uint64_t *haddr = g2h(env_cpu(env), a1);
1908                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1909 # else
1910                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1911                 ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1912 # endif
1913 #else
1914                 /* Note that we asserted !parallel above.  */
1915                 g_assert_not_reached();
1916 #endif
1917             } else {
1918                 ov = cpu_ldq_data_ra(env, a1, ra);
1919                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1920             }
1921             cc = (ov != cv);
1922             env->regs[r3] = ov;
1923         }
1924         break;
1925 
1926     case 2:
1927         {
1928             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1929             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1930             Int128 nv = int128_make128(nvl, nvh);
1931             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1932             Int128 ov;
1933 
1934             if (!parallel) {
1935                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1936                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1937 
1938                 ov = int128_make128(ol, oh);
1939                 cc = !int128_eq(ov, cv);
1940                 if (cc) {
1941                     nv = ov;
1942                 }
1943 
1944                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1945                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1946             } else if (HAVE_CMPXCHG128) {
1947                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1948                 ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1949                 cc = !int128_eq(ov, cv);
1950             } else {
1951                 /* Note that we asserted !parallel above.  */
1952                 g_assert_not_reached();
1953             }
1954 
1955             env->regs[r3 + 0] = int128_gethi(ov);
1956             env->regs[r3 + 1] = int128_getlo(ov);
1957         }
1958         break;
1959 
1960     default:
1961         g_assert_not_reached();
1962     }
1963 
1964     /* Store only if the comparison succeeded.  Note that above we use a pair
1965        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1966        from the most-significant bits of svh.  */
1967     if (cc == 0) {
1968         switch (sc) {
1969         case 0:
1970             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1971             break;
1972         case 1:
1973             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1974             break;
1975         case 2:
1976             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1977             break;
1978         case 3:
1979             cpu_stq_data_ra(env, a2, svh, ra);
1980             break;
1981         case 4:
1982             if (!parallel) {
1983                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1984                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1985             } else if (HAVE_ATOMIC128) {
1986                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1987                 Int128 sv = int128_make128(svl, svh);
1988                 helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1989             } else {
1990                 /* Note that we asserted !parallel above.  */
1991                 g_assert_not_reached();
1992             }
1993             break;
1994         default:
1995             g_assert_not_reached();
1996         }
1997     }
1998 
1999     return cc;
2000 
2001  spec_exception:
2002     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2003 }
2004 
2005 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2006 {
2007     return do_csst(env, r3, a1, a2, false);
2008 }
2009 
2010 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2011                                uint64_t a2)
2012 {
2013     return do_csst(env, r3, a1, a2, true);
2014 }
2015 
2016 #if !defined(CONFIG_USER_ONLY)
2017 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2018 {
2019     uintptr_t ra = GETPC();
2020     bool PERchanged = false;
2021     uint64_t src = a2;
2022     uint32_t i;
2023 
2024     if (src & 0x7) {
2025         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2026     }
2027 
2028     for (i = r1;; i = (i + 1) % 16) {
2029         uint64_t val = cpu_ldq_data_ra(env, src, ra);
2030         if (env->cregs[i] != val && i >= 9 && i <= 11) {
2031             PERchanged = true;
2032         }
2033         env->cregs[i] = val;
2034         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2035                    i, src, val);
2036         src += sizeof(uint64_t);
2037 
2038         if (i == r3) {
2039             break;
2040         }
2041     }
2042 
2043     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2044         s390_cpu_recompute_watchpoints(env_cpu(env));
2045     }
2046 
2047     tlb_flush(env_cpu(env));
2048 }
2049 
2050 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2051 {
2052     uintptr_t ra = GETPC();
2053     bool PERchanged = false;
2054     uint64_t src = a2;
2055     uint32_t i;
2056 
2057     if (src & 0x3) {
2058         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2059     }
2060 
2061     for (i = r1;; i = (i + 1) % 16) {
2062         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2063         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2064             PERchanged = true;
2065         }
2066         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2067         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2068         src += sizeof(uint32_t);
2069 
2070         if (i == r3) {
2071             break;
2072         }
2073     }
2074 
2075     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2076         s390_cpu_recompute_watchpoints(env_cpu(env));
2077     }
2078 
2079     tlb_flush(env_cpu(env));
2080 }
2081 
2082 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2083 {
2084     uintptr_t ra = GETPC();
2085     uint64_t dest = a2;
2086     uint32_t i;
2087 
2088     if (dest & 0x7) {
2089         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2090     }
2091 
2092     for (i = r1;; i = (i + 1) % 16) {
2093         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2094         dest += sizeof(uint64_t);
2095 
2096         if (i == r3) {
2097             break;
2098         }
2099     }
2100 }
2101 
2102 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2103 {
2104     uintptr_t ra = GETPC();
2105     uint64_t dest = a2;
2106     uint32_t i;
2107 
2108     if (dest & 0x3) {
2109         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2110     }
2111 
2112     for (i = r1;; i = (i + 1) % 16) {
2113         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2114         dest += sizeof(uint32_t);
2115 
2116         if (i == r3) {
2117             break;
2118         }
2119     }
2120 }
2121 
2122 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2123 {
2124     uintptr_t ra = GETPC();
2125     int i;
2126 
2127     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2128 
2129     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2130         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2131     }
2132 
2133     return 0;
2134 }
2135 
2136 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2137 {
2138     S390CPU *cpu = env_archcpu(env);
2139     CPUState *cs = env_cpu(env);
2140 
2141     /*
2142      * TODO: we currently don't handle all access protection types
2143      * (including access-list and key-controlled) as well as AR mode.
2144      */
2145     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2146         /* Fetching permitted; storing permitted */
2147         return 0;
2148     }
2149 
2150     if (env->int_pgm_code == PGM_PROTECTION) {
2151         /* retry if reading is possible */
2152         cs->exception_index = -1;
2153         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2154             /* Fetching permitted; storing not permitted */
2155             return 1;
2156         }
2157     }
2158 
2159     switch (env->int_pgm_code) {
2160     case PGM_PROTECTION:
2161         /* Fetching not permitted; storing not permitted */
2162         cs->exception_index = -1;
2163         return 2;
2164     case PGM_ADDRESSING:
2165     case PGM_TRANS_SPEC:
2166         /* exceptions forwarded to the guest */
2167         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2168         return 0;
2169     }
2170 
2171     /* Translation not available */
2172     cs->exception_index = -1;
2173     return 3;
2174 }
2175 
2176 /* insert storage key extended */
2177 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2178 {
2179     MachineState *ms = MACHINE(qdev_get_machine());
2180     static S390SKeysState *ss;
2181     static S390SKeysClass *skeyclass;
2182     uint64_t addr = wrap_address(env, r2);
2183     uint8_t key;
2184 
2185     if (addr > ms->ram_size) {
2186         return 0;
2187     }
2188 
2189     if (unlikely(!ss)) {
2190         ss = s390_get_skeys_device();
2191         skeyclass = S390_SKEYS_GET_CLASS(ss);
2192     }
2193 
2194     if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
2195         return 0;
2196     }
2197     return key;
2198 }
2199 
2200 /* set storage key extended */
2201 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2202 {
2203     MachineState *ms = MACHINE(qdev_get_machine());
2204     static S390SKeysState *ss;
2205     static S390SKeysClass *skeyclass;
2206     uint64_t addr = wrap_address(env, r2);
2207     uint8_t key;
2208 
2209     if (addr > ms->ram_size) {
2210         return;
2211     }
2212 
2213     if (unlikely(!ss)) {
2214         ss = s390_get_skeys_device();
2215         skeyclass = S390_SKEYS_GET_CLASS(ss);
2216     }
2217 
2218     key = (uint8_t) r1;
2219     skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2220    /*
2221     * As we can only flush by virtual address and not all the entries
2222     * that point to a physical address we have to flush the whole TLB.
2223     */
2224     tlb_flush_all_cpus_synced(env_cpu(env));
2225 }
2226 
2227 /* reset reference bit extended */
2228 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2229 {
2230     MachineState *ms = MACHINE(qdev_get_machine());
2231     static S390SKeysState *ss;
2232     static S390SKeysClass *skeyclass;
2233     uint8_t re, key;
2234 
2235     if (r2 > ms->ram_size) {
2236         return 0;
2237     }
2238 
2239     if (unlikely(!ss)) {
2240         ss = s390_get_skeys_device();
2241         skeyclass = S390_SKEYS_GET_CLASS(ss);
2242     }
2243 
2244     if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2245         return 0;
2246     }
2247 
2248     re = key & (SK_R | SK_C);
2249     key &= ~SK_R;
2250 
2251     if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2252         return 0;
2253     }
2254    /*
2255     * As we can only flush by virtual address and not all the entries
2256     * that point to a physical address we have to flush the whole TLB.
2257     */
2258     tlb_flush_all_cpus_synced(env_cpu(env));
2259 
2260     /*
2261      * cc
2262      *
2263      * 0  Reference bit zero; change bit zero
2264      * 1  Reference bit zero; change bit one
2265      * 2  Reference bit one; change bit zero
2266      * 3  Reference bit one; change bit one
2267      */
2268 
2269     return re >> 1;
2270 }
2271 
2272 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2273 {
2274     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2275     S390Access srca, desta;
2276     uintptr_t ra = GETPC();
2277     int cc = 0;
2278 
2279     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2280                __func__, l, a1, a2);
2281 
2282     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2283         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2284         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2285     }
2286 
2287     l = wrap_length32(env, l);
2288     if (l > 256) {
2289         /* max 256 */
2290         l = 256;
2291         cc = 3;
2292     } else if (!l) {
2293         return cc;
2294     }
2295 
2296     /* TODO: Access key handling */
2297     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2298     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2299     access_memmove(env, &desta, &srca, ra);
2300     return cc;
2301 }
2302 
2303 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2304 {
2305     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2306     S390Access srca, desta;
2307     uintptr_t ra = GETPC();
2308     int cc = 0;
2309 
2310     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2311                __func__, l, a1, a2);
2312 
2313     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2314         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2315         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2316     }
2317 
2318     l = wrap_length32(env, l);
2319     if (l > 256) {
2320         /* max 256 */
2321         l = 256;
2322         cc = 3;
2323     } else if (!l) {
2324         return cc;
2325     }
2326 
2327     /* TODO: Access key handling */
2328     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2329     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2330     access_memmove(env, &desta, &srca, ra);
2331     return cc;
2332 }
2333 
2334 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2335 {
2336     CPUState *cs = env_cpu(env);
2337     const uintptr_t ra = GETPC();
2338     uint64_t table, entry, raddr;
2339     uint16_t entries, i, index = 0;
2340 
2341     if (r2 & 0xff000) {
2342         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2343     }
2344 
2345     if (!(r2 & 0x800)) {
2346         /* invalidation-and-clearing operation */
2347         table = r1 & ASCE_ORIGIN;
2348         entries = (r2 & 0x7ff) + 1;
2349 
2350         switch (r1 & ASCE_TYPE_MASK) {
2351         case ASCE_TYPE_REGION1:
2352             index = (r2 >> 53) & 0x7ff;
2353             break;
2354         case ASCE_TYPE_REGION2:
2355             index = (r2 >> 42) & 0x7ff;
2356             break;
2357         case ASCE_TYPE_REGION3:
2358             index = (r2 >> 31) & 0x7ff;
2359             break;
2360         case ASCE_TYPE_SEGMENT:
2361             index = (r2 >> 20) & 0x7ff;
2362             break;
2363         }
2364         for (i = 0; i < entries; i++) {
2365             /* addresses are not wrapped in 24/31bit mode but table index is */
2366             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2367             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2368             if (!(entry & REGION_ENTRY_I)) {
2369                 /* we are allowed to not store if already invalid */
2370                 entry |= REGION_ENTRY_I;
2371                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2372             }
2373         }
2374     }
2375 
2376     /* We simply flush the complete tlb, therefore we can ignore r3. */
2377     if (m4 & 1) {
2378         tlb_flush(cs);
2379     } else {
2380         tlb_flush_all_cpus_synced(cs);
2381     }
2382 }
2383 
2384 /* invalidate pte */
2385 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2386                   uint32_t m4)
2387 {
2388     CPUState *cs = env_cpu(env);
2389     const uintptr_t ra = GETPC();
2390     uint64_t page = vaddr & TARGET_PAGE_MASK;
2391     uint64_t pte_addr, pte;
2392 
2393     /* Compute the page table entry address */
2394     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2395     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2396 
2397     /* Mark the page table entry as invalid */
2398     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2399     pte |= PAGE_ENTRY_I;
2400     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2401 
2402     /* XXX we exploit the fact that Linux passes the exact virtual
2403        address here - it's not obliged to! */
2404     if (m4 & 1) {
2405         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2406             tlb_flush_page(cs, page);
2407             /* XXX 31-bit hack */
2408             tlb_flush_page(cs, page ^ 0x80000000);
2409         } else {
2410             /* looks like we don't have a valid virtual address */
2411             tlb_flush(cs);
2412         }
2413     } else {
2414         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2415             tlb_flush_page_all_cpus_synced(cs, page);
2416             /* XXX 31-bit hack */
2417             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2418         } else {
2419             /* looks like we don't have a valid virtual address */
2420             tlb_flush_all_cpus_synced(cs);
2421         }
2422     }
2423 }
2424 
2425 /* flush local tlb */
2426 void HELPER(ptlb)(CPUS390XState *env)
2427 {
2428     tlb_flush(env_cpu(env));
2429 }
2430 
2431 /* flush global tlb */
2432 void HELPER(purge)(CPUS390XState *env)
2433 {
2434     tlb_flush_all_cpus_synced(env_cpu(env));
2435 }
2436 
2437 /* load real address */
2438 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2439 {
2440     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2441     uint64_t ret, tec;
2442     int flags, exc, cc;
2443 
2444     /* XXX incomplete - has more corner cases */
2445     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2446         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2447     }
2448 
2449     exc = mmu_translate(env, addr, 0, asc, &ret, &flags, &tec);
2450     if (exc) {
2451         cc = 3;
2452         ret = exc | 0x80000000;
2453     } else {
2454         cc = 0;
2455         ret |= addr & ~TARGET_PAGE_MASK;
2456     }
2457 
2458     env->cc_op = cc;
2459     return ret;
2460 }
2461 #endif
2462 
2463 /* load pair from quadword */
2464 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2465 {
2466     uintptr_t ra = GETPC();
2467     uint64_t hi, lo;
2468 
2469     check_alignment(env, addr, 16, ra);
2470     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2471     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2472 
2473     env->retxl = lo;
2474     return hi;
2475 }
2476 
2477 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2478 {
2479     uintptr_t ra = GETPC();
2480     uint64_t hi, lo;
2481     int mem_idx;
2482     TCGMemOpIdx oi;
2483     Int128 v;
2484 
2485     assert(HAVE_ATOMIC128);
2486 
2487     mem_idx = cpu_mmu_index(env, false);
2488     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2489     v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
2490     hi = int128_gethi(v);
2491     lo = int128_getlo(v);
2492 
2493     env->retxl = lo;
2494     return hi;
2495 }
2496 
2497 /* store pair to quadword */
2498 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2499                   uint64_t low, uint64_t high)
2500 {
2501     uintptr_t ra = GETPC();
2502 
2503     check_alignment(env, addr, 16, ra);
2504     cpu_stq_data_ra(env, addr + 0, high, ra);
2505     cpu_stq_data_ra(env, addr + 8, low, ra);
2506 }
2507 
2508 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2509                            uint64_t low, uint64_t high)
2510 {
2511     uintptr_t ra = GETPC();
2512     int mem_idx;
2513     TCGMemOpIdx oi;
2514     Int128 v;
2515 
2516     assert(HAVE_ATOMIC128);
2517 
2518     mem_idx = cpu_mmu_index(env, false);
2519     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2520     v = int128_make128(low, high);
2521     helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
2522 }
2523 
2524 /* Execute instruction.  This instruction executes an insn modified with
2525    the contents of r1.  It does not change the executed instruction in memory;
2526    it does not change the program counter.
2527 
2528    Perform this by recording the modified instruction in env->ex_value.
2529    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2530 */
2531 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2532 {
2533     uint64_t insn = cpu_lduw_code(env, addr);
2534     uint8_t opc = insn >> 8;
2535 
2536     /* Or in the contents of R1[56:63].  */
2537     insn |= r1 & 0xff;
2538 
2539     /* Load the rest of the instruction.  */
2540     insn <<= 48;
2541     switch (get_ilen(opc)) {
2542     case 2:
2543         break;
2544     case 4:
2545         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2546         break;
2547     case 6:
2548         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2549         break;
2550     default:
2551         g_assert_not_reached();
2552     }
2553 
2554     /* The very most common cases can be sped up by avoiding a new TB.  */
2555     if ((opc & 0xf0) == 0xd0) {
2556         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2557                                       uint64_t, uintptr_t);
2558         static const dx_helper dx[16] = {
2559             [0x0] = do_helper_trt_bkwd,
2560             [0x2] = do_helper_mvc,
2561             [0x4] = do_helper_nc,
2562             [0x5] = do_helper_clc,
2563             [0x6] = do_helper_oc,
2564             [0x7] = do_helper_xc,
2565             [0xc] = do_helper_tr,
2566             [0xd] = do_helper_trt_fwd,
2567         };
2568         dx_helper helper = dx[opc & 0xf];
2569 
2570         if (helper) {
2571             uint32_t l = extract64(insn, 48, 8);
2572             uint32_t b1 = extract64(insn, 44, 4);
2573             uint32_t d1 = extract64(insn, 32, 12);
2574             uint32_t b2 = extract64(insn, 28, 4);
2575             uint32_t d2 = extract64(insn, 16, 12);
2576             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2577             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2578 
2579             env->cc_op = helper(env, l, a1, a2, 0);
2580             env->psw.addr += ilen;
2581             return;
2582         }
2583     } else if (opc == 0x0a) {
2584         env->int_svc_code = extract64(insn, 48, 8);
2585         env->int_svc_ilen = ilen;
2586         helper_exception(env, EXCP_SVC);
2587         g_assert_not_reached();
2588     }
2589 
2590     /* Record the insn we want to execute as well as the ilen to use
2591        during the execution of the target insn.  This will also ensure
2592        that ex_value is non-zero, which flags that we are in a state
2593        that requires such execution.  */
2594     env->ex_value = insn | ilen;
2595 }
2596 
2597 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2598                        uint64_t len)
2599 {
2600     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2601     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2602     const uint64_t r0 = env->regs[0];
2603     const uintptr_t ra = GETPC();
2604     uint8_t dest_key, dest_as, dest_k, dest_a;
2605     uint8_t src_key, src_as, src_k, src_a;
2606     uint64_t val;
2607     int cc = 0;
2608 
2609     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2610                __func__, dest, src, len);
2611 
2612     if (!(env->psw.mask & PSW_MASK_DAT)) {
2613         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2614     }
2615 
2616     /* OAC (operand access control) for the first operand -> dest */
2617     val = (r0 & 0xffff0000ULL) >> 16;
2618     dest_key = (val >> 12) & 0xf;
2619     dest_as = (val >> 6) & 0x3;
2620     dest_k = (val >> 1) & 0x1;
2621     dest_a = val & 0x1;
2622 
2623     /* OAC (operand access control) for the second operand -> src */
2624     val = (r0 & 0x0000ffffULL);
2625     src_key = (val >> 12) & 0xf;
2626     src_as = (val >> 6) & 0x3;
2627     src_k = (val >> 1) & 0x1;
2628     src_a = val & 0x1;
2629 
2630     if (!dest_k) {
2631         dest_key = psw_key;
2632     }
2633     if (!src_k) {
2634         src_key = psw_key;
2635     }
2636     if (!dest_a) {
2637         dest_as = psw_as;
2638     }
2639     if (!src_a) {
2640         src_as = psw_as;
2641     }
2642 
2643     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2644         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2645     }
2646     if (!(env->cregs[0] & CR0_SECONDARY) &&
2647         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2648         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2649     }
2650     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2651         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2652     }
2653 
2654     len = wrap_length32(env, len);
2655     if (len > 4096) {
2656         cc = 3;
2657         len = 4096;
2658     }
2659 
2660     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2661     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2662         (env->psw.mask & PSW_MASK_PSTATE)) {
2663         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2664                       __func__);
2665         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2666     }
2667 
2668     /* FIXME: Access using correct keys and AR-mode */
2669     if (len) {
2670         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2671                                          mmu_idx_from_as(src_as), ra);
2672         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2673                                           mmu_idx_from_as(dest_as), ra);
2674 
2675         access_memmove(env, &desta, &srca, ra);
2676     }
2677 
2678     return cc;
2679 }
2680 
2681 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2682    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2683    value >= 0 indicates failure, and the CC value to be returned.  */
2684 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2685                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2686                                  uint32_t *ochar, uint32_t *olen);
2687 
2688 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2689    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2690    indicates failure, and the CC value to be returned.  */
2691 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2692                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2693                                  uint32_t *olen);
2694 
2695 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2696                        bool enh_check, uintptr_t ra,
2697                        uint32_t *ochar, uint32_t *olen)
2698 {
2699     uint8_t s0, s1, s2, s3;
2700     uint32_t c, l;
2701 
2702     if (ilen < 1) {
2703         return 0;
2704     }
2705     s0 = cpu_ldub_data_ra(env, addr, ra);
2706     if (s0 <= 0x7f) {
2707         /* one byte character */
2708         l = 1;
2709         c = s0;
2710     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2711         /* invalid character */
2712         return 2;
2713     } else if (s0 <= 0xdf) {
2714         /* two byte character */
2715         l = 2;
2716         if (ilen < 2) {
2717             return 0;
2718         }
2719         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2720         c = s0 & 0x1f;
2721         c = (c << 6) | (s1 & 0x3f);
2722         if (enh_check && (s1 & 0xc0) != 0x80) {
2723             return 2;
2724         }
2725     } else if (s0 <= 0xef) {
2726         /* three byte character */
2727         l = 3;
2728         if (ilen < 3) {
2729             return 0;
2730         }
2731         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2732         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2733         c = s0 & 0x0f;
2734         c = (c << 6) | (s1 & 0x3f);
2735         c = (c << 6) | (s2 & 0x3f);
2736         /* Fold the byte-by-byte range descriptions in the PoO into
2737            tests against the complete value.  It disallows encodings
2738            that could be smaller, and the UTF-16 surrogates.  */
2739         if (enh_check
2740             && ((s1 & 0xc0) != 0x80
2741                 || (s2 & 0xc0) != 0x80
2742                 || c < 0x1000
2743                 || (c >= 0xd800 && c <= 0xdfff))) {
2744             return 2;
2745         }
2746     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2747         /* four byte character */
2748         l = 4;
2749         if (ilen < 4) {
2750             return 0;
2751         }
2752         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2753         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2754         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2755         c = s0 & 0x07;
2756         c = (c << 6) | (s1 & 0x3f);
2757         c = (c << 6) | (s2 & 0x3f);
2758         c = (c << 6) | (s3 & 0x3f);
2759         /* See above.  */
2760         if (enh_check
2761             && ((s1 & 0xc0) != 0x80
2762                 || (s2 & 0xc0) != 0x80
2763                 || (s3 & 0xc0) != 0x80
2764                 || c < 0x010000
2765                 || c > 0x10ffff)) {
2766             return 2;
2767         }
2768     } else {
2769         /* invalid character */
2770         return 2;
2771     }
2772 
2773     *ochar = c;
2774     *olen = l;
2775     return -1;
2776 }
2777 
2778 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2779                         bool enh_check, uintptr_t ra,
2780                         uint32_t *ochar, uint32_t *olen)
2781 {
2782     uint16_t s0, s1;
2783     uint32_t c, l;
2784 
2785     if (ilen < 2) {
2786         return 0;
2787     }
2788     s0 = cpu_lduw_data_ra(env, addr, ra);
2789     if ((s0 & 0xfc00) != 0xd800) {
2790         /* one word character */
2791         l = 2;
2792         c = s0;
2793     } else {
2794         /* two word character */
2795         l = 4;
2796         if (ilen < 4) {
2797             return 0;
2798         }
2799         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2800         c = extract32(s0, 6, 4) + 1;
2801         c = (c << 6) | (s0 & 0x3f);
2802         c = (c << 10) | (s1 & 0x3ff);
2803         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2804             /* invalid surrogate character */
2805             return 2;
2806         }
2807     }
2808 
2809     *ochar = c;
2810     *olen = l;
2811     return -1;
2812 }
2813 
2814 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2815                         bool enh_check, uintptr_t ra,
2816                         uint32_t *ochar, uint32_t *olen)
2817 {
2818     uint32_t c;
2819 
2820     if (ilen < 4) {
2821         return 0;
2822     }
2823     c = cpu_ldl_data_ra(env, addr, ra);
2824     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2825         /* invalid unicode character */
2826         return 2;
2827     }
2828 
2829     *ochar = c;
2830     *olen = 4;
2831     return -1;
2832 }
2833 
2834 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2835                        uintptr_t ra, uint32_t c, uint32_t *olen)
2836 {
2837     uint8_t d[4];
2838     uint32_t l, i;
2839 
2840     if (c <= 0x7f) {
2841         /* one byte character */
2842         l = 1;
2843         d[0] = c;
2844     } else if (c <= 0x7ff) {
2845         /* two byte character */
2846         l = 2;
2847         d[1] = 0x80 | extract32(c, 0, 6);
2848         d[0] = 0xc0 | extract32(c, 6, 5);
2849     } else if (c <= 0xffff) {
2850         /* three byte character */
2851         l = 3;
2852         d[2] = 0x80 | extract32(c, 0, 6);
2853         d[1] = 0x80 | extract32(c, 6, 6);
2854         d[0] = 0xe0 | extract32(c, 12, 4);
2855     } else {
2856         /* four byte character */
2857         l = 4;
2858         d[3] = 0x80 | extract32(c, 0, 6);
2859         d[2] = 0x80 | extract32(c, 6, 6);
2860         d[1] = 0x80 | extract32(c, 12, 6);
2861         d[0] = 0xf0 | extract32(c, 18, 3);
2862     }
2863 
2864     if (ilen < l) {
2865         return 1;
2866     }
2867     for (i = 0; i < l; ++i) {
2868         cpu_stb_data_ra(env, addr + i, d[i], ra);
2869     }
2870 
2871     *olen = l;
2872     return -1;
2873 }
2874 
2875 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2876                         uintptr_t ra, uint32_t c, uint32_t *olen)
2877 {
2878     uint16_t d0, d1;
2879 
2880     if (c <= 0xffff) {
2881         /* one word character */
2882         if (ilen < 2) {
2883             return 1;
2884         }
2885         cpu_stw_data_ra(env, addr, c, ra);
2886         *olen = 2;
2887     } else {
2888         /* two word character */
2889         if (ilen < 4) {
2890             return 1;
2891         }
2892         d1 = 0xdc00 | extract32(c, 0, 10);
2893         d0 = 0xd800 | extract32(c, 10, 6);
2894         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2895         cpu_stw_data_ra(env, addr + 0, d0, ra);
2896         cpu_stw_data_ra(env, addr + 2, d1, ra);
2897         *olen = 4;
2898     }
2899 
2900     return -1;
2901 }
2902 
2903 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2904                         uintptr_t ra, uint32_t c, uint32_t *olen)
2905 {
2906     if (ilen < 4) {
2907         return 1;
2908     }
2909     cpu_stl_data_ra(env, addr, c, ra);
2910     *olen = 4;
2911     return -1;
2912 }
2913 
2914 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2915                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2916                                        decode_unicode_fn decode,
2917                                        encode_unicode_fn encode)
2918 {
2919     uint64_t dst = get_address(env, r1);
2920     uint64_t dlen = get_length(env, r1 + 1);
2921     uint64_t src = get_address(env, r2);
2922     uint64_t slen = get_length(env, r2 + 1);
2923     bool enh_check = m3 & 1;
2924     int cc, i;
2925 
2926     /* Lest we fail to service interrupts in a timely manner, limit the
2927        amount of work we're willing to do.  For now, let's cap at 256.  */
2928     for (i = 0; i < 256; ++i) {
2929         uint32_t c, ilen, olen;
2930 
2931         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2932         if (unlikely(cc >= 0)) {
2933             break;
2934         }
2935         cc = encode(env, dst, dlen, ra, c, &olen);
2936         if (unlikely(cc >= 0)) {
2937             break;
2938         }
2939 
2940         src += ilen;
2941         slen -= ilen;
2942         dst += olen;
2943         dlen -= olen;
2944         cc = 3;
2945     }
2946 
2947     set_address(env, r1, dst);
2948     set_length(env, r1 + 1, dlen);
2949     set_address(env, r2, src);
2950     set_length(env, r2 + 1, slen);
2951 
2952     return cc;
2953 }
2954 
2955 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2956 {
2957     return convert_unicode(env, r1, r2, m3, GETPC(),
2958                            decode_utf8, encode_utf16);
2959 }
2960 
2961 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2962 {
2963     return convert_unicode(env, r1, r2, m3, GETPC(),
2964                            decode_utf8, encode_utf32);
2965 }
2966 
2967 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2968 {
2969     return convert_unicode(env, r1, r2, m3, GETPC(),
2970                            decode_utf16, encode_utf8);
2971 }
2972 
2973 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2974 {
2975     return convert_unicode(env, r1, r2, m3, GETPC(),
2976                            decode_utf16, encode_utf32);
2977 }
2978 
2979 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2980 {
2981     return convert_unicode(env, r1, r2, m3, GETPC(),
2982                            decode_utf32, encode_utf8);
2983 }
2984 
2985 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2986 {
2987     return convert_unicode(env, r1, r2, m3, GETPC(),
2988                            decode_utf32, encode_utf16);
2989 }
2990 
2991 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2992                         uintptr_t ra)
2993 {
2994     /* test the actual access, not just any access to the page due to LAP */
2995     while (len) {
2996         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2997         const uint64_t curlen = MIN(pagelen, len);
2998 
2999         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3000         addr = wrap_address(env, addr + curlen);
3001         len -= curlen;
3002     }
3003 }
3004 
3005 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3006 {
3007     probe_write_access(env, addr, len, GETPC());
3008 }
3009