xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 6016b7b46edb714a53a31536b30ead9c3aafaef7)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "cpu.h"
23 #include "s390x-internal.h"
24 #include "tcg_s390x.h"
25 #include "exec/helper-proto.h"
26 #include "exec/exec-all.h"
27 #include "exec/cpu_ldst.h"
28 #include "qemu/int128.h"
29 #include "qemu/atomic128.h"
30 #include "trace.h"
31 
32 #if !defined(CONFIG_USER_ONLY)
33 #include "hw/s390x/storage-keys.h"
34 #include "hw/boards.h"
35 #endif
36 
37 /*****************************************************************************/
38 /* Softmmu support */
39 
/*
 * Debug tracing for the helpers in this file: with DEBUG_HELPER defined,
 * HELPER_LOG() forwards its arguments to qemu_log(); otherwise it expands
 * to nothing.
 */
/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif
46 
47 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
48 {
49     uint16_t pkm = env->cregs[3] >> 16;
50 
51     if (env->psw.mask & PSW_MASK_PSTATE) {
52         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
53         return pkm & (0x80 >> psw_key);
54     }
55     return true;
56 }
57 
58 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
59                                    uint64_t src, uint32_t len)
60 {
61     if (!len || src == dest) {
62         return false;
63     }
64     /* Take care of wrapping at the end of address space. */
65     if (unlikely(wrap_address(env, src + len - 1) < src)) {
66         return dest > src || dest <= wrap_address(env, src + len - 1);
67     }
68     return dest > src && dest <= src + len - 1;
69 }
70 
71 /* Trigger a SPECIFICATION exception if an address or a length is not
72    naturally aligned.  */
73 static inline void check_alignment(CPUS390XState *env, uint64_t v,
74                                    int wordsize, uintptr_t ra)
75 {
76     if (v % wordsize) {
77         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
78     }
79 }
80 
81 /* Load a value from memory according to its size.  */
82 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
83                                            int wordsize, uintptr_t ra)
84 {
85     switch (wordsize) {
86     case 1:
87         return cpu_ldub_data_ra(env, addr, ra);
88     case 2:
89         return cpu_lduw_data_ra(env, addr, ra);
90     default:
91         abort();
92     }
93 }
94 
95 /* Store a to memory according to its size.  */
96 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
97                                       uint64_t value, int wordsize,
98                                       uintptr_t ra)
99 {
100     switch (wordsize) {
101     case 1:
102         cpu_stb_data_ra(env, addr, value, ra);
103         break;
104     case 2:
105         cpu_stw_data_ra(env, addr, value, ra);
106         break;
107     default:
108         abort();
109     }
110 }
111 
/*
 * Descriptor of one prepared guest memory access, filled in by
 * access_prepare_nf().
 *
 * An access covers at most 4096 bytes and therefore at most two pages.
 */
typedef struct S390Access {
    /* Guest virtual start address of the first and of the second part. */
    target_ulong vaddr1;
    target_ulong vaddr2;
    /*
     * Host pointers for the two parts. A NULL pointer makes the byte
     * accessors fall back to the ld/st helpers for that part.
     */
    char *haddr1;
    char *haddr2;
    /* Byte count of each part; size2 is 0 if the access has one part only. */
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe, one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation - very unlikely that
     * the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
132 
133 /*
134  * With nonfault=1, return the PGM_ exception that would have been injected
135  * into the guest; return 0 if no exception was detected.
136  *
137  * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec.
138  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
139  */
140 static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
141                              MMUAccessType access_type, int mmu_idx,
142                              bool nonfault, void **phost, uintptr_t ra)
143 {
144 #if defined(CONFIG_USER_ONLY)
145     return probe_access_flags(env, addr, access_type, mmu_idx,
146                               nonfault, phost, ra);
147 #else
148     int flags;
149 
150     /*
151      * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
152      * to detect if there was an exception during tlb_fill().
153      */
154     env->tlb_fill_exc = 0;
155     flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
156                                ra);
157     if (env->tlb_fill_exc) {
158         return env->tlb_fill_exc;
159     }
160 
161     if (unlikely(flags & TLB_WATCHPOINT)) {
162         /* S390 does not presently use transaction attributes. */
163         cpu_check_watchpoint(env_cpu(env), addr, size,
164                              MEMTXATTRS_UNSPECIFIED,
165                              (access_type == MMU_DATA_STORE
166                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
167     }
168     return 0;
169 #endif
170 }
171 
172 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
173                              bool nonfault, vaddr vaddr1, int size,
174                              MMUAccessType access_type,
175                              int mmu_idx, uintptr_t ra)
176 {
177     void *haddr1, *haddr2 = NULL;
178     int size1, size2, exc;
179     vaddr vaddr2 = 0;
180 
181     assert(size > 0 && size <= 4096);
182 
183     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
184     size2 = size - size1;
185 
186     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
187                             &haddr1, ra);
188     if (exc) {
189         return exc;
190     }
191     if (unlikely(size2)) {
192         /* The access crosses page boundaries. */
193         vaddr2 = wrap_address(env, vaddr1 + size1);
194         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
195                                 nonfault, &haddr2, ra);
196         if (exc) {
197             return exc;
198         }
199     }
200 
201     *access = (S390Access) {
202         .vaddr1 = vaddr1,
203         .vaddr2 = vaddr2,
204         .haddr1 = haddr1,
205         .haddr2 = haddr2,
206         .size1 = size1,
207         .size2 = size2,
208         .mmu_idx = mmu_idx
209     };
210     return 0;
211 }
212 
213 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
214                                  MMUAccessType access_type, int mmu_idx,
215                                  uintptr_t ra)
216 {
217     S390Access ret;
218     int exc = access_prepare_nf(&ret, env, false, vaddr, size,
219                                 access_type, mmu_idx, ra);
220     assert(!exc);
221     return ret;
222 }
223 
224 /* Helper to handle memset on a single page. */
225 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
226                              uint8_t byte, uint16_t size, int mmu_idx,
227                              uintptr_t ra)
228 {
229 #ifdef CONFIG_USER_ONLY
230     g_assert(haddr);
231     memset(haddr, byte, size);
232 #else
233     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
234     int i;
235 
236     if (likely(haddr)) {
237         memset(haddr, byte, size);
238     } else {
239         /*
240          * Do a single access and test if we can then get access to the
241          * page. This is especially relevant to speed up TLB_NOTDIRTY.
242          */
243         g_assert(size > 0);
244         cpu_stb_mmu(env, vaddr, byte, oi, ra);
245         haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
246         if (likely(haddr)) {
247             memset(haddr + 1, byte, size - 1);
248         } else {
249             for (i = 1; i < size; i++) {
250                 cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
251             }
252         }
253     }
254 #endif
255 }
256 
257 static void access_memset(CPUS390XState *env, S390Access *desta,
258                           uint8_t byte, uintptr_t ra)
259 {
260 
261     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
262                      desta->mmu_idx, ra);
263     if (likely(!desta->size2)) {
264         return;
265     }
266     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
267                      desta->mmu_idx, ra);
268 }
269 
270 static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
271                                   int offset, int mmu_idx, uintptr_t ra)
272 {
273 #ifdef CONFIG_USER_ONLY
274     return ldub_p(*haddr + offset);
275 #else
276     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
277     uint8_t byte;
278 
279     if (likely(*haddr)) {
280         return ldub_p(*haddr + offset);
281     }
282     /*
283      * Do a single access and test if we can then get access to the
284      * page. This is especially relevant to speed up TLB_NOTDIRTY.
285      */
286     byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
287     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
288     return byte;
289 #endif
290 }
291 
292 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
293                                int offset, uintptr_t ra)
294 {
295     if (offset < access->size1) {
296         return do_access_get_byte(env, access->vaddr1, &access->haddr1,
297                                   offset, access->mmu_idx, ra);
298     }
299     return do_access_get_byte(env, access->vaddr2, &access->haddr2,
300                               offset - access->size1, access->mmu_idx, ra);
301 }
302 
303 static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
304                                int offset, uint8_t byte, int mmu_idx,
305                                uintptr_t ra)
306 {
307 #ifdef CONFIG_USER_ONLY
308     stb_p(*haddr + offset, byte);
309 #else
310     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
311 
312     if (likely(*haddr)) {
313         stb_p(*haddr + offset, byte);
314         return;
315     }
316     /*
317      * Do a single access and test if we can then get access to the
318      * page. This is especially relevant to speed up TLB_NOTDIRTY.
319      */
320     cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
321     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
322 #endif
323 }
324 
325 static void access_set_byte(CPUS390XState *env, S390Access *access,
326                             int offset, uint8_t byte, uintptr_t ra)
327 {
328     if (offset < access->size1) {
329         do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
330                            access->mmu_idx, ra);
331     } else {
332         do_access_set_byte(env, access->vaddr2, &access->haddr2,
333                            offset - access->size1, byte, access->mmu_idx, ra);
334     }
335 }
336 
337 /*
338  * Move data with the same semantics as memmove() in case ranges don't overlap
339  * or src > dest. Undefined behavior on destructive overlaps.
340  */
341 static void access_memmove(CPUS390XState *env, S390Access *desta,
342                            S390Access *srca, uintptr_t ra)
343 {
344     int diff;
345 
346     g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
347 
348     /* Fallback to slow access in case we don't have access to all host pages */
349     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
350                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
351         int i;
352 
353         for (i = 0; i < desta->size1 + desta->size2; i++) {
354             uint8_t byte = access_get_byte(env, srca, i, ra);
355 
356             access_set_byte(env, desta, i, byte, ra);
357         }
358         return;
359     }
360 
361     if (srca->size1 == desta->size1) {
362         memmove(desta->haddr1, srca->haddr1, srca->size1);
363         if (unlikely(srca->size2)) {
364             memmove(desta->haddr2, srca->haddr2, srca->size2);
365         }
366     } else if (srca->size1 < desta->size1) {
367         diff = desta->size1 - srca->size1;
368         memmove(desta->haddr1, srca->haddr1, srca->size1);
369         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
370         if (likely(desta->size2)) {
371             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
372         }
373     } else {
374         diff = srca->size1 - desta->size1;
375         memmove(desta->haddr1, srca->haddr1, desta->size1);
376         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
377         if (likely(srca->size2)) {
378             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
379         }
380     }
381 }
382 
383 static int mmu_idx_from_as(uint8_t as)
384 {
385     switch (as) {
386     case AS_PRIMARY:
387         return MMU_PRIMARY_IDX;
388     case AS_SECONDARY:
389         return MMU_SECONDARY_IDX;
390     case AS_HOME:
391         return MMU_HOME_IDX;
392     default:
393         /* FIXME AS_ACCREG */
394         g_assert_not_reached();
395     }
396 }
397 
398 /* and on array */
399 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
400                              uint64_t src, uintptr_t ra)
401 {
402     const int mmu_idx = cpu_mmu_index(env, false);
403     S390Access srca1, srca2, desta;
404     uint32_t i;
405     uint8_t c = 0;
406 
407     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
408                __func__, l, dest, src);
409 
410     /* NC always processes one more byte than specified - maximum is 256 */
411     l++;
412 
413     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
414     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
415     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
416     for (i = 0; i < l; i++) {
417         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
418                           access_get_byte(env, &srca2, i, ra);
419 
420         c |= x;
421         access_set_byte(env, &desta, i, x, ra);
422     }
423     return c != 0;
424 }
425 
426 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
427                     uint64_t src)
428 {
429     return do_helper_nc(env, l, dest, src, GETPC());
430 }
431 
432 /* xor on array */
433 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
434                              uint64_t src, uintptr_t ra)
435 {
436     const int mmu_idx = cpu_mmu_index(env, false);
437     S390Access srca1, srca2, desta;
438     uint32_t i;
439     uint8_t c = 0;
440 
441     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
442                __func__, l, dest, src);
443 
444     /* XC always processes one more byte than specified - maximum is 256 */
445     l++;
446 
447     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
448     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
449     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
450 
451     /* xor with itself is the same as memset(0) */
452     if (src == dest) {
453         access_memset(env, &desta, 0, ra);
454         return 0;
455     }
456 
457     for (i = 0; i < l; i++) {
458         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
459                           access_get_byte(env, &srca2, i, ra);
460 
461         c |= x;
462         access_set_byte(env, &desta, i, x, ra);
463     }
464     return c != 0;
465 }
466 
467 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
468                     uint64_t src)
469 {
470     return do_helper_xc(env, l, dest, src, GETPC());
471 }
472 
473 /* or on array */
474 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
475                              uint64_t src, uintptr_t ra)
476 {
477     const int mmu_idx = cpu_mmu_index(env, false);
478     S390Access srca1, srca2, desta;
479     uint32_t i;
480     uint8_t c = 0;
481 
482     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
483                __func__, l, dest, src);
484 
485     /* OC always processes one more byte than specified - maximum is 256 */
486     l++;
487 
488     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
489     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
490     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
491     for (i = 0; i < l; i++) {
492         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
493                           access_get_byte(env, &srca2, i, ra);
494 
495         c |= x;
496         access_set_byte(env, &desta, i, x, ra);
497     }
498     return c != 0;
499 }
500 
501 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
502                     uint64_t src)
503 {
504     return do_helper_oc(env, l, dest, src, GETPC());
505 }
506 
507 /* memmove */
508 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
509                               uint64_t src, uintptr_t ra)
510 {
511     const int mmu_idx = cpu_mmu_index(env, false);
512     S390Access srca, desta;
513     uint32_t i;
514 
515     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
516                __func__, l, dest, src);
517 
518     /* MVC always copies one more byte than specified - maximum is 256 */
519     l++;
520 
521     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
522     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
523 
524     /*
525      * "When the operands overlap, the result is obtained as if the operands
526      * were processed one byte at a time". Only non-destructive overlaps
527      * behave like memmove().
528      */
529     if (dest == src + 1) {
530         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
531     } else if (!is_destructive_overlap(env, dest, src, l)) {
532         access_memmove(env, &desta, &srca, ra);
533     } else {
534         for (i = 0; i < l; i++) {
535             uint8_t byte = access_get_byte(env, &srca, i, ra);
536 
537             access_set_byte(env, &desta, i, byte, ra);
538         }
539     }
540 
541     return env->cc_op;
542 }
543 
544 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
545 {
546     do_helper_mvc(env, l, dest, src, GETPC());
547 }
548 
549 /* move inverse  */
550 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
551 {
552     const int mmu_idx = cpu_mmu_index(env, false);
553     S390Access srca, desta;
554     uintptr_t ra = GETPC();
555     int i;
556 
557     /* MVCIN always copies one more byte than specified - maximum is 256 */
558     l++;
559 
560     src = wrap_address(env, src - l + 1);
561     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
562     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
563     for (i = 0; i < l; i++) {
564         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
565 
566         access_set_byte(env, &desta, i, x, ra);
567     }
568 }
569 
570 /* move numerics  */
571 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
572 {
573     const int mmu_idx = cpu_mmu_index(env, false);
574     S390Access srca1, srca2, desta;
575     uintptr_t ra = GETPC();
576     int i;
577 
578     /* MVN always copies one more byte than specified - maximum is 256 */
579     l++;
580 
581     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
582     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
583     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
584     for (i = 0; i < l; i++) {
585         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
586                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
587 
588         access_set_byte(env, &desta, i, x, ra);
589     }
590 }
591 
592 /* move with offset  */
593 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
594 {
595     const int mmu_idx = cpu_mmu_index(env, false);
596     /* MVO always processes one more byte than specified - maximum is 16 */
597     const int len_dest = (l >> 4) + 1;
598     const int len_src = (l & 0xf) + 1;
599     uintptr_t ra = GETPC();
600     uint8_t byte_dest, byte_src;
601     S390Access srca, desta;
602     int i, j;
603 
604     srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
605     desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
606 
607     /* Handle rightmost byte */
608     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
609     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
610     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
611     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
612 
613     /* Process remaining bytes from right to left */
614     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
615         byte_dest = byte_src >> 4;
616         if (j >= 0) {
617             byte_src = access_get_byte(env, &srca, j, ra);
618         } else {
619             byte_src = 0;
620         }
621         byte_dest |= byte_src << 4;
622         access_set_byte(env, &desta, i, byte_dest, ra);
623     }
624 }
625 
626 /* move zones  */
627 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
628 {
629     const int mmu_idx = cpu_mmu_index(env, false);
630     S390Access srca1, srca2, desta;
631     uintptr_t ra = GETPC();
632     int i;
633 
634     /* MVZ always copies one more byte than specified - maximum is 256 */
635     l++;
636 
637     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
638     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
639     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
640     for (i = 0; i < l; i++) {
641         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
642                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
643 
644         access_set_byte(env, &desta, i, x, ra);
645     }
646 }
647 
648 /* compare unsigned byte arrays */
649 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
650                               uint64_t s2, uintptr_t ra)
651 {
652     uint32_t i;
653     uint32_t cc = 0;
654 
655     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
656                __func__, l, s1, s2);
657 
658     for (i = 0; i <= l; i++) {
659         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
660         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
661         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
662         if (x < y) {
663             cc = 1;
664             break;
665         } else if (x > y) {
666             cc = 2;
667             break;
668         }
669     }
670 
671     HELPER_LOG("\n");
672     return cc;
673 }
674 
675 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
676 {
677     return do_helper_clc(env, l, s1, s2, GETPC());
678 }
679 
680 /* compare logical under mask */
681 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
682                      uint64_t addr)
683 {
684     uintptr_t ra = GETPC();
685     uint32_t cc = 0;
686 
687     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
688                mask, addr);
689 
690     while (mask) {
691         if (mask & 8) {
692             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
693             uint8_t r = extract32(r1, 24, 8);
694             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
695                        addr);
696             if (r < d) {
697                 cc = 1;
698                 break;
699             } else if (r > d) {
700                 cc = 2;
701                 break;
702             }
703             addr++;
704         }
705         mask = (mask << 1) & 0xf;
706         r1 <<= 8;
707     }
708 
709     HELPER_LOG("\n");
710     return cc;
711 }
712 
713 static inline uint64_t get_address(CPUS390XState *env, int reg)
714 {
715     return wrap_address(env, env->regs[reg]);
716 }
717 
718 /*
719  * Store the address to the given register, zeroing out unused leftmost
720  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
721  */
722 static inline void set_address_zero(CPUS390XState *env, int reg,
723                                     uint64_t address)
724 {
725     if (env->psw.mask & PSW_MASK_64) {
726         env->regs[reg] = address;
727     } else {
728         if (!(env->psw.mask & PSW_MASK_32)) {
729             address &= 0x00ffffff;
730         } else {
731             address &= 0x7fffffff;
732         }
733         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
734     }
735 }
736 
737 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
738 {
739     if (env->psw.mask & PSW_MASK_64) {
740         /* 64-Bit mode */
741         env->regs[reg] = address;
742     } else {
743         if (!(env->psw.mask & PSW_MASK_32)) {
744             /* 24-Bit mode. According to the PoO it is implementation
745             dependent if bits 32-39 remain unchanged or are set to
746             zeros.  Choose the former so that the function can also be
747             used for TRT.  */
748             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
749         } else {
750             /* 31-Bit mode. According to the PoO it is implementation
751             dependent if bit 32 remains unchanged or is set to zero.
752             Choose the latter so that the function can also be used for
753             TRT.  */
754             address &= 0x7fffffff;
755             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
756         }
757     }
758 }
759 
760 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
761 {
762     if (!(env->psw.mask & PSW_MASK_64)) {
763         return (uint32_t)length;
764     }
765     return length;
766 }
767 
768 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
769 {
770     if (!(env->psw.mask & PSW_MASK_64)) {
771         /* 24-Bit and 31-Bit mode */
772         length &= 0x7fffffff;
773     }
774     return length;
775 }
776 
777 static inline uint64_t get_length(CPUS390XState *env, int reg)
778 {
779     return wrap_length31(env, env->regs[reg]);
780 }
781 
782 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
783 {
784     if (env->psw.mask & PSW_MASK_64) {
785         /* 64-Bit mode */
786         env->regs[reg] = length;
787     } else {
788         /* 24-Bit and 31-Bit mode */
789         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
790     }
791 }
792 
793 /* search string (c is byte to search, r2 is string, r1 end of string) */
794 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
795 {
796     uintptr_t ra = GETPC();
797     uint64_t end, str;
798     uint32_t len;
799     uint8_t v, c = env->regs[0];
800 
801     /* Bits 32-55 must contain all 0.  */
802     if (env->regs[0] & 0xffffff00u) {
803         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
804     }
805 
806     str = get_address(env, r2);
807     end = get_address(env, r1);
808 
809     /* Lest we fail to service interrupts in a timely manner, limit the
810        amount of work we're willing to do.  For now, let's cap at 8k.  */
811     for (len = 0; len < 0x2000; ++len) {
812         if (str + len == end) {
813             /* Character not found.  R1 & R2 are unmodified.  */
814             env->cc_op = 2;
815             return;
816         }
817         v = cpu_ldub_data_ra(env, str + len, ra);
818         if (v == c) {
819             /* Character found.  Set R1 to the location; R2 is unmodified.  */
820             env->cc_op = 1;
821             set_address(env, r1, str + len);
822             return;
823         }
824     }
825 
826     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
827     env->cc_op = 3;
828     set_address(env, r2, str + len);
829 }
830 
831 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
832 {
833     uintptr_t ra = GETPC();
834     uint32_t len;
835     uint16_t v, c = env->regs[0];
836     uint64_t end, str, adj_end;
837 
838     /* Bits 32-47 of R0 must be zero.  */
839     if (env->regs[0] & 0xffff0000u) {
840         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
841     }
842 
843     str = get_address(env, r2);
844     end = get_address(env, r1);
845 
846     /* If the LSB of the two addresses differ, use one extra byte.  */
847     adj_end = end + ((str ^ end) & 1);
848 
849     /* Lest we fail to service interrupts in a timely manner, limit the
850        amount of work we're willing to do.  For now, let's cap at 8k.  */
851     for (len = 0; len < 0x2000; len += 2) {
852         if (str + len == adj_end) {
853             /* End of input found.  */
854             env->cc_op = 2;
855             return;
856         }
857         v = cpu_lduw_data_ra(env, str + len, ra);
858         if (v == c) {
859             /* Character found.  Set R1 to the location; R2 is unmodified.  */
860             env->cc_op = 1;
861             set_address(env, r1, str + len);
862             return;
863         }
864     }
865 
866     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
867     env->cc_op = 3;
868     set_address(env, r2, str + len);
869 }
870 
871 /* unsigned string compare (c is string terminator) */
872 uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
873 {
874     uintptr_t ra = GETPC();
875     uint32_t len;
876 
877     c = c & 0xff;
878     s1 = wrap_address(env, s1);
879     s2 = wrap_address(env, s2);
880 
881     /* Lest we fail to service interrupts in a timely manner, limit the
882        amount of work we're willing to do.  For now, let's cap at 8k.  */
883     for (len = 0; len < 0x2000; ++len) {
884         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
885         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
886         if (v1 == v2) {
887             if (v1 == c) {
888                 /* Equal.  CC=0, and don't advance the registers.  */
889                 env->cc_op = 0;
890                 env->retxl = s2;
891                 return s1;
892             }
893         } else {
894             /* Unequal.  CC={1,2}, and advance the registers.  Note that
895                the terminator need not be zero, but the string that contains
896                the terminator is by definition "low".  */
897             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
898             env->retxl = s2 + len;
899             return s1 + len;
900         }
901     }
902 
903     /* CPU-determined bytes equal; advance the registers.  */
904     env->cc_op = 3;
905     env->retxl = s2 + len;
906     return s1 + len;
907 }
908 
909 /* move page */
910 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
911 {
912     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
913     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
914     const int mmu_idx = cpu_mmu_index(env, false);
915     const bool f = extract64(r0, 11, 1);
916     const bool s = extract64(r0, 10, 1);
917     const bool cco = extract64(r0, 8, 1);
918     uintptr_t ra = GETPC();
919     S390Access srca, desta;
920     int exc;
921 
922     if ((f && s) || extract64(r0, 12, 4)) {
923         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
924     }
925 
926     /*
927      * We always manually handle exceptions such that we can properly store
928      * r1/r2 to the lowcore on page-translation exceptions.
929      *
930      * TODO: Access key handling
931      */
932     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
933                             MMU_DATA_LOAD, mmu_idx, ra);
934     if (exc) {
935         if (cco) {
936             return 2;
937         }
938         goto inject_exc;
939     }
940     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
941                             MMU_DATA_STORE, mmu_idx, ra);
942     if (exc) {
943         if (cco && exc != PGM_PROTECTION) {
944             return 1;
945         }
946         goto inject_exc;
947     }
948     access_memmove(env, &desta, &srca, ra);
949     return 0; /* data moved */
950 inject_exc:
951 #if !defined(CONFIG_USER_ONLY)
952     if (exc != PGM_ADDRESSING) {
953         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
954                  env->tlb_fill_tec);
955     }
956     if (exc == PGM_PAGE_TRANS) {
957         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
958                  r1 << 4 | r2);
959     }
960 #endif
961     tcg_s390_program_interrupt(env, exc, ra);
962 }
963 
964 /* string copy */
965 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
966 {
967     const int mmu_idx = cpu_mmu_index(env, false);
968     const uint64_t d = get_address(env, r1);
969     const uint64_t s = get_address(env, r2);
970     const uint8_t c = env->regs[0];
971     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
972     S390Access srca, desta;
973     uintptr_t ra = GETPC();
974     int i;
975 
976     if (env->regs[0] & 0xffffff00ull) {
977         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
978     }
979 
980     /*
981      * Our access should not exceed single pages, as we must not report access
982      * exceptions exceeding the actually copied range (which we don't know at
983      * this point). We might over-indicate watchpoints within the pages
984      * (if we ever care, we have to limit processing to a single byte).
985      */
986     srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
987     desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
988     for (i = 0; i < len; i++) {
989         const uint8_t v = access_get_byte(env, &srca, i, ra);
990 
991         access_set_byte(env, &desta, i, v, ra);
992         if (v == c) {
993             set_address_zero(env, r1, d + i);
994             return 1;
995         }
996     }
997     set_address_zero(env, r1, d + len);
998     set_address_zero(env, r2, s + len);
999     return 3;
1000 }
1001 
1002 /* load access registers r1 to r3 from memory at a2 */
1003 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1004 {
1005     uintptr_t ra = GETPC();
1006     int i;
1007 
1008     if (a2 & 0x3) {
1009         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1010     }
1011 
1012     for (i = r1;; i = (i + 1) % 16) {
1013         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1014         a2 += 4;
1015 
1016         if (i == r3) {
1017             break;
1018         }
1019     }
1020 }
1021 
1022 /* store access registers r1 to r3 in memory at a2 */
1023 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1024 {
1025     uintptr_t ra = GETPC();
1026     int i;
1027 
1028     if (a2 & 0x3) {
1029         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1030     }
1031 
1032     for (i = r1;; i = (i + 1) % 16) {
1033         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1034         a2 += 4;
1035 
1036         if (i == r3) {
1037             break;
1038         }
1039     }
1040 }
1041 
1042 /* move long helper */
1043 static inline uint32_t do_mvcl(CPUS390XState *env,
1044                                uint64_t *dest, uint64_t *destlen,
1045                                uint64_t *src, uint64_t *srclen,
1046                                uint16_t pad, int wordsize, uintptr_t ra)
1047 {
1048     const int mmu_idx = cpu_mmu_index(env, false);
1049     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1050     S390Access srca, desta;
1051     int i, cc;
1052 
1053     if (*destlen == *srclen) {
1054         cc = 0;
1055     } else if (*destlen < *srclen) {
1056         cc = 1;
1057     } else {
1058         cc = 2;
1059     }
1060 
1061     if (!*destlen) {
1062         return cc;
1063     }
1064 
1065     /*
1066      * Only perform one type of type of operation (move/pad) at a time.
1067      * Stay within single pages.
1068      */
1069     if (*srclen) {
1070         /* Copy the src array */
1071         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1072         *destlen -= len;
1073         *srclen -= len;
1074         srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1075         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1076         access_memmove(env, &desta, &srca, ra);
1077         *src = wrap_address(env, *src + len);
1078         *dest = wrap_address(env, *dest + len);
1079     } else if (wordsize == 1) {
1080         /* Pad the remaining area */
1081         *destlen -= len;
1082         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1083         access_memset(env, &desta, pad, ra);
1084         *dest = wrap_address(env, *dest + len);
1085     } else {
1086         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1087 
1088         /* The remaining length selects the padding byte. */
1089         for (i = 0; i < len; (*destlen)--, i++) {
1090             if (*destlen & 1) {
1091                 access_set_byte(env, &desta, i, pad, ra);
1092             } else {
1093                 access_set_byte(env, &desta, i, pad >> 8, ra);
1094             }
1095         }
1096         *dest = wrap_address(env, *dest + len);
1097     }
1098 
1099     return *destlen ? 3 : cc;
1100 }
1101 
1102 /* move long */
1103 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1104 {
1105     const int mmu_idx = cpu_mmu_index(env, false);
1106     uintptr_t ra = GETPC();
1107     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1108     uint64_t dest = get_address(env, r1);
1109     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1110     uint64_t src = get_address(env, r2);
1111     uint8_t pad = env->regs[r2 + 1] >> 24;
1112     CPUState *cs = env_cpu(env);
1113     S390Access srca, desta;
1114     uint32_t cc, cur_len;
1115 
1116     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1117         cc = 3;
1118     } else if (srclen == destlen) {
1119         cc = 0;
1120     } else if (destlen < srclen) {
1121         cc = 1;
1122     } else {
1123         cc = 2;
1124     }
1125 
1126     /* We might have to zero-out some bits even if there was no action. */
1127     if (unlikely(!destlen || cc == 3)) {
1128         set_address_zero(env, r2, src);
1129         set_address_zero(env, r1, dest);
1130         return cc;
1131     } else if (!srclen) {
1132         set_address_zero(env, r2, src);
1133     }
1134 
1135     /*
1136      * Only perform one type of type of operation (move/pad) in one step.
1137      * Stay within single pages.
1138      */
1139     while (destlen) {
1140         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1141         if (!srclen) {
1142             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1143                                    ra);
1144             access_memset(env, &desta, pad, ra);
1145         } else {
1146             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1147 
1148             srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
1149                                   ra);
1150             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1151                                    ra);
1152             access_memmove(env, &desta, &srca, ra);
1153             src = wrap_address(env, src + cur_len);
1154             srclen -= cur_len;
1155             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1156             set_address_zero(env, r2, src);
1157         }
1158         dest = wrap_address(env, dest + cur_len);
1159         destlen -= cur_len;
1160         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1161         set_address_zero(env, r1, dest);
1162 
1163         /*
1164          * MVCL is interruptible. Return to the main loop if requested after
1165          * writing back all state to registers. If no interrupt will get
1166          * injected, we'll end up back in this handler and continue processing
1167          * the remaining parts.
1168          */
1169         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1170             cpu_loop_exit_restore(cs, ra);
1171         }
1172     }
1173     return cc;
1174 }
1175 
1176 /* move long extended */
1177 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1178                        uint32_t r3)
1179 {
1180     uintptr_t ra = GETPC();
1181     uint64_t destlen = get_length(env, r1 + 1);
1182     uint64_t dest = get_address(env, r1);
1183     uint64_t srclen = get_length(env, r3 + 1);
1184     uint64_t src = get_address(env, r3);
1185     uint8_t pad = a2;
1186     uint32_t cc;
1187 
1188     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1189 
1190     set_length(env, r1 + 1, destlen);
1191     set_length(env, r3 + 1, srclen);
1192     set_address(env, r1, dest);
1193     set_address(env, r3, src);
1194 
1195     return cc;
1196 }
1197 
1198 /* move long unicode */
1199 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1200                        uint32_t r3)
1201 {
1202     uintptr_t ra = GETPC();
1203     uint64_t destlen = get_length(env, r1 + 1);
1204     uint64_t dest = get_address(env, r1);
1205     uint64_t srclen = get_length(env, r3 + 1);
1206     uint64_t src = get_address(env, r3);
1207     uint16_t pad = a2;
1208     uint32_t cc;
1209 
1210     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1211 
1212     set_length(env, r1 + 1, destlen);
1213     set_length(env, r3 + 1, srclen);
1214     set_address(env, r1, dest);
1215     set_address(env, r3, src);
1216 
1217     return cc;
1218 }
1219 
1220 /* compare logical long helper */
1221 static inline uint32_t do_clcl(CPUS390XState *env,
1222                                uint64_t *src1, uint64_t *src1len,
1223                                uint64_t *src3, uint64_t *src3len,
1224                                uint16_t pad, uint64_t limit,
1225                                int wordsize, uintptr_t ra)
1226 {
1227     uint64_t len = MAX(*src1len, *src3len);
1228     uint32_t cc = 0;
1229 
1230     check_alignment(env, *src1len | *src3len, wordsize, ra);
1231 
1232     if (!len) {
1233         return cc;
1234     }
1235 
1236     /* Lest we fail to service interrupts in a timely manner, limit the
1237        amount of work we're willing to do.  */
1238     if (len > limit) {
1239         len = limit;
1240         cc = 3;
1241     }
1242 
1243     for (; len; len -= wordsize) {
1244         uint16_t v1 = pad;
1245         uint16_t v3 = pad;
1246 
1247         if (*src1len) {
1248             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1249         }
1250         if (*src3len) {
1251             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1252         }
1253 
1254         if (v1 != v3) {
1255             cc = (v1 < v3) ? 1 : 2;
1256             break;
1257         }
1258 
1259         if (*src1len) {
1260             *src1 += wordsize;
1261             *src1len -= wordsize;
1262         }
1263         if (*src3len) {
1264             *src3 += wordsize;
1265             *src3len -= wordsize;
1266         }
1267     }
1268 
1269     return cc;
1270 }
1271 
1272 
1273 /* compare logical long */
1274 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1275 {
1276     uintptr_t ra = GETPC();
1277     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1278     uint64_t src1 = get_address(env, r1);
1279     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1280     uint64_t src3 = get_address(env, r2);
1281     uint8_t pad = env->regs[r2 + 1] >> 24;
1282     uint32_t cc;
1283 
1284     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1285 
1286     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1287     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1288     set_address(env, r1, src1);
1289     set_address(env, r2, src3);
1290 
1291     return cc;
1292 }
1293 
1294 /* compare logical long extended memcompare insn with padding */
1295 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1296                        uint32_t r3)
1297 {
1298     uintptr_t ra = GETPC();
1299     uint64_t src1len = get_length(env, r1 + 1);
1300     uint64_t src1 = get_address(env, r1);
1301     uint64_t src3len = get_length(env, r3 + 1);
1302     uint64_t src3 = get_address(env, r3);
1303     uint8_t pad = a2;
1304     uint32_t cc;
1305 
1306     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1307 
1308     set_length(env, r1 + 1, src1len);
1309     set_length(env, r3 + 1, src3len);
1310     set_address(env, r1, src1);
1311     set_address(env, r3, src3);
1312 
1313     return cc;
1314 }
1315 
1316 /* compare logical long unicode memcompare insn with padding */
1317 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1318                        uint32_t r3)
1319 {
1320     uintptr_t ra = GETPC();
1321     uint64_t src1len = get_length(env, r1 + 1);
1322     uint64_t src1 = get_address(env, r1);
1323     uint64_t src3len = get_length(env, r3 + 1);
1324     uint64_t src3 = get_address(env, r3);
1325     uint16_t pad = a2;
1326     uint32_t cc = 0;
1327 
1328     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1329 
1330     set_length(env, r1 + 1, src1len);
1331     set_length(env, r3 + 1, src3len);
1332     set_address(env, r1, src1);
1333     set_address(env, r3, src3);
1334 
1335     return cc;
1336 }
1337 
1338 /* checksum */
1339 uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1340                       uint64_t src, uint64_t src_len)
1341 {
1342     uintptr_t ra = GETPC();
1343     uint64_t max_len, len;
1344     uint64_t cksm = (uint32_t)r1;
1345 
1346     /* Lest we fail to service interrupts in a timely manner, limit the
1347        amount of work we're willing to do.  For now, let's cap at 8k.  */
1348     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1349 
1350     /* Process full words as available.  */
1351     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1352         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1353     }
1354 
1355     switch (max_len - len) {
1356     case 1:
1357         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1358         len += 1;
1359         break;
1360     case 2:
1361         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1362         len += 2;
1363         break;
1364     case 3:
1365         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1366         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1367         len += 3;
1368         break;
1369     }
1370 
1371     /* Fold the carry from the checksum.  Note that we can see carry-out
1372        during folding more than once (but probably not more than twice).  */
1373     while (cksm > 0xffffffffull) {
1374         cksm = (uint32_t)cksm + (cksm >> 32);
1375     }
1376 
1377     /* Indicate whether or not we've processed everything.  */
1378     env->cc_op = (len == src_len ? 0 : 3);
1379 
1380     /* Return both cksm and processed length.  */
1381     env->retxl = cksm;
1382     return len;
1383 }
1384 
1385 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1386 {
1387     uintptr_t ra = GETPC();
1388     int len_dest = len >> 4;
1389     int len_src = len & 0xf;
1390     uint8_t b;
1391 
1392     dest += len_dest;
1393     src += len_src;
1394 
1395     /* last byte is special, it only flips the nibbles */
1396     b = cpu_ldub_data_ra(env, src, ra);
1397     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1398     src--;
1399     len_src--;
1400 
1401     /* now pack every value */
1402     while (len_dest > 0) {
1403         b = 0;
1404 
1405         if (len_src >= 0) {
1406             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1407             src--;
1408             len_src--;
1409         }
1410         if (len_src >= 0) {
1411             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1412             src--;
1413             len_src--;
1414         }
1415 
1416         len_dest--;
1417         dest--;
1418         cpu_stb_data_ra(env, dest, b, ra);
1419     }
1420 }
1421 
1422 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1423                            uint32_t srclen, int ssize, uintptr_t ra)
1424 {
1425     int i;
1426     /* The destination operand is always 16 bytes long.  */
1427     const int destlen = 16;
1428 
1429     /* The operands are processed from right to left.  */
1430     src += srclen - 1;
1431     dest += destlen - 1;
1432 
1433     for (i = 0; i < destlen; i++) {
1434         uint8_t b = 0;
1435 
1436         /* Start with a positive sign */
1437         if (i == 0) {
1438             b = 0xc;
1439         } else if (srclen > ssize) {
1440             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1441             src -= ssize;
1442             srclen -= ssize;
1443         }
1444 
1445         if (srclen > ssize) {
1446             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1447             src -= ssize;
1448             srclen -= ssize;
1449         }
1450 
1451         cpu_stb_data_ra(env, dest, b, ra);
1452         dest--;
1453     }
1454 }
1455 
1456 
1457 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1458                  uint32_t srclen)
1459 {
1460     do_pkau(env, dest, src, srclen, 1, GETPC());
1461 }
1462 
1463 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1464                  uint32_t srclen)
1465 {
1466     do_pkau(env, dest, src, srclen, 2, GETPC());
1467 }
1468 
1469 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1470                   uint64_t src)
1471 {
1472     uintptr_t ra = GETPC();
1473     int len_dest = len >> 4;
1474     int len_src = len & 0xf;
1475     uint8_t b;
1476     int second_nibble = 0;
1477 
1478     dest += len_dest;
1479     src += len_src;
1480 
1481     /* last byte is special, it only flips the nibbles */
1482     b = cpu_ldub_data_ra(env, src, ra);
1483     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1484     src--;
1485     len_src--;
1486 
1487     /* now pad every nibble with 0xf0 */
1488 
1489     while (len_dest > 0) {
1490         uint8_t cur_byte = 0;
1491 
1492         if (len_src > 0) {
1493             cur_byte = cpu_ldub_data_ra(env, src, ra);
1494         }
1495 
1496         len_dest--;
1497         dest--;
1498 
1499         /* only advance one nibble at a time */
1500         if (second_nibble) {
1501             cur_byte >>= 4;
1502             len_src--;
1503             src--;
1504         }
1505         second_nibble = !second_nibble;
1506 
1507         /* digit */
1508         cur_byte = (cur_byte & 0xf);
1509         /* zone bits */
1510         cur_byte |= 0xf0;
1511 
1512         cpu_stb_data_ra(env, dest, cur_byte, ra);
1513     }
1514 }
1515 
1516 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1517                                  uint32_t destlen, int dsize, uint64_t src,
1518                                  uintptr_t ra)
1519 {
1520     int i;
1521     uint32_t cc;
1522     uint8_t b;
1523     /* The source operand is always 16 bytes long.  */
1524     const int srclen = 16;
1525 
1526     /* The operands are processed from right to left.  */
1527     src += srclen - 1;
1528     dest += destlen - dsize;
1529 
1530     /* Check for the sign.  */
1531     b = cpu_ldub_data_ra(env, src, ra);
1532     src--;
1533     switch (b & 0xf) {
1534     case 0xa:
1535     case 0xc:
1536     case 0xe ... 0xf:
1537         cc = 0;  /* plus */
1538         break;
1539     case 0xb:
1540     case 0xd:
1541         cc = 1;  /* minus */
1542         break;
1543     default:
1544     case 0x0 ... 0x9:
1545         cc = 3;  /* invalid */
1546         break;
1547     }
1548 
1549     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1550     for (i = 0; i < destlen; i += dsize) {
1551         if (i == (31 * dsize)) {
1552             /* If length is 32/64 bytes, the leftmost byte is 0. */
1553             b = 0;
1554         } else if (i % (2 * dsize)) {
1555             b = cpu_ldub_data_ra(env, src, ra);
1556             src--;
1557         } else {
1558             b >>= 4;
1559         }
1560         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1561         dest -= dsize;
1562     }
1563 
1564     return cc;
1565 }
1566 
1567 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1568                        uint64_t src)
1569 {
1570     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1571 }
1572 
1573 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1574                        uint64_t src)
1575 {
1576     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1577 }
1578 
1579 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1580 {
1581     uintptr_t ra = GETPC();
1582     uint32_t cc = 0;
1583     int i;
1584 
1585     for (i = 0; i < destlen; i++) {
1586         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1587         /* digit */
1588         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1589 
1590         if (i == (destlen - 1)) {
1591             /* sign */
1592             cc |= (b & 0xf) < 0xa ? 1 : 0;
1593         } else {
1594             /* digit */
1595             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1596         }
1597     }
1598 
1599     return cc;
1600 }
1601 
1602 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1603                              uint64_t trans, uintptr_t ra)
1604 {
1605     uint32_t i;
1606 
1607     for (i = 0; i <= len; i++) {
1608         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1609         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1610         cpu_stb_data_ra(env, array + i, new_byte, ra);
1611     }
1612 
1613     return env->cc_op;
1614 }
1615 
1616 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1617                 uint64_t trans)
1618 {
1619     do_helper_tr(env, len, array, trans, GETPC());
1620 }
1621 
1622 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1623                      uint64_t len, uint64_t trans)
1624 {
1625     uintptr_t ra = GETPC();
1626     uint8_t end = env->regs[0] & 0xff;
1627     uint64_t l = len;
1628     uint64_t i;
1629     uint32_t cc = 0;
1630 
1631     if (!(env->psw.mask & PSW_MASK_64)) {
1632         array &= 0x7fffffff;
1633         l = (uint32_t)l;
1634     }
1635 
1636     /* Lest we fail to service interrupts in a timely manner, limit the
1637        amount of work we're willing to do.  For now, let's cap at 8k.  */
1638     if (l > 0x2000) {
1639         l = 0x2000;
1640         cc = 3;
1641     }
1642 
1643     for (i = 0; i < l; i++) {
1644         uint8_t byte, new_byte;
1645 
1646         byte = cpu_ldub_data_ra(env, array + i, ra);
1647 
1648         if (byte == end) {
1649             cc = 1;
1650             break;
1651         }
1652 
1653         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1654         cpu_stb_data_ra(env, array + i, new_byte, ra);
1655     }
1656 
1657     env->cc_op = cc;
1658     env->retxl = len - i;
1659     return array + i;
1660 }
1661 
1662 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1663                                      uint64_t array, uint64_t trans,
1664                                      int inc, uintptr_t ra)
1665 {
1666     int i;
1667 
1668     for (i = 0; i <= len; i++) {
1669         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1670         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1671 
1672         if (sbyte != 0) {
1673             set_address(env, 1, array + i * inc);
1674             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1675             return (i == len) ? 2 : 1;
1676         }
1677     }
1678 
1679     return 0;
1680 }
1681 
1682 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1683                                   uint64_t array, uint64_t trans,
1684                                   uintptr_t ra)
1685 {
1686     return do_helper_trt(env, len, array, trans, 1, ra);
1687 }
1688 
1689 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1690                      uint64_t trans)
1691 {
1692     return do_helper_trt(env, len, array, trans, 1, GETPC());
1693 }
1694 
1695 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1696                                    uint64_t array, uint64_t trans,
1697                                    uintptr_t ra)
1698 {
1699     return do_helper_trt(env, len, array, trans, -1, ra);
1700 }
1701 
1702 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1703                       uint64_t trans)
1704 {
1705     return do_helper_trt(env, len, array, trans, -1, GETPC());
1706 }
1707 
1708 /* Translate one/two to one/two */
1709 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1710                       uint32_t tst, uint32_t sizes)
1711 {
1712     uintptr_t ra = GETPC();
1713     int dsize = (sizes & 1) ? 1 : 2;
1714     int ssize = (sizes & 2) ? 1 : 2;
1715     uint64_t tbl = get_address(env, 1);
1716     uint64_t dst = get_address(env, r1);
1717     uint64_t len = get_length(env, r1 + 1);
1718     uint64_t src = get_address(env, r2);
1719     uint32_t cc = 3;
1720     int i;
1721 
1722     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1723        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1724        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1725     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1726         tbl &= -4096;
1727     } else {
1728         tbl &= -8;
1729     }
1730 
1731     check_alignment(env, len, ssize, ra);
1732 
1733     /* Lest we fail to service interrupts in a timely manner, */
1734     /* limit the amount of work we're willing to do.   */
1735     for (i = 0; i < 0x2000; i++) {
1736         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1737         uint64_t tble = tbl + (sval * dsize);
1738         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1739         if (dval == tst) {
1740             cc = 1;
1741             break;
1742         }
1743         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1744 
1745         len -= ssize;
1746         src += ssize;
1747         dst += dsize;
1748 
1749         if (len == 0) {
1750             cc = 0;
1751             break;
1752         }
1753     }
1754 
1755     set_address(env, r1, dst);
1756     set_length(env, r1 + 1, len);
1757     set_address(env, r2, src);
1758 
1759     return cc;
1760 }
1761 
1762 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1763                   uint32_t r1, uint32_t r3)
1764 {
1765     uintptr_t ra = GETPC();
1766     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1767     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1768     Int128 oldv;
1769     uint64_t oldh, oldl;
1770     bool fail;
1771 
1772     check_alignment(env, addr, 16, ra);
1773 
1774     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1775     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1776 
1777     oldv = int128_make128(oldl, oldh);
1778     fail = !int128_eq(oldv, cmpv);
1779     if (fail) {
1780         newv = oldv;
1781     }
1782 
1783     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1784     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1785 
1786     env->cc_op = fail;
1787     env->regs[r1] = int128_gethi(oldv);
1788     env->regs[r1 + 1] = int128_getlo(oldv);
1789 }
1790 
1791 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1792                            uint32_t r1, uint32_t r3)
1793 {
1794     uintptr_t ra = GETPC();
1795     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1796     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1797     int mem_idx;
1798     MemOpIdx oi;
1799     Int128 oldv;
1800     bool fail;
1801 
1802     assert(HAVE_CMPXCHG128);
1803 
1804     mem_idx = cpu_mmu_index(env, false);
1805     oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1806     oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1807     fail = !int128_eq(oldv, cmpv);
1808 
1809     env->cc_op = fail;
1810     env->regs[r1] = int128_gethi(oldv);
1811     env->regs[r1 + 1] = int128_getlo(oldv);
1812 }
1813 
1814 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1815                         uint64_t a2, bool parallel)
1816 {
1817     uint32_t mem_idx = cpu_mmu_index(env, false);
1818     uintptr_t ra = GETPC();
1819     uint32_t fc = extract32(env->regs[0], 0, 8);
1820     uint32_t sc = extract32(env->regs[0], 8, 8);
1821     uint64_t pl = get_address(env, 1) & -16;
1822     uint64_t svh, svl;
1823     uint32_t cc;
1824 
1825     /* Sanity check the function code and storage characteristic.  */
1826     if (fc > 1 || sc > 3) {
1827         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1828             goto spec_exception;
1829         }
1830         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1831             goto spec_exception;
1832         }
1833     }
1834 
1835     /* Sanity check the alignments.  */
1836     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1837         goto spec_exception;
1838     }
1839 
1840     /* Sanity check writability of the store address.  */
1841     probe_write(env, a2, 1 << sc, mem_idx, ra);
1842 
1843     /*
1844      * Note that the compare-and-swap is atomic, and the store is atomic,
1845      * but the complete operation is not.  Therefore we do not need to
1846      * assert serial context in order to implement this.  That said,
1847      * restart early if we can't support either operation that is supposed
1848      * to be atomic.
1849      */
1850     if (parallel) {
1851         uint32_t max = 2;
1852 #ifdef CONFIG_ATOMIC64
1853         max = 3;
1854 #endif
1855         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1856             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1857             cpu_loop_exit_atomic(env_cpu(env), ra);
1858         }
1859     }
1860 
1861     /* All loads happen before all stores.  For simplicity, load the entire
1862        store value area from the parameter list.  */
1863     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1864     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1865 
1866     switch (fc) {
1867     case 0:
1868         {
1869             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1870             uint32_t cv = env->regs[r3];
1871             uint32_t ov;
1872 
1873             if (parallel) {
1874 #ifdef CONFIG_USER_ONLY
1875                 uint32_t *haddr = g2h(env_cpu(env), a1);
1876                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1877 #else
1878                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1879                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1880 #endif
1881             } else {
1882                 ov = cpu_ldl_data_ra(env, a1, ra);
1883                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1884             }
1885             cc = (ov != cv);
1886             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1887         }
1888         break;
1889 
1890     case 1:
1891         {
1892             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1893             uint64_t cv = env->regs[r3];
1894             uint64_t ov;
1895 
1896             if (parallel) {
1897 #ifdef CONFIG_ATOMIC64
1898                 MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1899                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1900 #else
1901                 /* Note that we asserted !parallel above.  */
1902                 g_assert_not_reached();
1903 #endif
1904             } else {
1905                 ov = cpu_ldq_data_ra(env, a1, ra);
1906                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1907             }
1908             cc = (ov != cv);
1909             env->regs[r3] = ov;
1910         }
1911         break;
1912 
1913     case 2:
1914         {
1915             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1916             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1917             Int128 nv = int128_make128(nvl, nvh);
1918             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1919             Int128 ov;
1920 
1921             if (!parallel) {
1922                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1923                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1924 
1925                 ov = int128_make128(ol, oh);
1926                 cc = !int128_eq(ov, cv);
1927                 if (cc) {
1928                     nv = ov;
1929                 }
1930 
1931                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1932                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1933             } else if (HAVE_CMPXCHG128) {
1934                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1935                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1936                 cc = !int128_eq(ov, cv);
1937             } else {
1938                 /* Note that we asserted !parallel above.  */
1939                 g_assert_not_reached();
1940             }
1941 
1942             env->regs[r3 + 0] = int128_gethi(ov);
1943             env->regs[r3 + 1] = int128_getlo(ov);
1944         }
1945         break;
1946 
1947     default:
1948         g_assert_not_reached();
1949     }
1950 
1951     /* Store only if the comparison succeeded.  Note that above we use a pair
1952        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1953        from the most-significant bits of svh.  */
1954     if (cc == 0) {
1955         switch (sc) {
1956         case 0:
1957             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1958             break;
1959         case 1:
1960             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1961             break;
1962         case 2:
1963             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1964             break;
1965         case 3:
1966             cpu_stq_data_ra(env, a2, svh, ra);
1967             break;
1968         case 4:
1969             if (!parallel) {
1970                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1971                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1972             } else if (HAVE_ATOMIC128) {
1973                 MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1974                 Int128 sv = int128_make128(svl, svh);
1975                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1976             } else {
1977                 /* Note that we asserted !parallel above.  */
1978                 g_assert_not_reached();
1979             }
1980             break;
1981         default:
1982             g_assert_not_reached();
1983         }
1984     }
1985 
1986     return cc;
1987 
1988  spec_exception:
1989     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1990 }
1991 
1992 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1993 {
1994     return do_csst(env, r3, a1, a2, false);
1995 }
1996 
1997 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1998                                uint64_t a2)
1999 {
2000     return do_csst(env, r3, a1, a2, true);
2001 }
2002 
2003 #if !defined(CONFIG_USER_ONLY)
2004 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2005 {
2006     uintptr_t ra = GETPC();
2007     bool PERchanged = false;
2008     uint64_t src = a2;
2009     uint32_t i;
2010 
2011     if (src & 0x7) {
2012         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2013     }
2014 
2015     for (i = r1;; i = (i + 1) % 16) {
2016         uint64_t val = cpu_ldq_data_ra(env, src, ra);
2017         if (env->cregs[i] != val && i >= 9 && i <= 11) {
2018             PERchanged = true;
2019         }
2020         env->cregs[i] = val;
2021         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2022                    i, src, val);
2023         src += sizeof(uint64_t);
2024 
2025         if (i == r3) {
2026             break;
2027         }
2028     }
2029 
2030     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2031         s390_cpu_recompute_watchpoints(env_cpu(env));
2032     }
2033 
2034     tlb_flush(env_cpu(env));
2035 }
2036 
2037 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2038 {
2039     uintptr_t ra = GETPC();
2040     bool PERchanged = false;
2041     uint64_t src = a2;
2042     uint32_t i;
2043 
2044     if (src & 0x3) {
2045         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2046     }
2047 
2048     for (i = r1;; i = (i + 1) % 16) {
2049         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2050         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2051             PERchanged = true;
2052         }
2053         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2054         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2055         src += sizeof(uint32_t);
2056 
2057         if (i == r3) {
2058             break;
2059         }
2060     }
2061 
2062     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2063         s390_cpu_recompute_watchpoints(env_cpu(env));
2064     }
2065 
2066     tlb_flush(env_cpu(env));
2067 }
2068 
2069 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2070 {
2071     uintptr_t ra = GETPC();
2072     uint64_t dest = a2;
2073     uint32_t i;
2074 
2075     if (dest & 0x7) {
2076         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2077     }
2078 
2079     for (i = r1;; i = (i + 1) % 16) {
2080         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2081         dest += sizeof(uint64_t);
2082 
2083         if (i == r3) {
2084             break;
2085         }
2086     }
2087 }
2088 
2089 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2090 {
2091     uintptr_t ra = GETPC();
2092     uint64_t dest = a2;
2093     uint32_t i;
2094 
2095     if (dest & 0x3) {
2096         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2097     }
2098 
2099     for (i = r1;; i = (i + 1) % 16) {
2100         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2101         dest += sizeof(uint32_t);
2102 
2103         if (i == r3) {
2104             break;
2105         }
2106     }
2107 }
2108 
2109 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2110 {
2111     uintptr_t ra = GETPC();
2112     int i;
2113 
2114     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2115 
2116     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2117         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2118     }
2119 
2120     return 0;
2121 }
2122 
2123 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2124 {
2125     S390CPU *cpu = env_archcpu(env);
2126     CPUState *cs = env_cpu(env);
2127 
2128     /*
2129      * TODO: we currently don't handle all access protection types
2130      * (including access-list and key-controlled) as well as AR mode.
2131      */
2132     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2133         /* Fetching permitted; storing permitted */
2134         return 0;
2135     }
2136 
2137     if (env->int_pgm_code == PGM_PROTECTION) {
2138         /* retry if reading is possible */
2139         cs->exception_index = -1;
2140         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2141             /* Fetching permitted; storing not permitted */
2142             return 1;
2143         }
2144     }
2145 
2146     switch (env->int_pgm_code) {
2147     case PGM_PROTECTION:
2148         /* Fetching not permitted; storing not permitted */
2149         cs->exception_index = -1;
2150         return 2;
2151     case PGM_ADDRESSING:
2152     case PGM_TRANS_SPEC:
2153         /* exceptions forwarded to the guest */
2154         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2155         return 0;
2156     }
2157 
2158     /* Translation not available */
2159     cs->exception_index = -1;
2160     return 3;
2161 }
2162 
2163 /* insert storage key extended */
2164 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2165 {
2166     static S390SKeysState *ss;
2167     static S390SKeysClass *skeyclass;
2168     uint64_t addr = wrap_address(env, r2);
2169     uint8_t key;
2170     int rc;
2171 
2172     addr = mmu_real2abs(env, addr);
2173     if (!mmu_absolute_addr_valid(addr, false)) {
2174         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2175     }
2176 
2177     if (unlikely(!ss)) {
2178         ss = s390_get_skeys_device();
2179         skeyclass = S390_SKEYS_GET_CLASS(ss);
2180         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2181             tlb_flush_all_cpus_synced(env_cpu(env));
2182         }
2183     }
2184 
2185     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2186     if (rc) {
2187         trace_get_skeys_nonzero(rc);
2188         return 0;
2189     }
2190     return key;
2191 }
2192 
2193 /* set storage key extended */
2194 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2195 {
2196     static S390SKeysState *ss;
2197     static S390SKeysClass *skeyclass;
2198     uint64_t addr = wrap_address(env, r2);
2199     uint8_t key;
2200     int rc;
2201 
2202     addr = mmu_real2abs(env, addr);
2203     if (!mmu_absolute_addr_valid(addr, false)) {
2204         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2205     }
2206 
2207     if (unlikely(!ss)) {
2208         ss = s390_get_skeys_device();
2209         skeyclass = S390_SKEYS_GET_CLASS(ss);
2210         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2211             tlb_flush_all_cpus_synced(env_cpu(env));
2212         }
2213     }
2214 
2215     key = r1 & 0xfe;
2216     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2217     if (rc) {
2218         trace_set_skeys_nonzero(rc);
2219     }
2220    /*
2221     * As we can only flush by virtual address and not all the entries
2222     * that point to a physical address we have to flush the whole TLB.
2223     */
2224     tlb_flush_all_cpus_synced(env_cpu(env));
2225 }
2226 
2227 /* reset reference bit extended */
2228 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2229 {
2230     uint64_t addr = wrap_address(env, r2);
2231     static S390SKeysState *ss;
2232     static S390SKeysClass *skeyclass;
2233     uint8_t re, key;
2234     int rc;
2235 
2236     addr = mmu_real2abs(env, addr);
2237     if (!mmu_absolute_addr_valid(addr, false)) {
2238         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2239     }
2240 
2241     if (unlikely(!ss)) {
2242         ss = s390_get_skeys_device();
2243         skeyclass = S390_SKEYS_GET_CLASS(ss);
2244         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2245             tlb_flush_all_cpus_synced(env_cpu(env));
2246         }
2247     }
2248 
2249     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2250     if (rc) {
2251         trace_get_skeys_nonzero(rc);
2252         return 0;
2253     }
2254 
2255     re = key & (SK_R | SK_C);
2256     key &= ~SK_R;
2257 
2258     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2259     if (rc) {
2260         trace_set_skeys_nonzero(rc);
2261         return 0;
2262     }
2263    /*
2264     * As we can only flush by virtual address and not all the entries
2265     * that point to a physical address we have to flush the whole TLB.
2266     */
2267     tlb_flush_all_cpus_synced(env_cpu(env));
2268 
2269     /*
2270      * cc
2271      *
2272      * 0  Reference bit zero; change bit zero
2273      * 1  Reference bit zero; change bit one
2274      * 2  Reference bit one; change bit zero
2275      * 3  Reference bit one; change bit one
2276      */
2277 
2278     return re >> 1;
2279 }
2280 
2281 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2282 {
2283     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2284     S390Access srca, desta;
2285     uintptr_t ra = GETPC();
2286     int cc = 0;
2287 
2288     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2289                __func__, l, a1, a2);
2290 
2291     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2292         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2293         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2294     }
2295 
2296     l = wrap_length32(env, l);
2297     if (l > 256) {
2298         /* max 256 */
2299         l = 256;
2300         cc = 3;
2301     } else if (!l) {
2302         return cc;
2303     }
2304 
2305     /* TODO: Access key handling */
2306     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2307     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2308     access_memmove(env, &desta, &srca, ra);
2309     return cc;
2310 }
2311 
2312 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2313 {
2314     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2315     S390Access srca, desta;
2316     uintptr_t ra = GETPC();
2317     int cc = 0;
2318 
2319     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2320                __func__, l, a1, a2);
2321 
2322     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2323         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2324         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2325     }
2326 
2327     l = wrap_length32(env, l);
2328     if (l > 256) {
2329         /* max 256 */
2330         l = 256;
2331         cc = 3;
2332     } else if (!l) {
2333         return cc;
2334     }
2335 
2336     /* TODO: Access key handling */
2337     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2338     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2339     access_memmove(env, &desta, &srca, ra);
2340     return cc;
2341 }
2342 
2343 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2344 {
2345     CPUState *cs = env_cpu(env);
2346     const uintptr_t ra = GETPC();
2347     uint64_t table, entry, raddr;
2348     uint16_t entries, i, index = 0;
2349 
2350     if (r2 & 0xff000) {
2351         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2352     }
2353 
2354     if (!(r2 & 0x800)) {
2355         /* invalidation-and-clearing operation */
2356         table = r1 & ASCE_ORIGIN;
2357         entries = (r2 & 0x7ff) + 1;
2358 
2359         switch (r1 & ASCE_TYPE_MASK) {
2360         case ASCE_TYPE_REGION1:
2361             index = (r2 >> 53) & 0x7ff;
2362             break;
2363         case ASCE_TYPE_REGION2:
2364             index = (r2 >> 42) & 0x7ff;
2365             break;
2366         case ASCE_TYPE_REGION3:
2367             index = (r2 >> 31) & 0x7ff;
2368             break;
2369         case ASCE_TYPE_SEGMENT:
2370             index = (r2 >> 20) & 0x7ff;
2371             break;
2372         }
2373         for (i = 0; i < entries; i++) {
2374             /* addresses are not wrapped in 24/31bit mode but table index is */
2375             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2376             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2377             if (!(entry & REGION_ENTRY_I)) {
2378                 /* we are allowed to not store if already invalid */
2379                 entry |= REGION_ENTRY_I;
2380                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2381             }
2382         }
2383     }
2384 
2385     /* We simply flush the complete tlb, therefore we can ignore r3. */
2386     if (m4 & 1) {
2387         tlb_flush(cs);
2388     } else {
2389         tlb_flush_all_cpus_synced(cs);
2390     }
2391 }
2392 
2393 /* invalidate pte */
2394 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2395                   uint32_t m4)
2396 {
2397     CPUState *cs = env_cpu(env);
2398     const uintptr_t ra = GETPC();
2399     uint64_t page = vaddr & TARGET_PAGE_MASK;
2400     uint64_t pte_addr, pte;
2401 
2402     /* Compute the page table entry address */
2403     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2404     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2405 
2406     /* Mark the page table entry as invalid */
2407     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2408     pte |= PAGE_ENTRY_I;
2409     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2410 
2411     /* XXX we exploit the fact that Linux passes the exact virtual
2412        address here - it's not obliged to! */
2413     if (m4 & 1) {
2414         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2415             tlb_flush_page(cs, page);
2416             /* XXX 31-bit hack */
2417             tlb_flush_page(cs, page ^ 0x80000000);
2418         } else {
2419             /* looks like we don't have a valid virtual address */
2420             tlb_flush(cs);
2421         }
2422     } else {
2423         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2424             tlb_flush_page_all_cpus_synced(cs, page);
2425             /* XXX 31-bit hack */
2426             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2427         } else {
2428             /* looks like we don't have a valid virtual address */
2429             tlb_flush_all_cpus_synced(cs);
2430         }
2431     }
2432 }
2433 
2434 /* flush local tlb */
2435 void HELPER(ptlb)(CPUS390XState *env)
2436 {
2437     tlb_flush(env_cpu(env));
2438 }
2439 
2440 /* flush global tlb */
2441 void HELPER(purge)(CPUS390XState *env)
2442 {
2443     tlb_flush_all_cpus_synced(env_cpu(env));
2444 }
2445 
2446 /* load real address */
2447 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2448 {
2449     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2450     uint64_t ret, tec;
2451     int flags, exc, cc;
2452 
2453     /* XXX incomplete - has more corner cases */
2454     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2455         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2456     }
2457 
2458     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2459     if (exc) {
2460         cc = 3;
2461         ret = exc | 0x80000000;
2462     } else {
2463         cc = 0;
2464         ret |= addr & ~TARGET_PAGE_MASK;
2465     }
2466 
2467     env->cc_op = cc;
2468     return ret;
2469 }
2470 #endif
2471 
2472 /* load pair from quadword */
2473 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2474 {
2475     uintptr_t ra = GETPC();
2476     uint64_t hi, lo;
2477 
2478     check_alignment(env, addr, 16, ra);
2479     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2480     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2481 
2482     env->retxl = lo;
2483     return hi;
2484 }
2485 
2486 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2487 {
2488     uintptr_t ra = GETPC();
2489     uint64_t hi, lo;
2490     int mem_idx;
2491     MemOpIdx oi;
2492     Int128 v;
2493 
2494     assert(HAVE_ATOMIC128);
2495 
2496     mem_idx = cpu_mmu_index(env, false);
2497     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2498     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2499     hi = int128_gethi(v);
2500     lo = int128_getlo(v);
2501 
2502     env->retxl = lo;
2503     return hi;
2504 }
2505 
2506 /* store pair to quadword */
2507 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2508                   uint64_t low, uint64_t high)
2509 {
2510     uintptr_t ra = GETPC();
2511 
2512     check_alignment(env, addr, 16, ra);
2513     cpu_stq_data_ra(env, addr + 0, high, ra);
2514     cpu_stq_data_ra(env, addr + 8, low, ra);
2515 }
2516 
2517 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2518                            uint64_t low, uint64_t high)
2519 {
2520     uintptr_t ra = GETPC();
2521     int mem_idx;
2522     MemOpIdx oi;
2523     Int128 v;
2524 
2525     assert(HAVE_ATOMIC128);
2526 
2527     mem_idx = cpu_mmu_index(env, false);
2528     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2529     v = int128_make128(low, high);
2530     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2531 }
2532 
2533 /* Execute instruction.  This instruction executes an insn modified with
2534    the contents of r1.  It does not change the executed instruction in memory;
2535    it does not change the program counter.
2536 
2537    Perform this by recording the modified instruction in env->ex_value.
2538    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2539 */
2540 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2541 {
2542     uint64_t insn = cpu_lduw_code(env, addr);
2543     uint8_t opc = insn >> 8;
2544 
2545     /* Or in the contents of R1[56:63].  */
2546     insn |= r1 & 0xff;
2547 
2548     /* Load the rest of the instruction.  */
2549     insn <<= 48;
2550     switch (get_ilen(opc)) {
2551     case 2:
2552         break;
2553     case 4:
2554         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2555         break;
2556     case 6:
2557         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2558         break;
2559     default:
2560         g_assert_not_reached();
2561     }
2562 
2563     /* The very most common cases can be sped up by avoiding a new TB.  */
2564     if ((opc & 0xf0) == 0xd0) {
2565         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2566                                       uint64_t, uintptr_t);
2567         static const dx_helper dx[16] = {
2568             [0x0] = do_helper_trt_bkwd,
2569             [0x2] = do_helper_mvc,
2570             [0x4] = do_helper_nc,
2571             [0x5] = do_helper_clc,
2572             [0x6] = do_helper_oc,
2573             [0x7] = do_helper_xc,
2574             [0xc] = do_helper_tr,
2575             [0xd] = do_helper_trt_fwd,
2576         };
2577         dx_helper helper = dx[opc & 0xf];
2578 
2579         if (helper) {
2580             uint32_t l = extract64(insn, 48, 8);
2581             uint32_t b1 = extract64(insn, 44, 4);
2582             uint32_t d1 = extract64(insn, 32, 12);
2583             uint32_t b2 = extract64(insn, 28, 4);
2584             uint32_t d2 = extract64(insn, 16, 12);
2585             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2586             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2587 
2588             env->cc_op = helper(env, l, a1, a2, 0);
2589             env->psw.addr += ilen;
2590             return;
2591         }
2592     } else if (opc == 0x0a) {
2593         env->int_svc_code = extract64(insn, 48, 8);
2594         env->int_svc_ilen = ilen;
2595         helper_exception(env, EXCP_SVC);
2596         g_assert_not_reached();
2597     }
2598 
2599     /* Record the insn we want to execute as well as the ilen to use
2600        during the execution of the target insn.  This will also ensure
2601        that ex_value is non-zero, which flags that we are in a state
2602        that requires such execution.  */
2603     env->ex_value = insn | ilen;
2604 }
2605 
2606 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2607                        uint64_t len)
2608 {
2609     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2610     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2611     const uint64_t r0 = env->regs[0];
2612     const uintptr_t ra = GETPC();
2613     uint8_t dest_key, dest_as, dest_k, dest_a;
2614     uint8_t src_key, src_as, src_k, src_a;
2615     uint64_t val;
2616     int cc = 0;
2617 
2618     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2619                __func__, dest, src, len);
2620 
2621     if (!(env->psw.mask & PSW_MASK_DAT)) {
2622         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2623     }
2624 
2625     /* OAC (operand access control) for the first operand -> dest */
2626     val = (r0 & 0xffff0000ULL) >> 16;
2627     dest_key = (val >> 12) & 0xf;
2628     dest_as = (val >> 6) & 0x3;
2629     dest_k = (val >> 1) & 0x1;
2630     dest_a = val & 0x1;
2631 
2632     /* OAC (operand access control) for the second operand -> src */
2633     val = (r0 & 0x0000ffffULL);
2634     src_key = (val >> 12) & 0xf;
2635     src_as = (val >> 6) & 0x3;
2636     src_k = (val >> 1) & 0x1;
2637     src_a = val & 0x1;
2638 
2639     if (!dest_k) {
2640         dest_key = psw_key;
2641     }
2642     if (!src_k) {
2643         src_key = psw_key;
2644     }
2645     if (!dest_a) {
2646         dest_as = psw_as;
2647     }
2648     if (!src_a) {
2649         src_as = psw_as;
2650     }
2651 
2652     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2653         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2654     }
2655     if (!(env->cregs[0] & CR0_SECONDARY) &&
2656         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2657         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2658     }
2659     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2660         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2661     }
2662 
2663     len = wrap_length32(env, len);
2664     if (len > 4096) {
2665         cc = 3;
2666         len = 4096;
2667     }
2668 
2669     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2670     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2671         (env->psw.mask & PSW_MASK_PSTATE)) {
2672         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2673                       __func__);
2674         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2675     }
2676 
2677     /* FIXME: Access using correct keys and AR-mode */
2678     if (len) {
2679         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2680                                          mmu_idx_from_as(src_as), ra);
2681         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2682                                           mmu_idx_from_as(dest_as), ra);
2683 
2684         access_memmove(env, &desta, &srca, ra);
2685     }
2686 
2687     return cc;
2688 }
2689 
/* Decode a Unicode character.  A return value < 0 indicates success, storing
   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   value >= 0 indicates failure, and the CC value to be returned.

   ADDR is the guest address of the input, ILEN the number of input bytes
   available, ENH_CHECK selects the stricter validity checks, and RA is
   the host return address passed on to the guest memory accessors.  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);
2696 
/* Encode a Unicode character.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure, and the CC value to be returned.

   C is the character to encode and RA is the host return address passed
   on to the guest memory accessors.
   NOTE(review): ILEN presumably is the space available at ADDR -- confirm
   against the encoders.  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2703 
2704 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2705                        bool enh_check, uintptr_t ra,
2706                        uint32_t *ochar, uint32_t *olen)
2707 {
2708     uint8_t s0, s1, s2, s3;
2709     uint32_t c, l;
2710 
2711     if (ilen < 1) {
2712         return 0;
2713     }
2714     s0 = cpu_ldub_data_ra(env, addr, ra);
2715     if (s0 <= 0x7f) {
2716         /* one byte character */
2717         l = 1;
2718         c = s0;
2719     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2720         /* invalid character */
2721         return 2;
2722     } else if (s0 <= 0xdf) {
2723         /* two byte character */
2724         l = 2;
2725         if (ilen < 2) {
2726             return 0;
2727         }
2728         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2729         c = s0 & 0x1f;
2730         c = (c << 6) | (s1 & 0x3f);
2731         if (enh_check && (s1 & 0xc0) != 0x80) {
2732             return 2;
2733         }
2734     } else if (s0 <= 0xef) {
2735         /* three byte character */
2736         l = 3;
2737         if (ilen < 3) {
2738             return 0;
2739         }
2740         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2741         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2742         c = s0 & 0x0f;
2743         c = (c << 6) | (s1 & 0x3f);
2744         c = (c << 6) | (s2 & 0x3f);
2745         /* Fold the byte-by-byte range descriptions in the PoO into
2746            tests against the complete value.  It disallows encodings
2747            that could be smaller, and the UTF-16 surrogates.  */
2748         if (enh_check
2749             && ((s1 & 0xc0) != 0x80
2750                 || (s2 & 0xc0) != 0x80
2751                 || c < 0x1000
2752                 || (c >= 0xd800 && c <= 0xdfff))) {
2753             return 2;
2754         }
2755     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2756         /* four byte character */
2757         l = 4;
2758         if (ilen < 4) {
2759             return 0;
2760         }
2761         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2762         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2763         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2764         c = s0 & 0x07;
2765         c = (c << 6) | (s1 & 0x3f);
2766         c = (c << 6) | (s2 & 0x3f);
2767         c = (c << 6) | (s3 & 0x3f);
2768         /* See above.  */
2769         if (enh_check
2770             && ((s1 & 0xc0) != 0x80
2771                 || (s2 & 0xc0) != 0x80
2772                 || (s3 & 0xc0) != 0x80
2773                 || c < 0x010000
2774                 || c > 0x10ffff)) {
2775             return 2;
2776         }
2777     } else {
2778         /* invalid character */
2779         return 2;
2780     }
2781 
2782     *ochar = c;
2783     *olen = l;
2784     return -1;
2785 }
2786 
2787 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2788                         bool enh_check, uintptr_t ra,
2789                         uint32_t *ochar, uint32_t *olen)
2790 {
2791     uint16_t s0, s1;
2792     uint32_t c, l;
2793 
2794     if (ilen < 2) {
2795         return 0;
2796     }
2797     s0 = cpu_lduw_data_ra(env, addr, ra);
2798     if ((s0 & 0xfc00) != 0xd800) {
2799         /* one word character */
2800         l = 2;
2801         c = s0;
2802     } else {
2803         /* two word character */
2804         l = 4;
2805         if (ilen < 4) {
2806             return 0;
2807         }
2808         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2809         c = extract32(s0, 6, 4) + 1;
2810         c = (c << 6) | (s0 & 0x3f);
2811         c = (c << 10) | (s1 & 0x3ff);
2812         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2813             /* invalid surrogate character */
2814             return 2;
2815         }
2816     }
2817 
2818     *ochar = c;
2819     *olen = l;
2820     return -1;
2821 }
2822 
2823 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2824                         bool enh_check, uintptr_t ra,
2825                         uint32_t *ochar, uint32_t *olen)
2826 {
2827     uint32_t c;
2828 
2829     if (ilen < 4) {
2830         return 0;
2831     }
2832     c = cpu_ldl_data_ra(env, addr, ra);
2833     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2834         /* invalid unicode character */
2835         return 2;
2836     }
2837 
2838     *ochar = c;
2839     *olen = 4;
2840     return -1;
2841 }
2842 
2843 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2844                        uintptr_t ra, uint32_t c, uint32_t *olen)
2845 {
2846     uint8_t d[4];
2847     uint32_t l, i;
2848 
2849     if (c <= 0x7f) {
2850         /* one byte character */
2851         l = 1;
2852         d[0] = c;
2853     } else if (c <= 0x7ff) {
2854         /* two byte character */
2855         l = 2;
2856         d[1] = 0x80 | extract32(c, 0, 6);
2857         d[0] = 0xc0 | extract32(c, 6, 5);
2858     } else if (c <= 0xffff) {
2859         /* three byte character */
2860         l = 3;
2861         d[2] = 0x80 | extract32(c, 0, 6);
2862         d[1] = 0x80 | extract32(c, 6, 6);
2863         d[0] = 0xe0 | extract32(c, 12, 4);
2864     } else {
2865         /* four byte character */
2866         l = 4;
2867         d[3] = 0x80 | extract32(c, 0, 6);
2868         d[2] = 0x80 | extract32(c, 6, 6);
2869         d[1] = 0x80 | extract32(c, 12, 6);
2870         d[0] = 0xf0 | extract32(c, 18, 3);
2871     }
2872 
2873     if (ilen < l) {
2874         return 1;
2875     }
2876     for (i = 0; i < l; ++i) {
2877         cpu_stb_data_ra(env, addr + i, d[i], ra);
2878     }
2879 
2880     *olen = l;
2881     return -1;
2882 }
2883 
2884 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2885                         uintptr_t ra, uint32_t c, uint32_t *olen)
2886 {
2887     uint16_t d0, d1;
2888 
2889     if (c <= 0xffff) {
2890         /* one word character */
2891         if (ilen < 2) {
2892             return 1;
2893         }
2894         cpu_stw_data_ra(env, addr, c, ra);
2895         *olen = 2;
2896     } else {
2897         /* two word character */
2898         if (ilen < 4) {
2899             return 1;
2900         }
2901         d1 = 0xdc00 | extract32(c, 0, 10);
2902         d0 = 0xd800 | extract32(c, 10, 6);
2903         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2904         cpu_stw_data_ra(env, addr + 0, d0, ra);
2905         cpu_stw_data_ra(env, addr + 2, d1, ra);
2906         *olen = 4;
2907     }
2908 
2909     return -1;
2910 }
2911 
2912 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2913                         uintptr_t ra, uint32_t c, uint32_t *olen)
2914 {
2915     if (ilen < 4) {
2916         return 1;
2917     }
2918     cpu_stl_data_ra(env, addr, c, ra);
2919     *olen = 4;
2920     return -1;
2921 }
2922 
2923 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2924                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2925                                        decode_unicode_fn decode,
2926                                        encode_unicode_fn encode)
2927 {
2928     uint64_t dst = get_address(env, r1);
2929     uint64_t dlen = get_length(env, r1 + 1);
2930     uint64_t src = get_address(env, r2);
2931     uint64_t slen = get_length(env, r2 + 1);
2932     bool enh_check = m3 & 1;
2933     int cc, i;
2934 
2935     /* Lest we fail to service interrupts in a timely manner, limit the
2936        amount of work we're willing to do.  For now, let's cap at 256.  */
2937     for (i = 0; i < 256; ++i) {
2938         uint32_t c, ilen, olen;
2939 
2940         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2941         if (unlikely(cc >= 0)) {
2942             break;
2943         }
2944         cc = encode(env, dst, dlen, ra, c, &olen);
2945         if (unlikely(cc >= 0)) {
2946             break;
2947         }
2948 
2949         src += ilen;
2950         slen -= ilen;
2951         dst += olen;
2952         dlen -= olen;
2953         cc = 3;
2954     }
2955 
2956     set_address(env, r1, dst);
2957     set_length(env, r1 + 1, dlen);
2958     set_address(env, r2, src);
2959     set_length(env, r2 + 1, slen);
2960 
2961     return cc;
2962 }
2963 
2964 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2965 {
2966     return convert_unicode(env, r1, r2, m3, GETPC(),
2967                            decode_utf8, encode_utf16);
2968 }
2969 
2970 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2971 {
2972     return convert_unicode(env, r1, r2, m3, GETPC(),
2973                            decode_utf8, encode_utf32);
2974 }
2975 
2976 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2977 {
2978     return convert_unicode(env, r1, r2, m3, GETPC(),
2979                            decode_utf16, encode_utf8);
2980 }
2981 
2982 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2983 {
2984     return convert_unicode(env, r1, r2, m3, GETPC(),
2985                            decode_utf16, encode_utf32);
2986 }
2987 
2988 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2989 {
2990     return convert_unicode(env, r1, r2, m3, GETPC(),
2991                            decode_utf32, encode_utf8);
2992 }
2993 
2994 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2995 {
2996     return convert_unicode(env, r1, r2, m3, GETPC(),
2997                            decode_utf32, encode_utf16);
2998 }
2999 
3000 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3001                         uintptr_t ra)
3002 {
3003     /* test the actual access, not just any access to the page due to LAP */
3004     while (len) {
3005         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3006         const uint64_t curlen = MIN(pagelen, len);
3007 
3008         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3009         addr = wrap_address(env, addr + curlen);
3010         len -= curlen;
3011     }
3012 }
3013 
3014 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3015 {
3016     probe_write_access(env, addr, len, GETPC());
3017 }
3018