/*
 *  S/390 memory access helper routines
 *
 *  Copyright (c) 2009 Ulrich Hecht
 *  Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "tcg_s390x.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg/tcg.h"
#include "trace.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/s390x/storage-keys.h"
#include "hw/boards.h"
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif

static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
        return pkm & (0x80 >> psw_key);
    }
    return true;
}

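/*
 * A destructive overlap means that a byte-by-byte copy from low to high
 * addresses would overwrite source bytes before they have been read,
 * i.e. dest lies within (src, src + len - 1], taking address wrapping
 * into account. For example, with src = 0x1000 and len = 4, any dest in
 * 0x1001..0x1003 is destructive; dest == src + 1 in particular replicates
 * the first source byte (exploited by do_helper_mvc() below).
 */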
static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}

/* Trigger a SPECIFICATION exception if an address or a length is not
   naturally aligned.  */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    if (v % wordsize) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }
}

/* Load a value from memory according to its size.  */
static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
                                           int wordsize, uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        return cpu_ldub_data_ra(env, addr, ra);
    case 2:
        return cpu_lduw_data_ra(env, addr, ra);
    default:
        abort();
    }
}

/* Store a value to memory according to its size.  */
static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
                                      uint64_t value, int wordsize,
                                      uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        cpu_stb_data_ra(env, addr, value, ra);
        break;
    case 2:
        cpu_stw_data_ra(env, addr, value, ra);
        break;
    default:
        abort();
    }
}

/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;
    target_ulong vaddr2;
    char *haddr1;
    char *haddr2;
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe, one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation - very unlikely that
     * the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;

/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
    int flags;

#if defined(CONFIG_USER_ONLY)
    flags = page_get_flags(addr);
    if (!(flags & (access_type == MMU_DATA_LOAD ?  PAGE_READ : PAGE_WRITE_ORG))) {
        env->__excp_addr = addr;
        flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
        if (nonfault) {
            return flags;
        }
        tcg_s390_program_interrupt(env, flags, ra);
    }
    *phost = g2h(env_cpu(env), addr);
#else
    /*
     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
     * to detect if there was an exception during tlb_fill().
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif
    return 0;
}

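/*
 * Note on the page-split arithmetic used below: -(vaddr | TARGET_PAGE_MASK)
 * is the number of bytes from vaddr to the end of its page. Assuming the
 * usual 4 KiB target pages, a 6-byte access at a vaddr ending in 0xffd
 * yields size1 = 3 on the first page and size2 = 3 on the next.
 */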
static int access_prepare_nf(S390Access *access, CPUS390XState *env,
                             bool nonfault, vaddr vaddr1, int size,
                             MMUAccessType access_type,
                             int mmu_idx, uintptr_t ra)
{
    void *haddr1, *haddr2 = NULL;
    int size1, size2, exc;
    vaddr vaddr2 = 0;

    assert(size > 0 && size <= 4096);

    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
    size2 = size - size1;

    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
                            &haddr1, ra);
    if (exc) {
        return exc;
    }
    if (unlikely(size2)) {
        /* The access crosses page boundaries. */
        vaddr2 = wrap_address(env, vaddr1 + size1);
        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
                                nonfault, &haddr2, ra);
        if (exc) {
            return exc;
        }
    }

    *access = (S390Access) {
        .vaddr1 = vaddr1,
        .vaddr2 = vaddr2,
        .haddr1 = haddr1,
        .haddr2 = haddr2,
        .size1 = size1,
        .size2 = size2,
        .mmu_idx = mmu_idx
    };
    return 0;
}

static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
                                 MMUAccessType access_type, int mmu_idx,
                                 uintptr_t ra)
{
    S390Access ret;
    int exc = access_prepare_nf(&ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
    return ret;
}

/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            memset(haddr + 1, byte, size - 1);
        } else {
            for (i = 1; i < size; i++) {
                helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}

static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (likely(!desta->size2)) {
        return;
    }
    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
                     desta->mmu_idx, ra);
}

static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}

static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
                               int offset, uintptr_t ra)
{
    if (offset < access->size1) {
        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
                                  offset, access->mmu_idx, ra);
    }
    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
                              offset - access->size1, access->mmu_idx, ra);
}

static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}

static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    if (offset < access->size1) {
        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
                           access->mmu_idx, ra);
    } else {
        do_access_set_byte(env, access->vaddr2, &access->haddr2,
                           offset - access->size1, byte, access->mmu_idx, ra);
    }
}

/*
 * Move data with the same semantics as memmove(), provided the ranges do
 * not overlap destructively (see is_destructive_overlap()). Behavior is
 * undefined on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

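    /*
     * Fast path: copy page fragments with host memmove(). The source and
     * destination may cross the page boundary at different offsets, so the
     * two unequal cases below re-slice the copy around the shorter first
     * fragment.
     */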
    if (srca->size1 == desta->size1) {
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}

static int mmu_idx_from_as(uint8_t as)
{
    switch (as) {
    case AS_PRIMARY:
        return MMU_PRIMARY_IDX;
    case AS_SECONDARY:
        return MMU_SECONDARY_IDX;
    case AS_HOME:
        return MMU_HOME_IDX;
    default:
        /* FIXME AS_ACCREG */
        g_assert_not_reached();
    }
}

/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

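    /*
     * The destination operand is both fetched (srca2) and stored (desta);
     * processing one byte at a time matches the architected overlap
     * semantics of the boolean storage instructions.
     */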
    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}

/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}

/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}

/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
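    /*
     * dest == src + 1 is the classic MVC idiom for propagating one byte
     * through storage: processed one byte at a time, it degenerates to a
     * memset() of the first source byte.
     */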
    if (dest == src + 1) {
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    return env->cc_op;
}

void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}

/* move inverse  */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    src = wrap_address(env, src - l + 1);
    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move numerics  */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}

/* move zones  */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            cc = 1;
            break;
        } else if (x > y) {
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    return cc;
}

uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}

/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}

static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}

/*
 * Store the address to the given register, zeroing out unused leftmost
 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 */
static inline void set_address_zero(CPUS390XState *env, int reg,
                                    uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            address &= 0x00ffffff;
        } else {
            address &= 0x7fffffff;
        }
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    }
}

static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}

static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        return (uint32_t)length;
    }
    return length;
}

static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        length &= 0x7fffffff;
    }
    return length;
}

static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}

static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
    } else {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    }
}

/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

/* unsigned string compare (c is string terminator) */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}

/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}

/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}

/* load access registers r1 to r3 from memory at a2 */
void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

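    /* The register range wraps modulo 16: e.g. r1 = 14, r3 = 1 touches
       access registers 14, 15, 0 and 1.  */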
    for (i = r1;; i = (i + 1) % 16) {
        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* store access registers r1 to r3 in memory at a2 */
void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
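        /*
         * With an odd number of bytes still to pad, store the low byte of
         * the 16-bit pad; otherwise the high byte. The two-byte pad thus
         * stays aligned to the end of the destination (MVCLU case).
         */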
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}

/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}

/* move long extended */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* move long unicode */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}

/* compare logical long */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}

/* compare logical long extended memcompare insn with padding */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* compare logical long unicode memcompare insn with padding */
uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc = 0;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* checksum */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
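    /* For example, 0x123456789 folds to 0x23456789 + 0x1 = 0x2345678a. */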
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    env->retxl = cksm;
    return len;
}

void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}

static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}

void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}

void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}

void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}

static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
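    /*
     * Every second output unit consumes a fresh packed byte (its low
     * nibble is used first); the alternating units reuse that byte's
     * high nibble.
     */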
1559     for (i = 0; i < destlen; i += dsize) {
1560         if (i == (31 * dsize)) {
1561             /* If length is 32/64 bytes, the leftmost byte is 0. */
1562             b = 0;
1563         } else if (i % (2 * dsize)) {
1564             b = cpu_ldub_data_ra(env, src, ra);
1565             src--;
1566         } else {
1567             b >>= 4;
1568         }
1569         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1570         dest -= dsize;
1571     }
1572 
1573     return cc;
1574 }
1575 
1576 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1577                        uint64_t src)
1578 {
1579     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1580 }
1581 
1582 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1583                        uint64_t src)
1584 {
1585     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1586 }
1587 
1588 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1589 {
1590     uintptr_t ra = GETPC();
1591     uint32_t cc = 0;
1592     int i;
1593 
1594     for (i = 0; i < destlen; i++) {
1595         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1596         /* digit */
1597         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1598 
1599         if (i == (destlen - 1)) {
1600             /* sign */
1601             cc |= (b & 0xf) < 0xa ? 1 : 0;
1602         } else {
1603             /* digit */
1604             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1605         }
1606     }
1607 
1608     return cc;
1609 }
1610 
1611 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1612                              uint64_t trans, uintptr_t ra)
1613 {
1614     uint32_t i;
1615 
1616     for (i = 0; i <= len; i++) {
1617         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1618         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1619         cpu_stb_data_ra(env, array + i, new_byte, ra);
1620     }
1621 
1622     return env->cc_op;
1623 }
1624 
1625 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1626                 uint64_t trans)
1627 {
1628     do_helper_tr(env, len, array, trans, GETPC());
1629 }
1630 
1631 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1632                      uint64_t len, uint64_t trans)
1633 {
1634     uintptr_t ra = GETPC();
1635     uint8_t end = env->regs[0] & 0xff;
1636     uint64_t l = len;
1637     uint64_t i;
1638     uint32_t cc = 0;
1639 
1640     if (!(env->psw.mask & PSW_MASK_64)) {
1641         array &= 0x7fffffff;
1642         l = (uint32_t)l;
1643     }
1644 
1645     /* Lest we fail to service interrupts in a timely manner, limit the
1646        amount of work we're willing to do.  For now, let's cap at 8k.  */
1647     if (l > 0x2000) {
1648         l = 0x2000;
1649         cc = 3;
1650     }
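         /* cc 3 reports CPU-determined partial completion; the updated
            address and remaining length are written back below, so the
            program re-executes TRE to continue.  */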
1651 
1652     for (i = 0; i < l; i++) {
1653         uint8_t byte, new_byte;
1654 
1655         byte = cpu_ldub_data_ra(env, array + i, ra);
1656 
1657         if (byte == end) {
1658             cc = 1;
1659             break;
1660         }
1661 
1662         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1663         cpu_stb_data_ra(env, array + i, new_byte, ra);
1664     }
1665 
1666     env->cc_op = cc;
1667     env->retxl = len - i;
1668     return array + i;
1669 }
1670 
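     /*
      * TRANSLATE AND TEST: each operand byte indexes the 256-byte
      * function-code table at trans.  The scan stops at the first byte
      * whose table entry is nonzero: that entry goes into GR2 and the
      * byte's address into GR1.  cc 0: none found; cc 1: found before
      * the last byte; cc 2: found at the last byte.
      */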
1671 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1672                                      uint64_t array, uint64_t trans,
1673                                      int inc, uintptr_t ra)
1674 {
1675     int i;
1676 
1677     for (i = 0; i <= len; i++) {
1678         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1679         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1680 
1681         if (sbyte != 0) {
1682             set_address(env, 1, array + i * inc);
1683             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1684             return (i == len) ? 2 : 1;
1685         }
1686     }
1687 
1688     return 0;
1689 }
1690 
1691 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1692                                   uint64_t array, uint64_t trans,
1693                                   uintptr_t ra)
1694 {
1695     return do_helper_trt(env, len, array, trans, 1, ra);
1696 }
1697 
1698 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1699                      uint64_t trans)
1700 {
1701     return do_helper_trt(env, len, array, trans, 1, GETPC());
1702 }
1703 
1704 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1705                                    uint64_t array, uint64_t trans,
1706                                    uintptr_t ra)
1707 {
1708     return do_helper_trt(env, len, array, trans, -1, ra);
1709 }
1710 
1711 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1712                       uint64_t trans)
1713 {
1714     return do_helper_trt(env, len, array, trans, -1, GETPC());
1715 }
1716 
1717 /* Translate one/two to one/two */
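     /* Bit 0 of sizes selects 1- vs 2-byte destination characters and
        bit 1 the source characters (bit set = 1 byte), covering TROO,
        TROT, TRTO and TRTT.  */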
1718 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1719                       uint32_t tst, uint32_t sizes)
1720 {
1721     uintptr_t ra = GETPC();
1722     int dsize = (sizes & 1) ? 1 : 2;
1723     int ssize = (sizes & 2) ? 1 : 2;
1724     uint64_t tbl = get_address(env, 1);
1725     uint64_t dst = get_address(env, r1);
1726     uint64_t len = get_length(env, r1 + 1);
1727     uint64_t src = get_address(env, r2);
1728     uint32_t cc = 3;
1729     int i;
1730 
1731     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1732        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1733        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1734     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1735         tbl &= -4096;
1736     } else {
1737         tbl &= -8;
1738     }
1739 
1740     check_alignment(env, len, ssize, ra);
1741 
1742     /* Lest we fail to service interrupts in a timely manner, limit
1743        the amount of work we're willing to do.  */
1744     for (i = 0; i < 0x2000; i++) {
1745         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1746         uint64_t tble = tbl + (sval * dsize);
1747         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1748         if (dval == tst) {
1749             cc = 1;
1750             break;
1751         }
1752         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1753 
1754         len -= ssize;
1755         src += ssize;
1756         dst += dsize;
1757 
1758         if (len == 0) {
1759             cc = 0;
1760             break;
1761         }
1762     }
1763 
1764     set_address(env, r1, dst);
1765     set_length(env, r1 + 1, len);
1766     set_address(env, r2, src);
1767 
1768     return cc;
1769 }
1770 
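     /*
      * COMPARE DOUBLE AND SWAP (16 bytes): if the quadword at addr
      * equals the R1:R1+1 pair, the R3:R3+1 pair is stored and cc is 0;
      * otherwise the old quadword is loaded into R1:R1+1 and cc is 1.
      */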
1771 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1772                   uint32_t r1, uint32_t r3)
1773 {
1774     uintptr_t ra = GETPC();
1775     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1776     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1777     Int128 oldv;
1778     uint64_t oldh, oldl;
1779     bool fail;
1780 
1781     check_alignment(env, addr, 16, ra);
1782 
1783     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1784     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1785 
1786     oldv = int128_make128(oldl, oldh);
1787     fail = !int128_eq(oldv, cmpv);
1788     if (fail) {
1789         newv = oldv;
1790     }
1791 
1792     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1793     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1794 
1795     env->cc_op = fail;
1796     env->regs[r1] = int128_gethi(oldv);
1797     env->regs[r1 + 1] = int128_getlo(oldv);
1798 }
1799 
1800 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1801                            uint32_t r1, uint32_t r3)
1802 {
1803     uintptr_t ra = GETPC();
1804     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1805     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1806     int mem_idx;
1807     TCGMemOpIdx oi;
1808     Int128 oldv;
1809     bool fail;
1810 
1811     assert(HAVE_CMPXCHG128);
1812 
1813     mem_idx = cpu_mmu_index(env, false);
1814     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1815     oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1816     fail = !int128_eq(oldv, cmpv);
1817 
1818     env->cc_op = fail;
1819     env->regs[r1] = int128_gethi(oldv);
1820     env->regs[r1 + 1] = int128_getlo(oldv);
1821 }
1822 
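     /*
      * COMPARE AND SWAP AND STORE: fc selects the compare-and-swap
      * width (4 << fc bytes) and sc the store width (1 << sc bytes).
      * The parameter list at (GR1 & -16) supplies the replacement
      * value at offset 0 and the store value at offset 16.
      */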
1823 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1824                         uint64_t a2, bool parallel)
1825 {
1826     uint32_t mem_idx = cpu_mmu_index(env, false);
1827     uintptr_t ra = GETPC();
1828     uint32_t fc = extract32(env->regs[0], 0, 8);
1829     uint32_t sc = extract32(env->regs[0], 8, 8);
1830     uint64_t pl = get_address(env, 1) & -16;
1831     uint64_t svh, svl;
1832     uint32_t cc;
1833 
1834     /* Sanity check the function code and storage characteristic.  */
1835     if (fc > 1 || sc > 3) {
1836         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1837             goto spec_exception;
1838         }
1839         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1840             goto spec_exception;
1841         }
1842     }
1843 
1844     /* Sanity check the alignments.  */
1845     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1846         goto spec_exception;
1847     }
1848 
1849     /* Sanity check writability of the store address.  */
1850     probe_write(env, a2, 1 << sc, mem_idx, ra);
1851 
1852     /*
1853      * Note that the compare-and-swap is atomic, and the store is atomic,
1854      * but the complete operation is not.  Therefore we do not need to
1855      * assert serial context in order to implement this.  That said,
1856      * restart early if we can't support either operation that is supposed
1857      * to be atomic.
1858      */
1859     if (parallel) {
1860         uint32_t max = 2;
1861 #ifdef CONFIG_ATOMIC64
1862         max = 3;
1863 #endif
1864         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1865             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1866             cpu_loop_exit_atomic(env_cpu(env), ra);
1867         }
1868     }
1869 
1870     /* All loads happen before all stores.  For simplicity, load the entire
1871        store value area from the parameter list.  */
1872     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1873     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1874 
1875     switch (fc) {
1876     case 0:
1877         {
1878             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1879             uint32_t cv = env->regs[r3];
1880             uint32_t ov;
1881 
1882             if (parallel) {
1883 #ifdef CONFIG_USER_ONLY
1884                 uint32_t *haddr = g2h(env_cpu(env), a1);
1885                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1886 #else
1887                 TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1888                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1889 #endif
1890             } else {
1891                 ov = cpu_ldl_data_ra(env, a1, ra);
1892                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1893             }
1894             cc = (ov != cv);
1895             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1896         }
1897         break;
1898 
1899     case 1:
1900         {
1901             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1902             uint64_t cv = env->regs[r3];
1903             uint64_t ov;
1904 
1905             if (parallel) {
1906 #ifdef CONFIG_ATOMIC64
1907                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1908                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1909 #else
1910                 /* Note that we asserted !parallel above.  */
1911                 g_assert_not_reached();
1912 #endif
1913             } else {
1914                 ov = cpu_ldq_data_ra(env, a1, ra);
1915                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1916             }
1917             cc = (ov != cv);
1918             env->regs[r3] = ov;
1919         }
1920         break;
1921 
1922     case 2:
1923         {
1924             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1925             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1926             Int128 nv = int128_make128(nvl, nvh);
1927             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1928             Int128 ov;
1929 
1930             if (!parallel) {
1931                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1932                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1933 
1934                 ov = int128_make128(ol, oh);
1935                 cc = !int128_eq(ov, cv);
1936                 if (cc) {
1937                     nv = ov;
1938                 }
1939 
1940                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1941                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1942             } else if (HAVE_CMPXCHG128) {
1943                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1944                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1945                 cc = !int128_eq(ov, cv);
1946             } else {
1947                 /* Note that we asserted !parallel above.  */
1948                 g_assert_not_reached();
1949             }
1950 
1951             env->regs[r3 + 0] = int128_gethi(ov);
1952             env->regs[r3 + 1] = int128_getlo(ov);
1953         }
1954         break;
1955 
1956     default:
1957         g_assert_not_reached();
1958     }
1959 
1960     /* Store only if the comparison succeeded.  Note that above we use a pair
1961        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1962        from the most-significant bits of svh.  */
1963     if (cc == 0) {
1964         switch (sc) {
1965         case 0:
1966             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1967             break;
1968         case 1:
1969             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1970             break;
1971         case 2:
1972             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1973             break;
1974         case 3:
1975             cpu_stq_data_ra(env, a2, svh, ra);
1976             break;
1977         case 4:
1978             if (!parallel) {
1979                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1980                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1981             } else if (HAVE_ATOMIC128) {
1982                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1983                 Int128 sv = int128_make128(svl, svh);
1984                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1985             } else {
1986                 /* Note that we asserted !parallel above.  */
1987                 g_assert_not_reached();
1988             }
1989             break;
1990         default:
1991             g_assert_not_reached();
1992         }
1993     }
1994 
1995     return cc;
1996 
1997  spec_exception:
1998     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1999 }
2000 
2001 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2002 {
2003     return do_csst(env, r3, a1, a2, false);
2004 }
2005 
2006 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2007                                uint64_t a2)
2008 {
2009     return do_csst(env, r3, a1, a2, true);
2010 }
2011 
2012 #if !defined(CONFIG_USER_ONLY)
2013 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2014 {
2015     uintptr_t ra = GETPC();
2016     bool PERchanged = false;
2017     uint64_t src = a2;
2018     uint32_t i;
2019 
2020     if (src & 0x7) {
2021         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2022     }
2023 
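         /* Registers are loaded in ascending wrap-around order, e.g.
            r1 = 14, r3 = 1 loads cregs 14, 15, 0 and 1.  */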
2024     for (i = r1;; i = (i + 1) % 16) {
2025         uint64_t val = cpu_ldq_data_ra(env, src, ra);
2026         if (env->cregs[i] != val && i >= 9 && i <= 11) {
2027             PERchanged = true;
2028         }
2029         env->cregs[i] = val;
2030         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2031                    i, src, val);
2032         src += sizeof(uint64_t);
2033 
2034         if (i == r3) {
2035             break;
2036         }
2037     }
2038 
2039     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2040         s390_cpu_recompute_watchpoints(env_cpu(env));
2041     }
2042 
2043     tlb_flush(env_cpu(env));
2044 }
2045 
2046 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2047 {
2048     uintptr_t ra = GETPC();
2049     bool PERchanged = false;
2050     uint64_t src = a2;
2051     uint32_t i;
2052 
2053     if (src & 0x3) {
2054         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2055     }
2056 
2057     for (i = r1;; i = (i + 1) % 16) {
2058         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2059         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2060             PERchanged = true;
2061         }
2062         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2063         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2064         src += sizeof(uint32_t);
2065 
2066         if (i == r3) {
2067             break;
2068         }
2069     }
2070 
2071     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2072         s390_cpu_recompute_watchpoints(env_cpu(env));
2073     }
2074 
2075     tlb_flush(env_cpu(env));
2076 }
2077 
2078 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2079 {
2080     uintptr_t ra = GETPC();
2081     uint64_t dest = a2;
2082     uint32_t i;
2083 
2084     if (dest & 0x7) {
2085         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2086     }
2087 
2088     for (i = r1;; i = (i + 1) % 16) {
2089         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2090         dest += sizeof(uint64_t);
2091 
2092         if (i == r3) {
2093             break;
2094         }
2095     }
2096 }
2097 
2098 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2099 {
2100     uintptr_t ra = GETPC();
2101     uint64_t dest = a2;
2102     uint32_t i;
2103 
2104     if (dest & 0x3) {
2105         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2106     }
2107 
2108     for (i = r1;; i = (i + 1) % 16) {
2109         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2110         dest += sizeof(uint32_t);
2111 
2112         if (i == r3) {
2113             break;
2114         }
2115     }
2116 }
2117 
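     /*
      * TEST BLOCK: zero the 4K page at the given real address and
      * report it usable.  We never report unusable storage, so the
      * cc is always 0.
      */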
2118 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2119 {
2120     uintptr_t ra = GETPC();
2121     int i;
2122 
2123     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2124 
2125     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2126         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2127     }
2128 
2129     return 0;
2130 }
2131 
2132 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2133 {
2134     S390CPU *cpu = env_archcpu(env);
2135     CPUState *cs = env_cpu(env);
2136 
2137     /*
2138      * TODO: we currently don't handle all access protection types
2139      * (including access-list and key-controlled) as well as AR mode.
2140      */
2141     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2142         /* Fetching permitted; storing permitted */
2143         return 0;
2144     }
2145 
2146     if (env->int_pgm_code == PGM_PROTECTION) {
2147         /* retry if reading is possible */
2148         cs->exception_index = -1;
2149         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2150             /* Fetching permitted; storing not permitted */
2151             return 1;
2152         }
2153     }
2154 
2155     switch (env->int_pgm_code) {
2156     case PGM_PROTECTION:
2157         /* Fetching not permitted; storing not permitted */
2158         cs->exception_index = -1;
2159         return 2;
2160     case PGM_ADDRESSING:
2161     case PGM_TRANS_SPEC:
2162         /* exceptions forwarded to the guest */
2163         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2164         return 0;
2165     }
2166 
2167     /* Translation not available */
2168     cs->exception_index = -1;
2169     return 3;
2170 }
2171 
2172 /* insert storage key extended */
2173 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2174 {
2175     static S390SKeysState *ss;
2176     static S390SKeysClass *skeyclass;
2177     uint64_t addr = wrap_address(env, r2);
2178     uint8_t key;
2179     int rc;
2180 
2181     addr = mmu_real2abs(env, addr);
2182     if (!mmu_absolute_addr_valid(addr, false)) {
2183         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2184     }
2185 
2186     if (unlikely(!ss)) {
2187         ss = s390_get_skeys_device();
2188         skeyclass = S390_SKEYS_GET_CLASS(ss);
2189         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2190             tlb_flush_all_cpus_synced(env_cpu(env));
2191         }
2192     }
2193 
2194     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2195     if (rc) {
2196         trace_get_skeys_nonzero(rc);
2197         return 0;
2198     }
2199     return key;
2200 }
2201 
2202 /* set storage key extended */
2203 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2204 {
2205     static S390SKeysState *ss;
2206     static S390SKeysClass *skeyclass;
2207     uint64_t addr = wrap_address(env, r2);
2208     uint8_t key;
2209     int rc;
2210 
2211     addr = mmu_real2abs(env, addr);
2212     if (!mmu_absolute_addr_valid(addr, false)) {
2213         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2214     }
2215 
2216     if (unlikely(!ss)) {
2217         ss = s390_get_skeys_device();
2218         skeyclass = S390_SKEYS_GET_CLASS(ss);
2219         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2220             tlb_flush_all_cpus_synced(env_cpu(env));
2221         }
2222     }
2223 
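         /* Only the 7 storage-key bits are used; the lowest bit of R1
            is masked off.  */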
2224     key = r1 & 0xfe;
2225     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2226     if (rc) {
2227         trace_set_skeys_nonzero(rc);
2228     }
2229     /*
2230      * We can only flush TLB entries by virtual address, not all the
2231      * entries that map a given physical address, so flush the whole TLB.
2232      */
2233     tlb_flush_all_cpus_synced(env_cpu(env));
2234 }
2235 
2236 /* reset reference bit extended */
2237 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2238 {
2239     uint64_t addr = wrap_address(env, r2);
2240     static S390SKeysState *ss;
2241     static S390SKeysClass *skeyclass;
2242     uint8_t re, key;
2243     int rc;
2244 
2245     addr = mmu_real2abs(env, addr);
2246     if (!mmu_absolute_addr_valid(addr, false)) {
2247         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2248     }
2249 
2250     if (unlikely(!ss)) {
2251         ss = s390_get_skeys_device();
2252         skeyclass = S390_SKEYS_GET_CLASS(ss);
2253         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2254             tlb_flush_all_cpus_synced(env_cpu(env));
2255         }
2256     }
2257 
2258     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2259     if (rc) {
2260         trace_get_skeys_nonzero(rc);
2261         return 0;
2262     }
2263 
2264     re = key & (SK_R | SK_C);
2265     key &= ~SK_R;
2266 
2267     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2268     if (rc) {
2269         trace_set_skeys_nonzero(rc);
2270         return 0;
2271     }
2272     /*
2273      * We can only flush TLB entries by virtual address, not all the
2274      * entries that map a given physical address, so flush the whole TLB.
2275      */
2276     tlb_flush_all_cpus_synced(env_cpu(env));
2277 
2278     /*
2279      * cc
2280      *
2281      * 0  Reference bit zero; change bit zero
2282      * 1  Reference bit zero; change bit one
2283      * 2  Reference bit one; change bit zero
2284      * 3  Reference bit one; change bit one
2285      */
2286 
2287     return re >> 1;
2288 }
2289 
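     /*
      * MOVE TO SECONDARY: copy from the primary to the secondary
      * address space (MVCP below is the reverse), at most 256 bytes
      * per execution; cc 3 asks the program to re-execute for the
      * remainder.
      */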
2290 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2291 {
2292     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2293     S390Access srca, desta;
2294     uintptr_t ra = GETPC();
2295     int cc = 0;
2296 
2297     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2298                __func__, l, a1, a2);
2299 
2300     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2301         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2302         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2303     }
2304 
2305     l = wrap_length32(env, l);
2306     if (l > 256) {
2307         /* max 256 */
2308         l = 256;
2309         cc = 3;
2310     } else if (!l) {
2311         return cc;
2312     }
2313 
2314     /* TODO: Access key handling */
2315     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2316     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2317     access_memmove(env, &desta, &srca, ra);
2318     return cc;
2319 }
2320 
2321 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2322 {
2323     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2324     S390Access srca, desta;
2325     uintptr_t ra = GETPC();
2326     int cc = 0;
2327 
2328     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2329                __func__, l, a1, a2);
2330 
2331     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2332         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2333         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2334     }
2335 
2336     l = wrap_length32(env, l);
2337     if (l > 256) {
2338         /* max 256 */
2339         l = 256;
2340         cc = 3;
2341     } else if (!l) {
2342         return cc;
2343     }
2344 
2345     /* TODO: Access key handling */
2346     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2347     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2348     access_memmove(env, &desta, &srca, ra);
2349     return cc;
2350 }
2351 
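     /*
      * INVALIDATE DAT TABLE ENTRY: mark the selected region or segment
      * table entries invalid, then flush the TLB.  Since we always
      * flush the complete TLB, the invalidated addresses themselves do
      * not matter for the flush.
      */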
2352 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2353 {
2354     CPUState *cs = env_cpu(env);
2355     const uintptr_t ra = GETPC();
2356     uint64_t table, entry, raddr;
2357     uint16_t entries, i, index = 0;
2358 
2359     if (r2 & 0xff000) {
2360         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2361     }
2362 
2363     if (!(r2 & 0x800)) {
2364         /* invalidation-and-clearing operation */
2365         table = r1 & ASCE_ORIGIN;
2366         entries = (r2 & 0x7ff) + 1;
2367 
2368         switch (r1 & ASCE_TYPE_MASK) {
2369         case ASCE_TYPE_REGION1:
2370             index = (r2 >> 53) & 0x7ff;
2371             break;
2372         case ASCE_TYPE_REGION2:
2373             index = (r2 >> 42) & 0x7ff;
2374             break;
2375         case ASCE_TYPE_REGION3:
2376             index = (r2 >> 31) & 0x7ff;
2377             break;
2378         case ASCE_TYPE_SEGMENT:
2379             index = (r2 >> 20) & 0x7ff;
2380             break;
2381         }
2382         for (i = 0; i < entries; i++) {
2383             /* addresses are not wrapped in 24/31-bit mode but the table index is */
2384             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2385             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2386             if (!(entry & REGION_ENTRY_I)) {
2387                 /* we are allowed to not store if already invalid */
2388                 entry |= REGION_ENTRY_I;
2389                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2390             }
2391         }
2392     }
2393 
2394     /* We simply flush the complete tlb, therefore we can ignore r3. */
2395     if (m4 & 1) {
2396         tlb_flush(cs);
2397     } else {
2398         tlb_flush_all_cpus_synced(cs);
2399     }
2400 }
2401 
2402 /* invalidate pte */
2403 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2404                   uint32_t m4)
2405 {
2406     CPUState *cs = env_cpu(env);
2407     const uintptr_t ra = GETPC();
2408     uint64_t page = vaddr & TARGET_PAGE_MASK;
2409     uint64_t pte_addr, pte;
2410 
2411     /* Compute the page table entry address */
2412     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2413     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2414 
2415     /* Mark the page table entry as invalid */
2416     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2417     pte |= PAGE_ENTRY_I;
2418     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2419 
2420     /* XXX we exploit the fact that Linux passes the exact virtual
2421        address here - it's not obliged to! */
2422     if (m4 & 1) {
2423         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2424             tlb_flush_page(cs, page);
2425             /* XXX 31-bit hack */
2426             tlb_flush_page(cs, page ^ 0x80000000);
2427         } else {
2428             /* looks like we don't have a valid virtual address */
2429             tlb_flush(cs);
2430         }
2431     } else {
2432         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2433             tlb_flush_page_all_cpus_synced(cs, page);
2434             /* XXX 31-bit hack */
2435             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2436         } else {
2437             /* looks like we don't have a valid virtual address */
2438             tlb_flush_all_cpus_synced(cs);
2439         }
2440     }
2441 }
2442 
2443 /* flush local tlb */
2444 void HELPER(ptlb)(CPUS390XState *env)
2445 {
2446     tlb_flush(env_cpu(env));
2447 }
2448 
2449 /* flush global tlb */
2450 void HELPER(purge)(CPUS390XState *env)
2451 {
2452     tlb_flush_all_cpus_synced(env_cpu(env));
2453 }
2454 
2455 /* load real address */
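     /* On a translation exception, cc 3 is set and the exception code
        OR'ed with 0x80000000 is returned instead of the real address.  */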
2456 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2457 {
2458     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2459     uint64_t ret, tec;
2460     int flags, exc, cc;
2461 
2462     /* XXX incomplete - has more corner cases */
2463     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2464         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2465     }
2466 
2467     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2468     if (exc) {
2469         cc = 3;
2470         ret = exc | 0x80000000;
2471     } else {
2472         cc = 0;
2473         ret |= addr & ~TARGET_PAGE_MASK;
2474     }
2475 
2476     env->cc_op = cc;
2477     return ret;
2478 }
2479 #endif
2480 
2481 /* load pair from quadword */
2482 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2483 {
2484     uintptr_t ra = GETPC();
2485     uint64_t hi, lo;
2486 
2487     check_alignment(env, addr, 16, ra);
2488     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2489     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2490 
2491     env->retxl = lo;
2492     return hi;
2493 }
2494 
2495 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2496 {
2497     uintptr_t ra = GETPC();
2498     uint64_t hi, lo;
2499     int mem_idx;
2500     TCGMemOpIdx oi;
2501     Int128 v;
2502 
2503     assert(HAVE_ATOMIC128);
2504 
2505     mem_idx = cpu_mmu_index(env, false);
2506     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2507     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2508     hi = int128_gethi(v);
2509     lo = int128_getlo(v);
2510 
2511     env->retxl = lo;
2512     return hi;
2513 }
2514 
2515 /* store pair to quadword */
2516 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2517                   uint64_t low, uint64_t high)
2518 {
2519     uintptr_t ra = GETPC();
2520 
2521     check_alignment(env, addr, 16, ra);
2522     cpu_stq_data_ra(env, addr + 0, high, ra);
2523     cpu_stq_data_ra(env, addr + 8, low, ra);
2524 }
2525 
2526 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2527                            uint64_t low, uint64_t high)
2528 {
2529     uintptr_t ra = GETPC();
2530     int mem_idx;
2531     TCGMemOpIdx oi;
2532     Int128 v;
2533 
2534     assert(HAVE_ATOMIC128);
2535 
2536     mem_idx = cpu_mmu_index(env, false);
2537     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2538     v = int128_make128(low, high);
2539     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2540 }
2541 
2542 /* Execute instruction.  This instruction executes an insn modified with
2543    the contents of r1.  It does not change the executed instruction in memory;
2544    it does not change the program counter.
2545 
2546    Perform this by recording the modified instruction in env->ex_value.
2547    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2548 */
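     /*
      * E.g. "EX R1,target" where the target is an MVC ORs bits 56-63
      * of R1 into the MVC length field -- the classic idiom for a
      * variable-length move.
      */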
2549 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2550 {
2551     uint64_t insn = cpu_lduw_code(env, addr);
2552     uint8_t opc = insn >> 8;
2553 
2554     /* OR in the contents of R1[56:63].  */
2555     insn |= r1 & 0xff;
2556 
2557     /* Load the rest of the instruction.  */
2558     insn <<= 48;
2559     switch (get_ilen(opc)) {
2560     case 2:
2561         break;
2562     case 4:
2563         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2564         break;
2565     case 6:
2566         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2567         break;
2568     default:
2569         g_assert_not_reached();
2570     }
2571 
2572     /* The very most common cases can be sped up by avoiding a new TB.  */
2573     if ((opc & 0xf0) == 0xd0) {
2574         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2575                                       uint64_t, uintptr_t);
2576         static const dx_helper dx[16] = {
2577             [0x0] = do_helper_trt_bkwd,
2578             [0x2] = do_helper_mvc,
2579             [0x4] = do_helper_nc,
2580             [0x5] = do_helper_clc,
2581             [0x6] = do_helper_oc,
2582             [0x7] = do_helper_xc,
2583             [0xc] = do_helper_tr,
2584             [0xd] = do_helper_trt_fwd,
2585         };
2586         dx_helper helper = dx[opc & 0xf];
2587 
2588         if (helper) {
2589             uint32_t l = extract64(insn, 48, 8);
2590             uint32_t b1 = extract64(insn, 44, 4);
2591             uint32_t d1 = extract64(insn, 32, 12);
2592             uint32_t b2 = extract64(insn, 28, 4);
2593             uint32_t d2 = extract64(insn, 16, 12);
2594             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2595             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2596 
2597             env->cc_op = helper(env, l, a1, a2, 0);
2598             env->psw.addr += ilen;
2599             return;
2600         }
2601     } else if (opc == 0x0a) {
2602         env->int_svc_code = extract64(insn, 48, 8);
2603         env->int_svc_ilen = ilen;
2604         helper_exception(env, EXCP_SVC);
2605         g_assert_not_reached();
2606     }
2607 
2608     /* Record the insn we want to execute as well as the ilen to use
2609        during the execution of the target insn.  This will also ensure
2610        that ex_value is non-zero, which flags that we are in a state
2611        that requires such execution.  */
2612     env->ex_value = insn | ilen;
2613 }
2614 
2615 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2616                        uint64_t len)
2617 {
2618     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2619     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2620     const uint64_t r0 = env->regs[0];
2621     const uintptr_t ra = GETPC();
2622     uint8_t dest_key, dest_as, dest_k, dest_a;
2623     uint8_t src_key, src_as, src_k, src_a;
2624     uint64_t val;
2625     int cc = 0;
2626 
2627     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2628                __func__, dest, src, len);
2629 
2630     if (!(env->psw.mask & PSW_MASK_DAT)) {
2631         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2632     }
2633 
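         /* Each 16-bit OAC holds the access key in bits 0-3 and the AS
            in bits 8-9; the K bit (14) validates the key and the A bit
            (15) the AS, otherwise the PSW values are used below.  */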
2634     /* OAC (operand access control) for the first operand -> dest */
2635     val = (r0 & 0xffff0000ULL) >> 16;
2636     dest_key = (val >> 12) & 0xf;
2637     dest_as = (val >> 6) & 0x3;
2638     dest_k = (val >> 1) & 0x1;
2639     dest_a = val & 0x1;
2640 
2641     /* OAC (operand access control) for the second operand -> src */
2642     val = (r0 & 0x0000ffffULL);
2643     src_key = (val >> 12) & 0xf;
2644     src_as = (val >> 6) & 0x3;
2645     src_k = (val >> 1) & 0x1;
2646     src_a = val & 0x1;
2647 
2648     if (!dest_k) {
2649         dest_key = psw_key;
2650     }
2651     if (!src_k) {
2652         src_key = psw_key;
2653     }
2654     if (!dest_a) {
2655         dest_as = psw_as;
2656     }
2657     if (!src_a) {
2658         src_as = psw_as;
2659     }
2660 
2661     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2662         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2663     }
2664     if (!(env->cregs[0] & CR0_SECONDARY) &&
2665         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2666         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2667     }
2668     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2669         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2670     }
2671 
2672     len = wrap_length32(env, len);
2673     if (len > 4096) {
2674         cc = 3;
2675         len = 4096;
2676     }
2677 
2678     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2679     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2680         (env->psw.mask & PSW_MASK_PSTATE)) {
2681         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2682                       __func__);
2683         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2684     }
2685 
2686     /* FIXME: Access using correct keys and AR-mode */
2687     if (len) {
2688         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2689                                          mmu_idx_from_as(src_as), ra);
2690         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2691                                           mmu_idx_from_as(dest_as), ra);
2692 
2693         access_memmove(env, &desta, &srca, ra);
2694     }
2695 
2696     return cc;
2697 }
2698 
2699 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2700    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2701    value >= 0 indicates failure, and the CC value to be returned.  */
2702 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2703                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2704                                  uint32_t *ochar, uint32_t *olen);
2705 
2706 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2707    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2708    indicates failure, and the CC value to be returned.  */
2709 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2710                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2711                                  uint32_t *olen);
2712 
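     /* E.g. the two-byte sequence 0xc3 0xa9 decodes to U+00E9. */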
2713 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2714                        bool enh_check, uintptr_t ra,
2715                        uint32_t *ochar, uint32_t *olen)
2716 {
2717     uint8_t s0, s1, s2, s3;
2718     uint32_t c, l;
2719 
2720     if (ilen < 1) {
2721         return 0;
2722     }
2723     s0 = cpu_ldub_data_ra(env, addr, ra);
2724     if (s0 <= 0x7f) {
2725         /* one byte character */
2726         l = 1;
2727         c = s0;
2728     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2729         /* invalid character */
2730         return 2;
2731     } else if (s0 <= 0xdf) {
2732         /* two byte character */
2733         l = 2;
2734         if (ilen < 2) {
2735             return 0;
2736         }
2737         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2738         c = s0 & 0x1f;
2739         c = (c << 6) | (s1 & 0x3f);
2740         if (enh_check && (s1 & 0xc0) != 0x80) {
2741             return 2;
2742         }
2743     } else if (s0 <= 0xef) {
2744         /* three byte character */
2745         l = 3;
2746         if (ilen < 3) {
2747             return 0;
2748         }
2749         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2750         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2751         c = s0 & 0x0f;
2752         c = (c << 6) | (s1 & 0x3f);
2753         c = (c << 6) | (s2 & 0x3f);
2754         /* Fold the byte-by-byte range descriptions in the PoO into
2755            tests against the complete value.  This disallows overlong
2756            encodings (values encodable in fewer bytes) and surrogates. */
2757         if (enh_check
2758             && ((s1 & 0xc0) != 0x80
2759                 || (s2 & 0xc0) != 0x80
2760                 || c < 0x1000
2761                 || (c >= 0xd800 && c <= 0xdfff))) {
2762             return 2;
2763         }
2764     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2765         /* four byte character */
2766         l = 4;
2767         if (ilen < 4) {
2768             return 0;
2769         }
2770         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2771         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2772         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2773         c = s0 & 0x07;
2774         c = (c << 6) | (s1 & 0x3f);
2775         c = (c << 6) | (s2 & 0x3f);
2776         c = (c << 6) | (s3 & 0x3f);
2777         /* See above.  */
2778         if (enh_check
2779             && ((s1 & 0xc0) != 0x80
2780                 || (s2 & 0xc0) != 0x80
2781                 || (s3 & 0xc0) != 0x80
2782                 || c < 0x010000
2783                 || c > 0x10ffff)) {
2784             return 2;
2785         }
2786     } else {
2787         /* invalid character */
2788         return 2;
2789     }
2790 
2791     *ochar = c;
2792     *olen = l;
2793     return -1;
2794 }
2795 
2796 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2797                         bool enh_check, uintptr_t ra,
2798                         uint32_t *ochar, uint32_t *olen)
2799 {
2800     uint16_t s0, s1;
2801     uint32_t c, l;
2802 
2803     if (ilen < 2) {
2804         return 0;
2805     }
2806     s0 = cpu_lduw_data_ra(env, addr, ra);
2807     if ((s0 & 0xfc00) != 0xd800) {
2808         /* one word character */
2809         l = 2;
2810         c = s0;
2811     } else {
2812         /* two word character */
2813         l = 4;
2814         if (ilen < 4) {
2815             return 0;
2816         }
2817         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2818         c = extract32(s0, 6, 4) + 1;
2819         c = (c << 6) | (s0 & 0x3f);
2820         c = (c << 10) | (s1 & 0x3ff);
2821         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2822             /* invalid surrogate character */
2823             return 2;
2824         }
2825     }
2826 
2827     *ochar = c;
2828     *olen = l;
2829     return -1;
2830 }
2831 
2832 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2833                         bool enh_check, uintptr_t ra,
2834                         uint32_t *ochar, uint32_t *olen)
2835 {
2836     uint32_t c;
2837 
2838     if (ilen < 4) {
2839         return 0;
2840     }
2841     c = cpu_ldl_data_ra(env, addr, ra);
2842     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2843         /* invalid unicode character */
2844         return 2;
2845     }
2846 
2847     *ochar = c;
2848     *olen = 4;
2849     return -1;
2850 }
2851 
2852 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2853                        uintptr_t ra, uint32_t c, uint32_t *olen)
2854 {
2855     uint8_t d[4];
2856     uint32_t l, i;
2857 
2858     if (c <= 0x7f) {
2859         /* one byte character */
2860         l = 1;
2861         d[0] = c;
2862     } else if (c <= 0x7ff) {
2863         /* two byte character */
2864         l = 2;
2865         d[1] = 0x80 | extract32(c, 0, 6);
2866         d[0] = 0xc0 | extract32(c, 6, 5);
2867     } else if (c <= 0xffff) {
2868         /* three byte character */
2869         l = 3;
2870         d[2] = 0x80 | extract32(c, 0, 6);
2871         d[1] = 0x80 | extract32(c, 6, 6);
2872         d[0] = 0xe0 | extract32(c, 12, 4);
2873     } else {
2874         /* four byte character */
2875         l = 4;
2876         d[3] = 0x80 | extract32(c, 0, 6);
2877         d[2] = 0x80 | extract32(c, 6, 6);
2878         d[1] = 0x80 | extract32(c, 12, 6);
2879         d[0] = 0xf0 | extract32(c, 18, 3);
2880     }
2881 
2882     if (ilen < l) {
2883         return 1;
2884     }
2885     for (i = 0; i < l; ++i) {
2886         cpu_stb_data_ra(env, addr + i, d[i], ra);
2887     }
2888 
2889     *olen = l;
2890     return -1;
2891 }
2892 
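     /* E.g. U+10437 encodes to the surrogate pair 0xd801 0xdc37. */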
2893 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2894                         uintptr_t ra, uint32_t c, uint32_t *olen)
2895 {
2896     uint16_t d0, d1;
2897 
2898     if (c <= 0xffff) {
2899         /* one word character */
2900         if (ilen < 2) {
2901             return 1;
2902         }
2903         cpu_stw_data_ra(env, addr, c, ra);
2904         *olen = 2;
2905     } else {
2906         /* two word character */
2907         if (ilen < 4) {
2908             return 1;
2909         }
2910         d1 = 0xdc00 | extract32(c, 0, 10);
2911         d0 = 0xd800 | extract32(c, 10, 6);
2912         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2913         cpu_stw_data_ra(env, addr + 0, d0, ra);
2914         cpu_stw_data_ra(env, addr + 2, d1, ra);
2915         *olen = 4;
2916     }
2917 
2918     return -1;
2919 }
2920 
2921 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2922                         uintptr_t ra, uint32_t c, uint32_t *olen)
2923 {
2924     if (ilen < 4) {
2925         return 1;
2926     }
2927     cpu_stl_data_ra(env, addr, c, ra);
2928     *olen = 4;
2929     return -1;
2930 }
2931 
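     /*
      * Common CUxx loop: a cc of 3 after 256 characters reports
      * partial completion; the updated register pairs let the program
      * re-execute the instruction to resume.
      */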
2932 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2933                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2934                                        decode_unicode_fn decode,
2935                                        encode_unicode_fn encode)
2936 {
2937     uint64_t dst = get_address(env, r1);
2938     uint64_t dlen = get_length(env, r1 + 1);
2939     uint64_t src = get_address(env, r2);
2940     uint64_t slen = get_length(env, r2 + 1);
2941     bool enh_check = m3 & 1;
2942     int cc, i;
2943 
2944     /* Lest we fail to service interrupts in a timely manner, limit the
2945        amount of work we're willing to do.  For now, let's cap at 256.  */
2946     for (i = 0; i < 256; ++i) {
2947         uint32_t c, ilen, olen;
2948 
2949         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2950         if (unlikely(cc >= 0)) {
2951             break;
2952         }
2953         cc = encode(env, dst, dlen, ra, c, &olen);
2954         if (unlikely(cc >= 0)) {
2955             break;
2956         }
2957 
2958         src += ilen;
2959         slen -= ilen;
2960         dst += olen;
2961         dlen -= olen;
2962         cc = 3;
2963     }
2964 
2965     set_address(env, r1, dst);
2966     set_length(env, r1 + 1, dlen);
2967     set_address(env, r2, src);
2968     set_length(env, r2 + 1, slen);
2969 
2970     return cc;
2971 }
2972 
2973 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2974 {
2975     return convert_unicode(env, r1, r2, m3, GETPC(),
2976                            decode_utf8, encode_utf16);
2977 }
2978 
2979 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2980 {
2981     return convert_unicode(env, r1, r2, m3, GETPC(),
2982                            decode_utf8, encode_utf32);
2983 }
2984 
2985 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2986 {
2987     return convert_unicode(env, r1, r2, m3, GETPC(),
2988                            decode_utf16, encode_utf8);
2989 }
2990 
2991 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2992 {
2993     return convert_unicode(env, r1, r2, m3, GETPC(),
2994                            decode_utf16, encode_utf32);
2995 }
2996 
2997 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2998 {
2999     return convert_unicode(env, r1, r2, m3, GETPC(),
3000                            decode_utf32, encode_utf8);
3001 }
3002 
3003 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3004 {
3005     return convert_unicode(env, r1, r2, m3, GETPC(),
3006                            decode_utf32, encode_utf16);
3007 }
3008 
3009 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3010                         uintptr_t ra)
3011 {
3012     /* test the actual access, not just any access to the page due to LAP */
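         /* -(addr | TARGET_PAGE_MASK) is the distance from addr to the
            end of its page, so each iteration probes at most one page. */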
3013     while (len) {
3014         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3015         const uint64_t curlen = MIN(pagelen, len);
3016 
3017         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3018         addr = wrap_address(env, addr + curlen);
3019         len -= curlen;
3020     }
3021 }
3022 
3023 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3024 {
3025     probe_write_access(env, addr, len, GETPC());
3026 }
3027