xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 90f9e35b)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/cpu_ldst.h"
29 #include "qemu/int128.h"
30 #include "qemu/atomic128.h"
31 #include "trace.h"
32 
33 #if !defined(CONFIG_USER_ONLY)
34 #include "hw/s390x/storage-keys.h"
35 #include "hw/boards.h"
36 #endif
37 
38 /*****************************************************************************/
39 /* Softmmu support */
40 
/*
 * Debug tracing for the helpers in this file. Enable the define below to
 * route HELPER_LOG() to qemu_log(); otherwise HELPER_LOG() expands to
 * nothing and its arguments are not evaluated.
 */
/* #define DEBUG_HELPER */
#ifndef DEBUG_HELPER
#define HELPER_LOG(...)
#else
#define HELPER_LOG(...) qemu_log(__VA_ARGS__)
#endif
47 
48 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
49 {
50     uint16_t pkm = env->cregs[3] >> 16;
51 
52     if (env->psw.mask & PSW_MASK_PSTATE) {
53         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
54         return pkm & (0x80 >> psw_key);
55     }
56     return true;
57 }
58 
59 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
60                                    uint64_t src, uint32_t len)
61 {
62     if (!len || src == dest) {
63         return false;
64     }
65     /* Take care of wrapping at the end of address space. */
66     if (unlikely(wrap_address(env, src + len - 1) < src)) {
67         return dest > src || dest <= wrap_address(env, src + len - 1);
68     }
69     return dest > src && dest <= src + len - 1;
70 }
71 
72 /* Trigger a SPECIFICATION exception if an address or a length is not
73    naturally aligned.  */
74 static inline void check_alignment(CPUS390XState *env, uint64_t v,
75                                    int wordsize, uintptr_t ra)
76 {
77     if (v % wordsize) {
78         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
79     }
80 }
81 
82 /* Load a value from memory according to its size.  */
83 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
84                                            int wordsize, uintptr_t ra)
85 {
86     switch (wordsize) {
87     case 1:
88         return cpu_ldub_data_ra(env, addr, ra);
89     case 2:
90         return cpu_lduw_data_ra(env, addr, ra);
91     default:
92         abort();
93     }
94 }
95 
96 /* Store a to memory according to its size.  */
97 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
98                                       uint64_t value, int wordsize,
99                                       uintptr_t ra)
100 {
101     switch (wordsize) {
102     case 1:
103         cpu_stb_data_ra(env, addr, value, ra);
104         break;
105     case 2:
106         cpu_stw_data_ra(env, addr, value, ra);
107         break;
108     default:
109         abort();
110     }
111 }
112 
113 /* An access covers at most 4096 bytes and therefore at most two pages. */
114 typedef struct S390Access {
115     target_ulong vaddr1;
116     target_ulong vaddr2;
117     char *haddr1;
118     char *haddr2;
119     uint16_t size1;
120     uint16_t size2;
121     /*
122      * If we can't access the host page directly, we'll have to do I/O access
123      * via ld/st helpers. These are internal details, so we store the
124      * mmu idx to do the access here instead of passing it around in the
125      * helpers. Maybe, one day we can get rid of ld/st access - once we can
126      * handle TLB_NOTDIRTY differently. We don't expect these special accesses
127      * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
128      * pages, we might trigger a new MMU translation - very unlikely that
129      * the mapping changes in between and we would trigger a fault.
130      */
131     int mmu_idx;
132 } S390Access;
133 
134 /*
135  * With nonfault=1, return the PGM_ exception that would have been injected
136  * into the guest; return 0 if no exception was detected.
137  *
138  * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec.
139  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
140  */
141 static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
142                              MMUAccessType access_type, int mmu_idx,
143                              bool nonfault, void **phost, uintptr_t ra)
144 {
145 #if defined(CONFIG_USER_ONLY)
146     return probe_access_flags(env, addr, access_type, mmu_idx,
147                               nonfault, phost, ra);
148 #else
149     int flags;
150 
151     /*
152      * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
153      * to detect if there was an exception during tlb_fill().
154      */
155     env->tlb_fill_exc = 0;
156     flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
157                                ra);
158     if (env->tlb_fill_exc) {
159         return env->tlb_fill_exc;
160     }
161 
162     if (unlikely(flags & TLB_WATCHPOINT)) {
163         /* S390 does not presently use transaction attributes. */
164         cpu_check_watchpoint(env_cpu(env), addr, size,
165                              MEMTXATTRS_UNSPECIFIED,
166                              (access_type == MMU_DATA_STORE
167                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
168     }
169     return 0;
170 #endif
171 }
172 
173 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
174                              bool nonfault, vaddr vaddr1, int size,
175                              MMUAccessType access_type,
176                              int mmu_idx, uintptr_t ra)
177 {
178     void *haddr1, *haddr2 = NULL;
179     int size1, size2, exc;
180     vaddr vaddr2 = 0;
181 
182     assert(size > 0 && size <= 4096);
183 
184     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
185     size2 = size - size1;
186 
187     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
188                             &haddr1, ra);
189     if (exc) {
190         return exc;
191     }
192     if (unlikely(size2)) {
193         /* The access crosses page boundaries. */
194         vaddr2 = wrap_address(env, vaddr1 + size1);
195         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
196                                 nonfault, &haddr2, ra);
197         if (exc) {
198             return exc;
199         }
200     }
201 
202     *access = (S390Access) {
203         .vaddr1 = vaddr1,
204         .vaddr2 = vaddr2,
205         .haddr1 = haddr1,
206         .haddr2 = haddr2,
207         .size1 = size1,
208         .size2 = size2,
209         .mmu_idx = mmu_idx
210     };
211     return 0;
212 }
213 
214 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
215                                  MMUAccessType access_type, int mmu_idx,
216                                  uintptr_t ra)
217 {
218     S390Access ret;
219     int exc = access_prepare_nf(&ret, env, false, vaddr, size,
220                                 access_type, mmu_idx, ra);
221     assert(!exc);
222     return ret;
223 }
224 
225 /* Helper to handle memset on a single page. */
226 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
227                              uint8_t byte, uint16_t size, int mmu_idx,
228                              uintptr_t ra)
229 {
230 #ifdef CONFIG_USER_ONLY
231     g_assert(haddr);
232     memset(haddr, byte, size);
233 #else
234     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
235     int i;
236 
237     if (likely(haddr)) {
238         memset(haddr, byte, size);
239     } else {
240         /*
241          * Do a single access and test if we can then get access to the
242          * page. This is especially relevant to speed up TLB_NOTDIRTY.
243          */
244         g_assert(size > 0);
245         cpu_stb_mmu(env, vaddr, byte, oi, ra);
246         haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
247         if (likely(haddr)) {
248             memset(haddr + 1, byte, size - 1);
249         } else {
250             for (i = 1; i < size; i++) {
251                 cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
252             }
253         }
254     }
255 #endif
256 }
257 
258 static void access_memset(CPUS390XState *env, S390Access *desta,
259                           uint8_t byte, uintptr_t ra)
260 {
261 
262     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
263                      desta->mmu_idx, ra);
264     if (likely(!desta->size2)) {
265         return;
266     }
267     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
268                      desta->mmu_idx, ra);
269 }
270 
271 static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
272                                   int offset, int mmu_idx, uintptr_t ra)
273 {
274 #ifdef CONFIG_USER_ONLY
275     return ldub_p(*haddr + offset);
276 #else
277     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
278     uint8_t byte;
279 
280     if (likely(*haddr)) {
281         return ldub_p(*haddr + offset);
282     }
283     /*
284      * Do a single access and test if we can then get access to the
285      * page. This is especially relevant to speed up TLB_NOTDIRTY.
286      */
287     byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
288     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
289     return byte;
290 #endif
291 }
292 
293 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
294                                int offset, uintptr_t ra)
295 {
296     if (offset < access->size1) {
297         return do_access_get_byte(env, access->vaddr1, &access->haddr1,
298                                   offset, access->mmu_idx, ra);
299     }
300     return do_access_get_byte(env, access->vaddr2, &access->haddr2,
301                               offset - access->size1, access->mmu_idx, ra);
302 }
303 
304 static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
305                                int offset, uint8_t byte, int mmu_idx,
306                                uintptr_t ra)
307 {
308 #ifdef CONFIG_USER_ONLY
309     stb_p(*haddr + offset, byte);
310 #else
311     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
312 
313     if (likely(*haddr)) {
314         stb_p(*haddr + offset, byte);
315         return;
316     }
317     /*
318      * Do a single access and test if we can then get access to the
319      * page. This is especially relevant to speed up TLB_NOTDIRTY.
320      */
321     cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
322     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
323 #endif
324 }
325 
326 static void access_set_byte(CPUS390XState *env, S390Access *access,
327                             int offset, uint8_t byte, uintptr_t ra)
328 {
329     if (offset < access->size1) {
330         do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
331                            access->mmu_idx, ra);
332     } else {
333         do_access_set_byte(env, access->vaddr2, &access->haddr2,
334                            offset - access->size1, byte, access->mmu_idx, ra);
335     }
336 }
337 
338 /*
339  * Move data with the same semantics as memmove() in case ranges don't overlap
340  * or src > dest. Undefined behavior on destructive overlaps.
341  */
342 static void access_memmove(CPUS390XState *env, S390Access *desta,
343                            S390Access *srca, uintptr_t ra)
344 {
345     int diff;
346 
347     g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
348 
349     /* Fallback to slow access in case we don't have access to all host pages */
350     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
351                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
352         int i;
353 
354         for (i = 0; i < desta->size1 + desta->size2; i++) {
355             uint8_t byte = access_get_byte(env, srca, i, ra);
356 
357             access_set_byte(env, desta, i, byte, ra);
358         }
359         return;
360     }
361 
362     if (srca->size1 == desta->size1) {
363         memmove(desta->haddr1, srca->haddr1, srca->size1);
364         if (unlikely(srca->size2)) {
365             memmove(desta->haddr2, srca->haddr2, srca->size2);
366         }
367     } else if (srca->size1 < desta->size1) {
368         diff = desta->size1 - srca->size1;
369         memmove(desta->haddr1, srca->haddr1, srca->size1);
370         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
371         if (likely(desta->size2)) {
372             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
373         }
374     } else {
375         diff = srca->size1 - desta->size1;
376         memmove(desta->haddr1, srca->haddr1, desta->size1);
377         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
378         if (likely(srca->size2)) {
379             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
380         }
381     }
382 }
383 
384 static int mmu_idx_from_as(uint8_t as)
385 {
386     switch (as) {
387     case AS_PRIMARY:
388         return MMU_PRIMARY_IDX;
389     case AS_SECONDARY:
390         return MMU_SECONDARY_IDX;
391     case AS_HOME:
392         return MMU_HOME_IDX;
393     default:
394         /* FIXME AS_ACCREG */
395         g_assert_not_reached();
396     }
397 }
398 
399 /* and on array */
400 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
401                              uint64_t src, uintptr_t ra)
402 {
403     const int mmu_idx = cpu_mmu_index(env, false);
404     S390Access srca1, srca2, desta;
405     uint32_t i;
406     uint8_t c = 0;
407 
408     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
409                __func__, l, dest, src);
410 
411     /* NC always processes one more byte than specified - maximum is 256 */
412     l++;
413 
414     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
415     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
416     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
417     for (i = 0; i < l; i++) {
418         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
419                           access_get_byte(env, &srca2, i, ra);
420 
421         c |= x;
422         access_set_byte(env, &desta, i, x, ra);
423     }
424     return c != 0;
425 }
426 
427 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
428                     uint64_t src)
429 {
430     return do_helper_nc(env, l, dest, src, GETPC());
431 }
432 
433 /* xor on array */
434 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
435                              uint64_t src, uintptr_t ra)
436 {
437     const int mmu_idx = cpu_mmu_index(env, false);
438     S390Access srca1, srca2, desta;
439     uint32_t i;
440     uint8_t c = 0;
441 
442     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
443                __func__, l, dest, src);
444 
445     /* XC always processes one more byte than specified - maximum is 256 */
446     l++;
447 
448     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
449     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
450     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
451 
452     /* xor with itself is the same as memset(0) */
453     if (src == dest) {
454         access_memset(env, &desta, 0, ra);
455         return 0;
456     }
457 
458     for (i = 0; i < l; i++) {
459         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
460                           access_get_byte(env, &srca2, i, ra);
461 
462         c |= x;
463         access_set_byte(env, &desta, i, x, ra);
464     }
465     return c != 0;
466 }
467 
468 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
469                     uint64_t src)
470 {
471     return do_helper_xc(env, l, dest, src, GETPC());
472 }
473 
474 /* or on array */
475 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
476                              uint64_t src, uintptr_t ra)
477 {
478     const int mmu_idx = cpu_mmu_index(env, false);
479     S390Access srca1, srca2, desta;
480     uint32_t i;
481     uint8_t c = 0;
482 
483     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
484                __func__, l, dest, src);
485 
486     /* OC always processes one more byte than specified - maximum is 256 */
487     l++;
488 
489     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
490     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
491     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
492     for (i = 0; i < l; i++) {
493         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
494                           access_get_byte(env, &srca2, i, ra);
495 
496         c |= x;
497         access_set_byte(env, &desta, i, x, ra);
498     }
499     return c != 0;
500 }
501 
502 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
503                     uint64_t src)
504 {
505     return do_helper_oc(env, l, dest, src, GETPC());
506 }
507 
508 /* memmove */
509 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
510                               uint64_t src, uintptr_t ra)
511 {
512     const int mmu_idx = cpu_mmu_index(env, false);
513     S390Access srca, desta;
514     uint32_t i;
515 
516     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
517                __func__, l, dest, src);
518 
519     /* MVC always copies one more byte than specified - maximum is 256 */
520     l++;
521 
522     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
523     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
524 
525     /*
526      * "When the operands overlap, the result is obtained as if the operands
527      * were processed one byte at a time". Only non-destructive overlaps
528      * behave like memmove().
529      */
530     if (dest == src + 1) {
531         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
532     } else if (!is_destructive_overlap(env, dest, src, l)) {
533         access_memmove(env, &desta, &srca, ra);
534     } else {
535         for (i = 0; i < l; i++) {
536             uint8_t byte = access_get_byte(env, &srca, i, ra);
537 
538             access_set_byte(env, &desta, i, byte, ra);
539         }
540     }
541 
542     return env->cc_op;
543 }
544 
545 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
546 {
547     do_helper_mvc(env, l, dest, src, GETPC());
548 }
549 
550 /* move right to left */
551 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
552 {
553     const int mmu_idx = cpu_mmu_index(env, false);
554     const uint64_t ra = GETPC();
555     S390Access srca, desta;
556     int32_t i;
557 
558     /* MVCRL always copies one more byte than specified - maximum is 256 */
559     l++;
560 
561     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
562     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
563 
564     for (i = l - 1; i >= 0; i--) {
565         uint8_t byte = access_get_byte(env, &srca, i, ra);
566         access_set_byte(env, &desta, i, byte, ra);
567     }
568 }
569 
570 /* move inverse  */
571 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
572 {
573     const int mmu_idx = cpu_mmu_index(env, false);
574     S390Access srca, desta;
575     uintptr_t ra = GETPC();
576     int i;
577 
578     /* MVCIN always copies one more byte than specified - maximum is 256 */
579     l++;
580 
581     src = wrap_address(env, src - l + 1);
582     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
583     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
584     for (i = 0; i < l; i++) {
585         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
586 
587         access_set_byte(env, &desta, i, x, ra);
588     }
589 }
590 
591 /* move numerics  */
592 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
593 {
594     const int mmu_idx = cpu_mmu_index(env, false);
595     S390Access srca1, srca2, desta;
596     uintptr_t ra = GETPC();
597     int i;
598 
599     /* MVN always copies one more byte than specified - maximum is 256 */
600     l++;
601 
602     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
603     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
604     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
605     for (i = 0; i < l; i++) {
606         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
607                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
608 
609         access_set_byte(env, &desta, i, x, ra);
610     }
611 }
612 
613 /* move with offset  */
614 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
615 {
616     const int mmu_idx = cpu_mmu_index(env, false);
617     /* MVO always processes one more byte than specified - maximum is 16 */
618     const int len_dest = (l >> 4) + 1;
619     const int len_src = (l & 0xf) + 1;
620     uintptr_t ra = GETPC();
621     uint8_t byte_dest, byte_src;
622     S390Access srca, desta;
623     int i, j;
624 
625     srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
626     desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
627 
628     /* Handle rightmost byte */
629     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
630     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
631     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
632     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
633 
634     /* Process remaining bytes from right to left */
635     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
636         byte_dest = byte_src >> 4;
637         if (j >= 0) {
638             byte_src = access_get_byte(env, &srca, j, ra);
639         } else {
640             byte_src = 0;
641         }
642         byte_dest |= byte_src << 4;
643         access_set_byte(env, &desta, i, byte_dest, ra);
644     }
645 }
646 
647 /* move zones  */
648 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
649 {
650     const int mmu_idx = cpu_mmu_index(env, false);
651     S390Access srca1, srca2, desta;
652     uintptr_t ra = GETPC();
653     int i;
654 
655     /* MVZ always copies one more byte than specified - maximum is 256 */
656     l++;
657 
658     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
659     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
660     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
661     for (i = 0; i < l; i++) {
662         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
663                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
664 
665         access_set_byte(env, &desta, i, x, ra);
666     }
667 }
668 
669 /* compare unsigned byte arrays */
670 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
671                               uint64_t s2, uintptr_t ra)
672 {
673     uint32_t i;
674     uint32_t cc = 0;
675 
676     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
677                __func__, l, s1, s2);
678 
679     for (i = 0; i <= l; i++) {
680         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
681         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
682         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
683         if (x < y) {
684             cc = 1;
685             break;
686         } else if (x > y) {
687             cc = 2;
688             break;
689         }
690     }
691 
692     HELPER_LOG("\n");
693     return cc;
694 }
695 
696 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
697 {
698     return do_helper_clc(env, l, s1, s2, GETPC());
699 }
700 
701 /* compare logical under mask */
702 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
703                      uint64_t addr)
704 {
705     uintptr_t ra = GETPC();
706     uint32_t cc = 0;
707 
708     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
709                mask, addr);
710 
711     while (mask) {
712         if (mask & 8) {
713             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
714             uint8_t r = extract32(r1, 24, 8);
715             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
716                        addr);
717             if (r < d) {
718                 cc = 1;
719                 break;
720             } else if (r > d) {
721                 cc = 2;
722                 break;
723             }
724             addr++;
725         }
726         mask = (mask << 1) & 0xf;
727         r1 <<= 8;
728     }
729 
730     HELPER_LOG("\n");
731     return cc;
732 }
733 
734 static inline uint64_t get_address(CPUS390XState *env, int reg)
735 {
736     return wrap_address(env, env->regs[reg]);
737 }
738 
739 /*
740  * Store the address to the given register, zeroing out unused leftmost
741  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
742  */
743 static inline void set_address_zero(CPUS390XState *env, int reg,
744                                     uint64_t address)
745 {
746     if (env->psw.mask & PSW_MASK_64) {
747         env->regs[reg] = address;
748     } else {
749         if (!(env->psw.mask & PSW_MASK_32)) {
750             address &= 0x00ffffff;
751         } else {
752             address &= 0x7fffffff;
753         }
754         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
755     }
756 }
757 
758 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
759 {
760     if (env->psw.mask & PSW_MASK_64) {
761         /* 64-Bit mode */
762         env->regs[reg] = address;
763     } else {
764         if (!(env->psw.mask & PSW_MASK_32)) {
765             /* 24-Bit mode. According to the PoO it is implementation
766             dependent if bits 32-39 remain unchanged or are set to
767             zeros.  Choose the former so that the function can also be
768             used for TRT.  */
769             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
770         } else {
771             /* 31-Bit mode. According to the PoO it is implementation
772             dependent if bit 32 remains unchanged or is set to zero.
773             Choose the latter so that the function can also be used for
774             TRT.  */
775             address &= 0x7fffffff;
776             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
777         }
778     }
779 }
780 
781 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
782 {
783     if (!(env->psw.mask & PSW_MASK_64)) {
784         return (uint32_t)length;
785     }
786     return length;
787 }
788 
789 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
790 {
791     if (!(env->psw.mask & PSW_MASK_64)) {
792         /* 24-Bit and 31-Bit mode */
793         length &= 0x7fffffff;
794     }
795     return length;
796 }
797 
798 static inline uint64_t get_length(CPUS390XState *env, int reg)
799 {
800     return wrap_length31(env, env->regs[reg]);
801 }
802 
803 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
804 {
805     if (env->psw.mask & PSW_MASK_64) {
806         /* 64-Bit mode */
807         env->regs[reg] = length;
808     } else {
809         /* 24-Bit and 31-Bit mode */
810         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
811     }
812 }
813 
814 /* search string (c is byte to search, r2 is string, r1 end of string) */
815 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
816 {
817     uintptr_t ra = GETPC();
818     uint64_t end, str;
819     uint32_t len;
820     uint8_t v, c = env->regs[0];
821 
822     /* Bits 32-55 must contain all 0.  */
823     if (env->regs[0] & 0xffffff00u) {
824         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
825     }
826 
827     str = get_address(env, r2);
828     end = get_address(env, r1);
829 
830     /* Lest we fail to service interrupts in a timely manner, limit the
831        amount of work we're willing to do.  For now, let's cap at 8k.  */
832     for (len = 0; len < 0x2000; ++len) {
833         if (str + len == end) {
834             /* Character not found.  R1 & R2 are unmodified.  */
835             env->cc_op = 2;
836             return;
837         }
838         v = cpu_ldub_data_ra(env, str + len, ra);
839         if (v == c) {
840             /* Character found.  Set R1 to the location; R2 is unmodified.  */
841             env->cc_op = 1;
842             set_address(env, r1, str + len);
843             return;
844         }
845     }
846 
847     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
848     env->cc_op = 3;
849     set_address(env, r2, str + len);
850 }
851 
852 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
853 {
854     uintptr_t ra = GETPC();
855     uint32_t len;
856     uint16_t v, c = env->regs[0];
857     uint64_t end, str, adj_end;
858 
859     /* Bits 32-47 of R0 must be zero.  */
860     if (env->regs[0] & 0xffff0000u) {
861         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
862     }
863 
864     str = get_address(env, r2);
865     end = get_address(env, r1);
866 
867     /* If the LSB of the two addresses differ, use one extra byte.  */
868     adj_end = end + ((str ^ end) & 1);
869 
870     /* Lest we fail to service interrupts in a timely manner, limit the
871        amount of work we're willing to do.  For now, let's cap at 8k.  */
872     for (len = 0; len < 0x2000; len += 2) {
873         if (str + len == adj_end) {
874             /* End of input found.  */
875             env->cc_op = 2;
876             return;
877         }
878         v = cpu_lduw_data_ra(env, str + len, ra);
879         if (v == c) {
880             /* Character found.  Set R1 to the location; R2 is unmodified.  */
881             env->cc_op = 1;
882             set_address(env, r1, str + len);
883             return;
884         }
885     }
886 
887     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
888     env->cc_op = 3;
889     set_address(env, r2, str + len);
890 }
891 
892 /* unsigned string compare (c is string terminator) */
893 uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
894 {
895     uintptr_t ra = GETPC();
896     uint32_t len;
897 
898     c = c & 0xff;
899     s1 = wrap_address(env, s1);
900     s2 = wrap_address(env, s2);
901 
902     /* Lest we fail to service interrupts in a timely manner, limit the
903        amount of work we're willing to do.  For now, let's cap at 8k.  */
904     for (len = 0; len < 0x2000; ++len) {
905         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
906         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
907         if (v1 == v2) {
908             if (v1 == c) {
909                 /* Equal.  CC=0, and don't advance the registers.  */
910                 env->cc_op = 0;
911                 env->retxl = s2;
912                 return s1;
913             }
914         } else {
915             /* Unequal.  CC={1,2}, and advance the registers.  Note that
916                the terminator need not be zero, but the string that contains
917                the terminator is by definition "low".  */
918             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
919             env->retxl = s2 + len;
920             return s1 + len;
921         }
922     }
923 
924     /* CPU-determined bytes equal; advance the registers.  */
925     env->cc_op = 3;
926     env->retxl = s2 + len;
927     return s1 + len;
928 }
929 
930 /* move page */
931 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
932 {
933     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
934     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
935     const int mmu_idx = cpu_mmu_index(env, false);
936     const bool f = extract64(r0, 11, 1);
937     const bool s = extract64(r0, 10, 1);
938     const bool cco = extract64(r0, 8, 1);
939     uintptr_t ra = GETPC();
940     S390Access srca, desta;
941     int exc;
942 
943     if ((f && s) || extract64(r0, 12, 4)) {
944         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
945     }
946 
947     /*
948      * We always manually handle exceptions such that we can properly store
949      * r1/r2 to the lowcore on page-translation exceptions.
950      *
951      * TODO: Access key handling
952      */
953     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
954                             MMU_DATA_LOAD, mmu_idx, ra);
955     if (exc) {
956         if (cco) {
957             return 2;
958         }
959         goto inject_exc;
960     }
961     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
962                             MMU_DATA_STORE, mmu_idx, ra);
963     if (exc) {
964         if (cco && exc != PGM_PROTECTION) {
965             return 1;
966         }
967         goto inject_exc;
968     }
969     access_memmove(env, &desta, &srca, ra);
970     return 0; /* data moved */
971 inject_exc:
972 #if !defined(CONFIG_USER_ONLY)
973     if (exc != PGM_ADDRESSING) {
974         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
975                  env->tlb_fill_tec);
976     }
977     if (exc == PGM_PAGE_TRANS) {
978         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
979                  r1 << 4 | r2);
980     }
981 #endif
982     tcg_s390_program_interrupt(env, exc, ra);
983 }
984 
985 /* string copy */
986 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
987 {
988     const int mmu_idx = cpu_mmu_index(env, false);
989     const uint64_t d = get_address(env, r1);
990     const uint64_t s = get_address(env, r2);
991     const uint8_t c = env->regs[0];
992     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
993     S390Access srca, desta;
994     uintptr_t ra = GETPC();
995     int i;
996 
997     if (env->regs[0] & 0xffffff00ull) {
998         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
999     }
1000 
1001     /*
1002      * Our access should not exceed single pages, as we must not report access
1003      * exceptions exceeding the actually copied range (which we don't know at
1004      * this point). We might over-indicate watchpoints within the pages
1005      * (if we ever care, we have to limit processing to a single byte).
1006      */
1007     srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
1008     desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
1009     for (i = 0; i < len; i++) {
1010         const uint8_t v = access_get_byte(env, &srca, i, ra);
1011 
1012         access_set_byte(env, &desta, i, v, ra);
1013         if (v == c) {
1014             set_address_zero(env, r1, d + i);
1015             return 1;
1016         }
1017     }
1018     set_address_zero(env, r1, d + len);
1019     set_address_zero(env, r2, s + len);
1020     return 3;
1021 }
1022 
1023 /* load access registers r1 to r3 from memory at a2 */
1024 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1025 {
1026     uintptr_t ra = GETPC();
1027     int i;
1028 
1029     if (a2 & 0x3) {
1030         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1031     }
1032 
1033     for (i = r1;; i = (i + 1) % 16) {
1034         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1035         a2 += 4;
1036 
1037         if (i == r3) {
1038             break;
1039         }
1040     }
1041 }
1042 
1043 /* store access registers r1 to r3 in memory at a2 */
1044 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1045 {
1046     uintptr_t ra = GETPC();
1047     int i;
1048 
1049     if (a2 & 0x3) {
1050         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1051     }
1052 
1053     for (i = r1;; i = (i + 1) % 16) {
1054         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1055         a2 += 4;
1056 
1057         if (i == r3) {
1058             break;
1059         }
1060     }
1061 }
1062 
1063 /* move long helper */
1064 static inline uint32_t do_mvcl(CPUS390XState *env,
1065                                uint64_t *dest, uint64_t *destlen,
1066                                uint64_t *src, uint64_t *srclen,
1067                                uint16_t pad, int wordsize, uintptr_t ra)
1068 {
1069     const int mmu_idx = cpu_mmu_index(env, false);
1070     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1071     S390Access srca, desta;
1072     int i, cc;
1073 
1074     if (*destlen == *srclen) {
1075         cc = 0;
1076     } else if (*destlen < *srclen) {
1077         cc = 1;
1078     } else {
1079         cc = 2;
1080     }
1081 
1082     if (!*destlen) {
1083         return cc;
1084     }
1085 
1086     /*
1087      * Only perform one type of type of operation (move/pad) at a time.
1088      * Stay within single pages.
1089      */
1090     if (*srclen) {
1091         /* Copy the src array */
1092         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1093         *destlen -= len;
1094         *srclen -= len;
1095         srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1096         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1097         access_memmove(env, &desta, &srca, ra);
1098         *src = wrap_address(env, *src + len);
1099         *dest = wrap_address(env, *dest + len);
1100     } else if (wordsize == 1) {
1101         /* Pad the remaining area */
1102         *destlen -= len;
1103         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1104         access_memset(env, &desta, pad, ra);
1105         *dest = wrap_address(env, *dest + len);
1106     } else {
1107         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1108 
1109         /* The remaining length selects the padding byte. */
1110         for (i = 0; i < len; (*destlen)--, i++) {
1111             if (*destlen & 1) {
1112                 access_set_byte(env, &desta, i, pad, ra);
1113             } else {
1114                 access_set_byte(env, &desta, i, pad >> 8, ra);
1115             }
1116         }
1117         *dest = wrap_address(env, *dest + len);
1118     }
1119 
1120     return *destlen ? 3 : cc;
1121 }
1122 
1123 /* move long */
1124 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1125 {
1126     const int mmu_idx = cpu_mmu_index(env, false);
1127     uintptr_t ra = GETPC();
1128     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1129     uint64_t dest = get_address(env, r1);
1130     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1131     uint64_t src = get_address(env, r2);
1132     uint8_t pad = env->regs[r2 + 1] >> 24;
1133     CPUState *cs = env_cpu(env);
1134     S390Access srca, desta;
1135     uint32_t cc, cur_len;
1136 
1137     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1138         cc = 3;
1139     } else if (srclen == destlen) {
1140         cc = 0;
1141     } else if (destlen < srclen) {
1142         cc = 1;
1143     } else {
1144         cc = 2;
1145     }
1146 
1147     /* We might have to zero-out some bits even if there was no action. */
1148     if (unlikely(!destlen || cc == 3)) {
1149         set_address_zero(env, r2, src);
1150         set_address_zero(env, r1, dest);
1151         return cc;
1152     } else if (!srclen) {
1153         set_address_zero(env, r2, src);
1154     }
1155 
1156     /*
1157      * Only perform one type of type of operation (move/pad) in one step.
1158      * Stay within single pages.
1159      */
1160     while (destlen) {
1161         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1162         if (!srclen) {
1163             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1164                                    ra);
1165             access_memset(env, &desta, pad, ra);
1166         } else {
1167             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1168 
1169             srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
1170                                   ra);
1171             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1172                                    ra);
1173             access_memmove(env, &desta, &srca, ra);
1174             src = wrap_address(env, src + cur_len);
1175             srclen -= cur_len;
1176             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1177             set_address_zero(env, r2, src);
1178         }
1179         dest = wrap_address(env, dest + cur_len);
1180         destlen -= cur_len;
1181         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1182         set_address_zero(env, r1, dest);
1183 
1184         /*
1185          * MVCL is interruptible. Return to the main loop if requested after
1186          * writing back all state to registers. If no interrupt will get
1187          * injected, we'll end up back in this handler and continue processing
1188          * the remaining parts.
1189          */
1190         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1191             cpu_loop_exit_restore(cs, ra);
1192         }
1193     }
1194     return cc;
1195 }
1196 
1197 /* move long extended */
1198 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1199                        uint32_t r3)
1200 {
1201     uintptr_t ra = GETPC();
1202     uint64_t destlen = get_length(env, r1 + 1);
1203     uint64_t dest = get_address(env, r1);
1204     uint64_t srclen = get_length(env, r3 + 1);
1205     uint64_t src = get_address(env, r3);
1206     uint8_t pad = a2;
1207     uint32_t cc;
1208 
1209     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1210 
1211     set_length(env, r1 + 1, destlen);
1212     set_length(env, r3 + 1, srclen);
1213     set_address(env, r1, dest);
1214     set_address(env, r3, src);
1215 
1216     return cc;
1217 }
1218 
1219 /* move long unicode */
1220 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1221                        uint32_t r3)
1222 {
1223     uintptr_t ra = GETPC();
1224     uint64_t destlen = get_length(env, r1 + 1);
1225     uint64_t dest = get_address(env, r1);
1226     uint64_t srclen = get_length(env, r3 + 1);
1227     uint64_t src = get_address(env, r3);
1228     uint16_t pad = a2;
1229     uint32_t cc;
1230 
1231     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1232 
1233     set_length(env, r1 + 1, destlen);
1234     set_length(env, r3 + 1, srclen);
1235     set_address(env, r1, dest);
1236     set_address(env, r3, src);
1237 
1238     return cc;
1239 }
1240 
1241 /* compare logical long helper */
1242 static inline uint32_t do_clcl(CPUS390XState *env,
1243                                uint64_t *src1, uint64_t *src1len,
1244                                uint64_t *src3, uint64_t *src3len,
1245                                uint16_t pad, uint64_t limit,
1246                                int wordsize, uintptr_t ra)
1247 {
1248     uint64_t len = MAX(*src1len, *src3len);
1249     uint32_t cc = 0;
1250 
1251     check_alignment(env, *src1len | *src3len, wordsize, ra);
1252 
1253     if (!len) {
1254         return cc;
1255     }
1256 
1257     /* Lest we fail to service interrupts in a timely manner, limit the
1258        amount of work we're willing to do.  */
1259     if (len > limit) {
1260         len = limit;
1261         cc = 3;
1262     }
1263 
1264     for (; len; len -= wordsize) {
1265         uint16_t v1 = pad;
1266         uint16_t v3 = pad;
1267 
1268         if (*src1len) {
1269             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1270         }
1271         if (*src3len) {
1272             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1273         }
1274 
1275         if (v1 != v3) {
1276             cc = (v1 < v3) ? 1 : 2;
1277             break;
1278         }
1279 
1280         if (*src1len) {
1281             *src1 += wordsize;
1282             *src1len -= wordsize;
1283         }
1284         if (*src3len) {
1285             *src3 += wordsize;
1286             *src3len -= wordsize;
1287         }
1288     }
1289 
1290     return cc;
1291 }
1292 
1293 
1294 /* compare logical long */
1295 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1296 {
1297     uintptr_t ra = GETPC();
1298     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1299     uint64_t src1 = get_address(env, r1);
1300     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1301     uint64_t src3 = get_address(env, r2);
1302     uint8_t pad = env->regs[r2 + 1] >> 24;
1303     uint32_t cc;
1304 
1305     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1306 
1307     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1308     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1309     set_address(env, r1, src1);
1310     set_address(env, r2, src3);
1311 
1312     return cc;
1313 }
1314 
1315 /* compare logical long extended memcompare insn with padding */
1316 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1317                        uint32_t r3)
1318 {
1319     uintptr_t ra = GETPC();
1320     uint64_t src1len = get_length(env, r1 + 1);
1321     uint64_t src1 = get_address(env, r1);
1322     uint64_t src3len = get_length(env, r3 + 1);
1323     uint64_t src3 = get_address(env, r3);
1324     uint8_t pad = a2;
1325     uint32_t cc;
1326 
1327     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1328 
1329     set_length(env, r1 + 1, src1len);
1330     set_length(env, r3 + 1, src3len);
1331     set_address(env, r1, src1);
1332     set_address(env, r3, src3);
1333 
1334     return cc;
1335 }
1336 
1337 /* compare logical long unicode memcompare insn with padding */
1338 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1339                        uint32_t r3)
1340 {
1341     uintptr_t ra = GETPC();
1342     uint64_t src1len = get_length(env, r1 + 1);
1343     uint64_t src1 = get_address(env, r1);
1344     uint64_t src3len = get_length(env, r3 + 1);
1345     uint64_t src3 = get_address(env, r3);
1346     uint16_t pad = a2;
1347     uint32_t cc = 0;
1348 
1349     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1350 
1351     set_length(env, r1 + 1, src1len);
1352     set_length(env, r3 + 1, src3len);
1353     set_address(env, r1, src1);
1354     set_address(env, r3, src3);
1355 
1356     return cc;
1357 }
1358 
1359 /* checksum */
1360 uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1361                       uint64_t src, uint64_t src_len)
1362 {
1363     uintptr_t ra = GETPC();
1364     uint64_t max_len, len;
1365     uint64_t cksm = (uint32_t)r1;
1366 
1367     /* Lest we fail to service interrupts in a timely manner, limit the
1368        amount of work we're willing to do.  For now, let's cap at 8k.  */
1369     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1370 
1371     /* Process full words as available.  */
1372     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1373         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1374     }
1375 
1376     switch (max_len - len) {
1377     case 1:
1378         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1379         len += 1;
1380         break;
1381     case 2:
1382         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1383         len += 2;
1384         break;
1385     case 3:
1386         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1387         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1388         len += 3;
1389         break;
1390     }
1391 
1392     /* Fold the carry from the checksum.  Note that we can see carry-out
1393        during folding more than once (but probably not more than twice).  */
1394     while (cksm > 0xffffffffull) {
1395         cksm = (uint32_t)cksm + (cksm >> 32);
1396     }
1397 
1398     /* Indicate whether or not we've processed everything.  */
1399     env->cc_op = (len == src_len ? 0 : 3);
1400 
1401     /* Return both cksm and processed length.  */
1402     env->retxl = cksm;
1403     return len;
1404 }
1405 
1406 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1407 {
1408     uintptr_t ra = GETPC();
1409     int len_dest = len >> 4;
1410     int len_src = len & 0xf;
1411     uint8_t b;
1412 
1413     dest += len_dest;
1414     src += len_src;
1415 
1416     /* last byte is special, it only flips the nibbles */
1417     b = cpu_ldub_data_ra(env, src, ra);
1418     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1419     src--;
1420     len_src--;
1421 
1422     /* now pack every value */
1423     while (len_dest > 0) {
1424         b = 0;
1425 
1426         if (len_src >= 0) {
1427             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1428             src--;
1429             len_src--;
1430         }
1431         if (len_src >= 0) {
1432             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1433             src--;
1434             len_src--;
1435         }
1436 
1437         len_dest--;
1438         dest--;
1439         cpu_stb_data_ra(env, dest, b, ra);
1440     }
1441 }
1442 
1443 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1444                            uint32_t srclen, int ssize, uintptr_t ra)
1445 {
1446     int i;
1447     /* The destination operand is always 16 bytes long.  */
1448     const int destlen = 16;
1449 
1450     /* The operands are processed from right to left.  */
1451     src += srclen - 1;
1452     dest += destlen - 1;
1453 
1454     for (i = 0; i < destlen; i++) {
1455         uint8_t b = 0;
1456 
1457         /* Start with a positive sign */
1458         if (i == 0) {
1459             b = 0xc;
1460         } else if (srclen > ssize) {
1461             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1462             src -= ssize;
1463             srclen -= ssize;
1464         }
1465 
1466         if (srclen > ssize) {
1467             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1468             src -= ssize;
1469             srclen -= ssize;
1470         }
1471 
1472         cpu_stb_data_ra(env, dest, b, ra);
1473         dest--;
1474     }
1475 }
1476 
1477 
1478 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1479                  uint32_t srclen)
1480 {
1481     do_pkau(env, dest, src, srclen, 1, GETPC());
1482 }
1483 
1484 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1485                  uint32_t srclen)
1486 {
1487     do_pkau(env, dest, src, srclen, 2, GETPC());
1488 }
1489 
1490 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1491                   uint64_t src)
1492 {
1493     uintptr_t ra = GETPC();
1494     int len_dest = len >> 4;
1495     int len_src = len & 0xf;
1496     uint8_t b;
1497     int second_nibble = 0;
1498 
1499     dest += len_dest;
1500     src += len_src;
1501 
1502     /* last byte is special, it only flips the nibbles */
1503     b = cpu_ldub_data_ra(env, src, ra);
1504     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1505     src--;
1506     len_src--;
1507 
1508     /* now pad every nibble with 0xf0 */
1509 
1510     while (len_dest > 0) {
1511         uint8_t cur_byte = 0;
1512 
1513         if (len_src > 0) {
1514             cur_byte = cpu_ldub_data_ra(env, src, ra);
1515         }
1516 
1517         len_dest--;
1518         dest--;
1519 
1520         /* only advance one nibble at a time */
1521         if (second_nibble) {
1522             cur_byte >>= 4;
1523             len_src--;
1524             src--;
1525         }
1526         second_nibble = !second_nibble;
1527 
1528         /* digit */
1529         cur_byte = (cur_byte & 0xf);
1530         /* zone bits */
1531         cur_byte |= 0xf0;
1532 
1533         cpu_stb_data_ra(env, dest, cur_byte, ra);
1534     }
1535 }
1536 
1537 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1538                                  uint32_t destlen, int dsize, uint64_t src,
1539                                  uintptr_t ra)
1540 {
1541     int i;
1542     uint32_t cc;
1543     uint8_t b;
1544     /* The source operand is always 16 bytes long.  */
1545     const int srclen = 16;
1546 
1547     /* The operands are processed from right to left.  */
1548     src += srclen - 1;
1549     dest += destlen - dsize;
1550 
1551     /* Check for the sign.  */
1552     b = cpu_ldub_data_ra(env, src, ra);
1553     src--;
1554     switch (b & 0xf) {
1555     case 0xa:
1556     case 0xc:
1557     case 0xe ... 0xf:
1558         cc = 0;  /* plus */
1559         break;
1560     case 0xb:
1561     case 0xd:
1562         cc = 1;  /* minus */
1563         break;
1564     default:
1565     case 0x0 ... 0x9:
1566         cc = 3;  /* invalid */
1567         break;
1568     }
1569 
1570     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1571     for (i = 0; i < destlen; i += dsize) {
1572         if (i == (31 * dsize)) {
1573             /* If length is 32/64 bytes, the leftmost byte is 0. */
1574             b = 0;
1575         } else if (i % (2 * dsize)) {
1576             b = cpu_ldub_data_ra(env, src, ra);
1577             src--;
1578         } else {
1579             b >>= 4;
1580         }
1581         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1582         dest -= dsize;
1583     }
1584 
1585     return cc;
1586 }
1587 
1588 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1589                        uint64_t src)
1590 {
1591     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1592 }
1593 
1594 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1595                        uint64_t src)
1596 {
1597     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1598 }
1599 
1600 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1601 {
1602     uintptr_t ra = GETPC();
1603     uint32_t cc = 0;
1604     int i;
1605 
1606     for (i = 0; i < destlen; i++) {
1607         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1608         /* digit */
1609         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1610 
1611         if (i == (destlen - 1)) {
1612             /* sign */
1613             cc |= (b & 0xf) < 0xa ? 1 : 0;
1614         } else {
1615             /* digit */
1616             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1617         }
1618     }
1619 
1620     return cc;
1621 }
1622 
1623 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1624                              uint64_t trans, uintptr_t ra)
1625 {
1626     uint32_t i;
1627 
1628     for (i = 0; i <= len; i++) {
1629         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1630         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1631         cpu_stb_data_ra(env, array + i, new_byte, ra);
1632     }
1633 
1634     return env->cc_op;
1635 }
1636 
1637 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1638                 uint64_t trans)
1639 {
1640     do_helper_tr(env, len, array, trans, GETPC());
1641 }
1642 
1643 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1644                      uint64_t len, uint64_t trans)
1645 {
1646     uintptr_t ra = GETPC();
1647     uint8_t end = env->regs[0] & 0xff;
1648     uint64_t l = len;
1649     uint64_t i;
1650     uint32_t cc = 0;
1651 
1652     if (!(env->psw.mask & PSW_MASK_64)) {
1653         array &= 0x7fffffff;
1654         l = (uint32_t)l;
1655     }
1656 
1657     /* Lest we fail to service interrupts in a timely manner, limit the
1658        amount of work we're willing to do.  For now, let's cap at 8k.  */
1659     if (l > 0x2000) {
1660         l = 0x2000;
1661         cc = 3;
1662     }
1663 
1664     for (i = 0; i < l; i++) {
1665         uint8_t byte, new_byte;
1666 
1667         byte = cpu_ldub_data_ra(env, array + i, ra);
1668 
1669         if (byte == end) {
1670             cc = 1;
1671             break;
1672         }
1673 
1674         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1675         cpu_stb_data_ra(env, array + i, new_byte, ra);
1676     }
1677 
1678     env->cc_op = cc;
1679     env->retxl = len - i;
1680     return array + i;
1681 }
1682 
1683 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1684                                      uint64_t array, uint64_t trans,
1685                                      int inc, uintptr_t ra)
1686 {
1687     int i;
1688 
1689     for (i = 0; i <= len; i++) {
1690         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1691         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1692 
1693         if (sbyte != 0) {
1694             set_address(env, 1, array + i * inc);
1695             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1696             return (i == len) ? 2 : 1;
1697         }
1698     }
1699 
1700     return 0;
1701 }
1702 
1703 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1704                                   uint64_t array, uint64_t trans,
1705                                   uintptr_t ra)
1706 {
1707     return do_helper_trt(env, len, array, trans, 1, ra);
1708 }
1709 
1710 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1711                      uint64_t trans)
1712 {
1713     return do_helper_trt(env, len, array, trans, 1, GETPC());
1714 }
1715 
1716 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1717                                    uint64_t array, uint64_t trans,
1718                                    uintptr_t ra)
1719 {
1720     return do_helper_trt(env, len, array, trans, -1, ra);
1721 }
1722 
1723 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1724                       uint64_t trans)
1725 {
1726     return do_helper_trt(env, len, array, trans, -1, GETPC());
1727 }
1728 
1729 /* Translate one/two to one/two */
1730 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1731                       uint32_t tst, uint32_t sizes)
1732 {
1733     uintptr_t ra = GETPC();
1734     int dsize = (sizes & 1) ? 1 : 2;
1735     int ssize = (sizes & 2) ? 1 : 2;
1736     uint64_t tbl = get_address(env, 1);
1737     uint64_t dst = get_address(env, r1);
1738     uint64_t len = get_length(env, r1 + 1);
1739     uint64_t src = get_address(env, r2);
1740     uint32_t cc = 3;
1741     int i;
1742 
1743     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1744        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1745        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1746     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1747         tbl &= -4096;
1748     } else {
1749         tbl &= -8;
1750     }
1751 
1752     check_alignment(env, len, ssize, ra);
1753 
1754     /* Lest we fail to service interrupts in a timely manner, */
1755     /* limit the amount of work we're willing to do.   */
1756     for (i = 0; i < 0x2000; i++) {
1757         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1758         uint64_t tble = tbl + (sval * dsize);
1759         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1760         if (dval == tst) {
1761             cc = 1;
1762             break;
1763         }
1764         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1765 
1766         len -= ssize;
1767         src += ssize;
1768         dst += dsize;
1769 
1770         if (len == 0) {
1771             cc = 0;
1772             break;
1773         }
1774     }
1775 
1776     set_address(env, r1, dst);
1777     set_length(env, r1 + 1, len);
1778     set_address(env, r2, src);
1779 
1780     return cc;
1781 }
1782 
1783 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1784                   uint32_t r1, uint32_t r3)
1785 {
1786     uintptr_t ra = GETPC();
1787     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1788     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1789     Int128 oldv;
1790     uint64_t oldh, oldl;
1791     bool fail;
1792 
1793     check_alignment(env, addr, 16, ra);
1794 
1795     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1796     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1797 
1798     oldv = int128_make128(oldl, oldh);
1799     fail = !int128_eq(oldv, cmpv);
1800     if (fail) {
1801         newv = oldv;
1802     }
1803 
1804     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1805     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1806 
1807     env->cc_op = fail;
1808     env->regs[r1] = int128_gethi(oldv);
1809     env->regs[r1 + 1] = int128_getlo(oldv);
1810 }
1811 
1812 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1813                            uint32_t r1, uint32_t r3)
1814 {
1815     uintptr_t ra = GETPC();
1816     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1817     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1818     int mem_idx;
1819     MemOpIdx oi;
1820     Int128 oldv;
1821     bool fail;
1822 
1823     assert(HAVE_CMPXCHG128);
1824 
1825     mem_idx = cpu_mmu_index(env, false);
1826     oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1827     oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1828     fail = !int128_eq(oldv, cmpv);
1829 
1830     env->cc_op = fail;
1831     env->regs[r1] = int128_gethi(oldv);
1832     env->regs[r1 + 1] = int128_getlo(oldv);
1833 }
1834 
1835 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1836                         uint64_t a2, bool parallel)
1837 {
1838     uint32_t mem_idx = cpu_mmu_index(env, false);
1839     uintptr_t ra = GETPC();
1840     uint32_t fc = extract32(env->regs[0], 0, 8);
1841     uint32_t sc = extract32(env->regs[0], 8, 8);
1842     uint64_t pl = get_address(env, 1) & -16;
1843     uint64_t svh, svl;
1844     uint32_t cc;
1845 
1846     /* Sanity check the function code and storage characteristic.  */
1847     if (fc > 1 || sc > 3) {
1848         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1849             goto spec_exception;
1850         }
1851         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1852             goto spec_exception;
1853         }
1854     }
1855 
1856     /* Sanity check the alignments.  */
1857     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1858         goto spec_exception;
1859     }
1860 
1861     /* Sanity check writability of the store address.  */
1862     probe_write(env, a2, 1 << sc, mem_idx, ra);
1863 
1864     /*
1865      * Note that the compare-and-swap is atomic, and the store is atomic,
1866      * but the complete operation is not.  Therefore we do not need to
1867      * assert serial context in order to implement this.  That said,
1868      * restart early if we can't support either operation that is supposed
1869      * to be atomic.
1870      */
1871     if (parallel) {
1872         uint32_t max = 2;
1873 #ifdef CONFIG_ATOMIC64
1874         max = 3;
1875 #endif
1876         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1877             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1878             cpu_loop_exit_atomic(env_cpu(env), ra);
1879         }
1880     }
1881 
1882     /* All loads happen before all stores.  For simplicity, load the entire
1883        store value area from the parameter list.  */
1884     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1885     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1886 
1887     switch (fc) {
1888     case 0:
1889         {
1890             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1891             uint32_t cv = env->regs[r3];
1892             uint32_t ov;
1893 
1894             if (parallel) {
1895 #ifdef CONFIG_USER_ONLY
1896                 uint32_t *haddr = g2h(env_cpu(env), a1);
1897                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1898 #else
1899                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1900                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1901 #endif
1902             } else {
1903                 ov = cpu_ldl_data_ra(env, a1, ra);
1904                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1905             }
1906             cc = (ov != cv);
1907             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1908         }
1909         break;
1910 
1911     case 1:
1912         {
1913             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1914             uint64_t cv = env->regs[r3];
1915             uint64_t ov;
1916 
1917             if (parallel) {
1918 #ifdef CONFIG_ATOMIC64
1919                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1920                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1921 #else
1922                 /* Note that we asserted !parallel above.  */
1923                 g_assert_not_reached();
1924 #endif
1925             } else {
1926                 ov = cpu_ldq_data_ra(env, a1, ra);
1927                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1928             }
1929             cc = (ov != cv);
1930             env->regs[r3] = ov;
1931         }
1932         break;
1933 
1934     case 2:
1935         {
1936             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1937             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1938             Int128 nv = int128_make128(nvl, nvh);
1939             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1940             Int128 ov;
1941 
1942             if (!parallel) {
1943                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1944                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1945 
1946                 ov = int128_make128(ol, oh);
1947                 cc = !int128_eq(ov, cv);
1948                 if (cc) {
1949                     nv = ov;
1950                 }
1951 
1952                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1953                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1954             } else if (HAVE_CMPXCHG128) {
1955                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1956                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1957                 cc = !int128_eq(ov, cv);
1958             } else {
1959                 /* Note that we asserted !parallel above.  */
1960                 g_assert_not_reached();
1961             }
1962 
1963             env->regs[r3 + 0] = int128_gethi(ov);
1964             env->regs[r3 + 1] = int128_getlo(ov);
1965         }
1966         break;
1967 
1968     default:
1969         g_assert_not_reached();
1970     }
1971 
1972     /* Store only if the comparison succeeded.  Note that above we use a pair
1973        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1974        from the most-significant bits of svh.  */
1975     if (cc == 0) {
1976         switch (sc) {
1977         case 0:
1978             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1979             break;
1980         case 1:
1981             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1982             break;
1983         case 2:
1984             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1985             break;
1986         case 3:
1987             cpu_stq_data_ra(env, a2, svh, ra);
1988             break;
1989         case 4:
1990             if (!parallel) {
1991                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1992                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1993             } else if (HAVE_ATOMIC128) {
1994                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1995                 Int128 sv = int128_make128(svl, svh);
1996                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1997             } else {
1998                 /* Note that we asserted !parallel above.  */
1999                 g_assert_not_reached();
2000             }
2001             break;
2002         default:
2003             g_assert_not_reached();
2004         }
2005     }
2006 
2007     return cc;
2008 
2009  spec_exception:
2010     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2011 }
2012 
2013 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2014 {
2015     return do_csst(env, r3, a1, a2, false);
2016 }
2017 
2018 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2019                                uint64_t a2)
2020 {
2021     return do_csst(env, r3, a1, a2, true);
2022 }
2023 
2024 #if !defined(CONFIG_USER_ONLY)
2025 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2026 {
2027     uintptr_t ra = GETPC();
2028     bool PERchanged = false;
2029     uint64_t src = a2;
2030     uint32_t i;
2031 
2032     if (src & 0x7) {
2033         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2034     }
2035 
2036     for (i = r1;; i = (i + 1) % 16) {
2037         uint64_t val = cpu_ldq_data_ra(env, src, ra);
2038         if (env->cregs[i] != val && i >= 9 && i <= 11) {
2039             PERchanged = true;
2040         }
2041         env->cregs[i] = val;
2042         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2043                    i, src, val);
2044         src += sizeof(uint64_t);
2045 
2046         if (i == r3) {
2047             break;
2048         }
2049     }
2050 
2051     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2052         s390_cpu_recompute_watchpoints(env_cpu(env));
2053     }
2054 
2055     tlb_flush(env_cpu(env));
2056 }
2057 
2058 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2059 {
2060     uintptr_t ra = GETPC();
2061     bool PERchanged = false;
2062     uint64_t src = a2;
2063     uint32_t i;
2064 
2065     if (src & 0x3) {
2066         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2067     }
2068 
2069     for (i = r1;; i = (i + 1) % 16) {
2070         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2071         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2072             PERchanged = true;
2073         }
2074         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2075         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2076         src += sizeof(uint32_t);
2077 
2078         if (i == r3) {
2079             break;
2080         }
2081     }
2082 
2083     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2084         s390_cpu_recompute_watchpoints(env_cpu(env));
2085     }
2086 
2087     tlb_flush(env_cpu(env));
2088 }
2089 
2090 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2091 {
2092     uintptr_t ra = GETPC();
2093     uint64_t dest = a2;
2094     uint32_t i;
2095 
2096     if (dest & 0x7) {
2097         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2098     }
2099 
2100     for (i = r1;; i = (i + 1) % 16) {
2101         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2102         dest += sizeof(uint64_t);
2103 
2104         if (i == r3) {
2105             break;
2106         }
2107     }
2108 }
2109 
2110 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2111 {
2112     uintptr_t ra = GETPC();
2113     uint64_t dest = a2;
2114     uint32_t i;
2115 
2116     if (dest & 0x3) {
2117         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2118     }
2119 
2120     for (i = r1;; i = (i + 1) % 16) {
2121         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2122         dest += sizeof(uint32_t);
2123 
2124         if (i == r3) {
2125             break;
2126         }
2127     }
2128 }
2129 
2130 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2131 {
2132     uintptr_t ra = GETPC();
2133     int i;
2134 
2135     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2136 
2137     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2138         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2139     }
2140 
2141     return 0;
2142 }
2143 
2144 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2145 {
2146     S390CPU *cpu = env_archcpu(env);
2147     CPUState *cs = env_cpu(env);
2148 
2149     /*
2150      * TODO: we currently don't handle all access protection types
2151      * (including access-list and key-controlled) as well as AR mode.
2152      */
2153     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2154         /* Fetching permitted; storing permitted */
2155         return 0;
2156     }
2157 
2158     if (env->int_pgm_code == PGM_PROTECTION) {
2159         /* retry if reading is possible */
2160         cs->exception_index = -1;
2161         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2162             /* Fetching permitted; storing not permitted */
2163             return 1;
2164         }
2165     }
2166 
2167     switch (env->int_pgm_code) {
2168     case PGM_PROTECTION:
2169         /* Fetching not permitted; storing not permitted */
2170         cs->exception_index = -1;
2171         return 2;
2172     case PGM_ADDRESSING:
2173     case PGM_TRANS_SPEC:
2174         /* exceptions forwarded to the guest */
2175         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2176         return 0;
2177     }
2178 
2179     /* Translation not available */
2180     cs->exception_index = -1;
2181     return 3;
2182 }
2183 
2184 /* insert storage key extended */
2185 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2186 {
2187     static S390SKeysState *ss;
2188     static S390SKeysClass *skeyclass;
2189     uint64_t addr = wrap_address(env, r2);
2190     uint8_t key;
2191     int rc;
2192 
2193     addr = mmu_real2abs(env, addr);
2194     if (!mmu_absolute_addr_valid(addr, false)) {
2195         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2196     }
2197 
2198     if (unlikely(!ss)) {
2199         ss = s390_get_skeys_device();
2200         skeyclass = S390_SKEYS_GET_CLASS(ss);
2201         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2202             tlb_flush_all_cpus_synced(env_cpu(env));
2203         }
2204     }
2205 
2206     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2207     if (rc) {
2208         trace_get_skeys_nonzero(rc);
2209         return 0;
2210     }
2211     return key;
2212 }
2213 
2214 /* set storage key extended */
2215 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2216 {
2217     static S390SKeysState *ss;
2218     static S390SKeysClass *skeyclass;
2219     uint64_t addr = wrap_address(env, r2);
2220     uint8_t key;
2221     int rc;
2222 
2223     addr = mmu_real2abs(env, addr);
2224     if (!mmu_absolute_addr_valid(addr, false)) {
2225         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2226     }
2227 
2228     if (unlikely(!ss)) {
2229         ss = s390_get_skeys_device();
2230         skeyclass = S390_SKEYS_GET_CLASS(ss);
2231         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2232             tlb_flush_all_cpus_synced(env_cpu(env));
2233         }
2234     }
2235 
2236     key = r1 & 0xfe;
2237     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2238     if (rc) {
2239         trace_set_skeys_nonzero(rc);
2240     }
2241    /*
2242     * As we can only flush by virtual address and not all the entries
2243     * that point to a physical address we have to flush the whole TLB.
2244     */
2245     tlb_flush_all_cpus_synced(env_cpu(env));
2246 }
2247 
2248 /* reset reference bit extended */
2249 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2250 {
2251     uint64_t addr = wrap_address(env, r2);
2252     static S390SKeysState *ss;
2253     static S390SKeysClass *skeyclass;
2254     uint8_t re, key;
2255     int rc;
2256 
2257     addr = mmu_real2abs(env, addr);
2258     if (!mmu_absolute_addr_valid(addr, false)) {
2259         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2260     }
2261 
2262     if (unlikely(!ss)) {
2263         ss = s390_get_skeys_device();
2264         skeyclass = S390_SKEYS_GET_CLASS(ss);
2265         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2266             tlb_flush_all_cpus_synced(env_cpu(env));
2267         }
2268     }
2269 
2270     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2271     if (rc) {
2272         trace_get_skeys_nonzero(rc);
2273         return 0;
2274     }
2275 
2276     re = key & (SK_R | SK_C);
2277     key &= ~SK_R;
2278 
2279     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2280     if (rc) {
2281         trace_set_skeys_nonzero(rc);
2282         return 0;
2283     }
2284    /*
2285     * As we can only flush by virtual address and not all the entries
2286     * that point to a physical address we have to flush the whole TLB.
2287     */
2288     tlb_flush_all_cpus_synced(env_cpu(env));
2289 
2290     /*
2291      * cc
2292      *
2293      * 0  Reference bit zero; change bit zero
2294      * 1  Reference bit zero; change bit one
2295      * 2  Reference bit one; change bit zero
2296      * 3  Reference bit one; change bit one
2297      */
2298 
2299     return re >> 1;
2300 }
2301 
2302 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2303 {
2304     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2305     S390Access srca, desta;
2306     uintptr_t ra = GETPC();
2307     int cc = 0;
2308 
2309     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2310                __func__, l, a1, a2);
2311 
2312     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2313         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2314         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2315     }
2316 
2317     l = wrap_length32(env, l);
2318     if (l > 256) {
2319         /* max 256 */
2320         l = 256;
2321         cc = 3;
2322     } else if (!l) {
2323         return cc;
2324     }
2325 
2326     /* TODO: Access key handling */
2327     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2328     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2329     access_memmove(env, &desta, &srca, ra);
2330     return cc;
2331 }
2332 
2333 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2334 {
2335     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2336     S390Access srca, desta;
2337     uintptr_t ra = GETPC();
2338     int cc = 0;
2339 
2340     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2341                __func__, l, a1, a2);
2342 
2343     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2344         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2345         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2346     }
2347 
2348     l = wrap_length32(env, l);
2349     if (l > 256) {
2350         /* max 256 */
2351         l = 256;
2352         cc = 3;
2353     } else if (!l) {
2354         return cc;
2355     }
2356 
2357     /* TODO: Access key handling */
2358     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2359     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2360     access_memmove(env, &desta, &srca, ra);
2361     return cc;
2362 }
2363 
2364 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2365 {
2366     CPUState *cs = env_cpu(env);
2367     const uintptr_t ra = GETPC();
2368     uint64_t table, entry, raddr;
2369     uint16_t entries, i, index = 0;
2370 
2371     if (r2 & 0xff000) {
2372         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2373     }
2374 
2375     if (!(r2 & 0x800)) {
2376         /* invalidation-and-clearing operation */
2377         table = r1 & ASCE_ORIGIN;
2378         entries = (r2 & 0x7ff) + 1;
2379 
2380         switch (r1 & ASCE_TYPE_MASK) {
2381         case ASCE_TYPE_REGION1:
2382             index = (r2 >> 53) & 0x7ff;
2383             break;
2384         case ASCE_TYPE_REGION2:
2385             index = (r2 >> 42) & 0x7ff;
2386             break;
2387         case ASCE_TYPE_REGION3:
2388             index = (r2 >> 31) & 0x7ff;
2389             break;
2390         case ASCE_TYPE_SEGMENT:
2391             index = (r2 >> 20) & 0x7ff;
2392             break;
2393         }
2394         for (i = 0; i < entries; i++) {
2395             /* addresses are not wrapped in 24/31bit mode but table index is */
2396             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2397             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2398             if (!(entry & REGION_ENTRY_I)) {
2399                 /* we are allowed to not store if already invalid */
2400                 entry |= REGION_ENTRY_I;
2401                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2402             }
2403         }
2404     }
2405 
2406     /* We simply flush the complete tlb, therefore we can ignore r3. */
2407     if (m4 & 1) {
2408         tlb_flush(cs);
2409     } else {
2410         tlb_flush_all_cpus_synced(cs);
2411     }
2412 }
2413 
2414 /* invalidate pte */
2415 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2416                   uint32_t m4)
2417 {
2418     CPUState *cs = env_cpu(env);
2419     const uintptr_t ra = GETPC();
2420     uint64_t page = vaddr & TARGET_PAGE_MASK;
2421     uint64_t pte_addr, pte;
2422 
2423     /* Compute the page table entry address */
2424     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2425     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2426 
2427     /* Mark the page table entry as invalid */
2428     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2429     pte |= PAGE_ENTRY_I;
2430     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2431 
2432     /* XXX we exploit the fact that Linux passes the exact virtual
2433        address here - it's not obliged to! */
2434     if (m4 & 1) {
2435         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2436             tlb_flush_page(cs, page);
2437             /* XXX 31-bit hack */
2438             tlb_flush_page(cs, page ^ 0x80000000);
2439         } else {
2440             /* looks like we don't have a valid virtual address */
2441             tlb_flush(cs);
2442         }
2443     } else {
2444         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2445             tlb_flush_page_all_cpus_synced(cs, page);
2446             /* XXX 31-bit hack */
2447             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2448         } else {
2449             /* looks like we don't have a valid virtual address */
2450             tlb_flush_all_cpus_synced(cs);
2451         }
2452     }
2453 }
2454 
2455 /* flush local tlb */
2456 void HELPER(ptlb)(CPUS390XState *env)
2457 {
2458     tlb_flush(env_cpu(env));
2459 }
2460 
2461 /* flush global tlb */
2462 void HELPER(purge)(CPUS390XState *env)
2463 {
2464     tlb_flush_all_cpus_synced(env_cpu(env));
2465 }
2466 
2467 /* load real address */
2468 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2469 {
2470     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2471     uint64_t ret, tec;
2472     int flags, exc, cc;
2473 
2474     /* XXX incomplete - has more corner cases */
2475     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2476         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2477     }
2478 
2479     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2480     if (exc) {
2481         cc = 3;
2482         ret = exc | 0x80000000;
2483     } else {
2484         cc = 0;
2485         ret |= addr & ~TARGET_PAGE_MASK;
2486     }
2487 
2488     env->cc_op = cc;
2489     return ret;
2490 }
2491 #endif
2492 
2493 /* load pair from quadword */
2494 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2495 {
2496     uintptr_t ra = GETPC();
2497     uint64_t hi, lo;
2498 
2499     check_alignment(env, addr, 16, ra);
2500     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2501     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2502 
2503     env->retxl = lo;
2504     return hi;
2505 }
2506 
2507 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2508 {
2509     uintptr_t ra = GETPC();
2510     uint64_t hi, lo;
2511     int mem_idx;
2512     MemOpIdx oi;
2513     Int128 v;
2514 
2515     assert(HAVE_ATOMIC128);
2516 
2517     mem_idx = cpu_mmu_index(env, false);
2518     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2519     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2520     hi = int128_gethi(v);
2521     lo = int128_getlo(v);
2522 
2523     env->retxl = lo;
2524     return hi;
2525 }
2526 
2527 /* store pair to quadword */
2528 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2529                   uint64_t low, uint64_t high)
2530 {
2531     uintptr_t ra = GETPC();
2532 
2533     check_alignment(env, addr, 16, ra);
2534     cpu_stq_data_ra(env, addr + 0, high, ra);
2535     cpu_stq_data_ra(env, addr + 8, low, ra);
2536 }
2537 
2538 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2539                            uint64_t low, uint64_t high)
2540 {
2541     uintptr_t ra = GETPC();
2542     int mem_idx;
2543     MemOpIdx oi;
2544     Int128 v;
2545 
2546     assert(HAVE_ATOMIC128);
2547 
2548     mem_idx = cpu_mmu_index(env, false);
2549     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2550     v = int128_make128(low, high);
2551     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2552 }
2553 
2554 /* Execute instruction.  This instruction executes an insn modified with
2555    the contents of r1.  It does not change the executed instruction in memory;
2556    it does not change the program counter.
2557 
2558    Perform this by recording the modified instruction in env->ex_value.
2559    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2560 */
2561 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2562 {
2563     uint64_t insn = cpu_lduw_code(env, addr);
2564     uint8_t opc = insn >> 8;
2565 
2566     /* Or in the contents of R1[56:63].  */
2567     insn |= r1 & 0xff;
2568 
2569     /* Load the rest of the instruction.  */
2570     insn <<= 48;
2571     switch (get_ilen(opc)) {
2572     case 2:
2573         break;
2574     case 4:
2575         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2576         break;
2577     case 6:
2578         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2579         break;
2580     default:
2581         g_assert_not_reached();
2582     }
2583 
2584     /* The very most common cases can be sped up by avoiding a new TB.  */
2585     if ((opc & 0xf0) == 0xd0) {
2586         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2587                                       uint64_t, uintptr_t);
2588         static const dx_helper dx[16] = {
2589             [0x0] = do_helper_trt_bkwd,
2590             [0x2] = do_helper_mvc,
2591             [0x4] = do_helper_nc,
2592             [0x5] = do_helper_clc,
2593             [0x6] = do_helper_oc,
2594             [0x7] = do_helper_xc,
2595             [0xc] = do_helper_tr,
2596             [0xd] = do_helper_trt_fwd,
2597         };
2598         dx_helper helper = dx[opc & 0xf];
2599 
2600         if (helper) {
2601             uint32_t l = extract64(insn, 48, 8);
2602             uint32_t b1 = extract64(insn, 44, 4);
2603             uint32_t d1 = extract64(insn, 32, 12);
2604             uint32_t b2 = extract64(insn, 28, 4);
2605             uint32_t d2 = extract64(insn, 16, 12);
2606             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2607             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2608 
2609             env->cc_op = helper(env, l, a1, a2, 0);
2610             env->psw.addr += ilen;
2611             return;
2612         }
2613     } else if (opc == 0x0a) {
2614         env->int_svc_code = extract64(insn, 48, 8);
2615         env->int_svc_ilen = ilen;
2616         helper_exception(env, EXCP_SVC);
2617         g_assert_not_reached();
2618     }
2619 
2620     /* Record the insn we want to execute as well as the ilen to use
2621        during the execution of the target insn.  This will also ensure
2622        that ex_value is non-zero, which flags that we are in a state
2623        that requires such execution.  */
2624     env->ex_value = insn | ilen;
2625 }
2626 
2627 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2628                        uint64_t len)
2629 {
2630     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2631     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2632     const uint64_t r0 = env->regs[0];
2633     const uintptr_t ra = GETPC();
2634     uint8_t dest_key, dest_as, dest_k, dest_a;
2635     uint8_t src_key, src_as, src_k, src_a;
2636     uint64_t val;
2637     int cc = 0;
2638 
2639     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2640                __func__, dest, src, len);
2641 
2642     if (!(env->psw.mask & PSW_MASK_DAT)) {
2643         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2644     }
2645 
2646     /* OAC (operand access control) for the first operand -> dest */
2647     val = (r0 & 0xffff0000ULL) >> 16;
2648     dest_key = (val >> 12) & 0xf;
2649     dest_as = (val >> 6) & 0x3;
2650     dest_k = (val >> 1) & 0x1;
2651     dest_a = val & 0x1;
2652 
2653     /* OAC (operand access control) for the second operand -> src */
2654     val = (r0 & 0x0000ffffULL);
2655     src_key = (val >> 12) & 0xf;
2656     src_as = (val >> 6) & 0x3;
2657     src_k = (val >> 1) & 0x1;
2658     src_a = val & 0x1;
2659 
2660     if (!dest_k) {
2661         dest_key = psw_key;
2662     }
2663     if (!src_k) {
2664         src_key = psw_key;
2665     }
2666     if (!dest_a) {
2667         dest_as = psw_as;
2668     }
2669     if (!src_a) {
2670         src_as = psw_as;
2671     }
2672 
2673     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2674         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2675     }
2676     if (!(env->cregs[0] & CR0_SECONDARY) &&
2677         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2678         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2679     }
2680     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2681         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2682     }
2683 
2684     len = wrap_length32(env, len);
2685     if (len > 4096) {
2686         cc = 3;
2687         len = 4096;
2688     }
2689 
2690     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2691     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2692         (env->psw.mask & PSW_MASK_PSTATE)) {
2693         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2694                       __func__);
2695         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2696     }
2697 
2698     /* FIXME: Access using correct keys and AR-mode */
2699     if (len) {
2700         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2701                                          mmu_idx_from_as(src_as), ra);
2702         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2703                                           mmu_idx_from_as(dest_as), ra);
2704 
2705         access_memmove(env, &desta, &srca, ra);
2706     }
2707 
2708     return cc;
2709 }
2710 
/* Decode a Unicode character.  A return value < 0 indicates success, storing
   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   value >= 0 indicates failure, and the CC value to be returned.

   ADDR is the guest address of the next input character and ILEN the number
   of input bytes still available.  ENH_CHECK requests the stricter
   well-formedness validation, and RA is the host return address used for
   the guest memory accesses.  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);
2717 
/* Encode a Unicode character.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure, and the CC value to be returned.

   C is the character to encode and RA the host return address used for the
   guest memory accesses.  NOTE(review): ILEN presumably is the number of
   bytes still available at ADDR -- confirm against the encoders.  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2724 
2725 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2726                        bool enh_check, uintptr_t ra,
2727                        uint32_t *ochar, uint32_t *olen)
2728 {
2729     uint8_t s0, s1, s2, s3;
2730     uint32_t c, l;
2731 
2732     if (ilen < 1) {
2733         return 0;
2734     }
2735     s0 = cpu_ldub_data_ra(env, addr, ra);
2736     if (s0 <= 0x7f) {
2737         /* one byte character */
2738         l = 1;
2739         c = s0;
2740     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2741         /* invalid character */
2742         return 2;
2743     } else if (s0 <= 0xdf) {
2744         /* two byte character */
2745         l = 2;
2746         if (ilen < 2) {
2747             return 0;
2748         }
2749         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2750         c = s0 & 0x1f;
2751         c = (c << 6) | (s1 & 0x3f);
2752         if (enh_check && (s1 & 0xc0) != 0x80) {
2753             return 2;
2754         }
2755     } else if (s0 <= 0xef) {
2756         /* three byte character */
2757         l = 3;
2758         if (ilen < 3) {
2759             return 0;
2760         }
2761         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2762         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2763         c = s0 & 0x0f;
2764         c = (c << 6) | (s1 & 0x3f);
2765         c = (c << 6) | (s2 & 0x3f);
2766         /* Fold the byte-by-byte range descriptions in the PoO into
2767            tests against the complete value.  It disallows encodings
2768            that could be smaller, and the UTF-16 surrogates.  */
2769         if (enh_check
2770             && ((s1 & 0xc0) != 0x80
2771                 || (s2 & 0xc0) != 0x80
2772                 || c < 0x1000
2773                 || (c >= 0xd800 && c <= 0xdfff))) {
2774             return 2;
2775         }
2776     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2777         /* four byte character */
2778         l = 4;
2779         if (ilen < 4) {
2780             return 0;
2781         }
2782         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2783         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2784         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2785         c = s0 & 0x07;
2786         c = (c << 6) | (s1 & 0x3f);
2787         c = (c << 6) | (s2 & 0x3f);
2788         c = (c << 6) | (s3 & 0x3f);
2789         /* See above.  */
2790         if (enh_check
2791             && ((s1 & 0xc0) != 0x80
2792                 || (s2 & 0xc0) != 0x80
2793                 || (s3 & 0xc0) != 0x80
2794                 || c < 0x010000
2795                 || c > 0x10ffff)) {
2796             return 2;
2797         }
2798     } else {
2799         /* invalid character */
2800         return 2;
2801     }
2802 
2803     *ochar = c;
2804     *olen = l;
2805     return -1;
2806 }
2807 
2808 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2809                         bool enh_check, uintptr_t ra,
2810                         uint32_t *ochar, uint32_t *olen)
2811 {
2812     uint16_t s0, s1;
2813     uint32_t c, l;
2814 
2815     if (ilen < 2) {
2816         return 0;
2817     }
2818     s0 = cpu_lduw_data_ra(env, addr, ra);
2819     if ((s0 & 0xfc00) != 0xd800) {
2820         /* one word character */
2821         l = 2;
2822         c = s0;
2823     } else {
2824         /* two word character */
2825         l = 4;
2826         if (ilen < 4) {
2827             return 0;
2828         }
2829         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2830         c = extract32(s0, 6, 4) + 1;
2831         c = (c << 6) | (s0 & 0x3f);
2832         c = (c << 10) | (s1 & 0x3ff);
2833         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2834             /* invalid surrogate character */
2835             return 2;
2836         }
2837     }
2838 
2839     *ochar = c;
2840     *olen = l;
2841     return -1;
2842 }
2843 
2844 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2845                         bool enh_check, uintptr_t ra,
2846                         uint32_t *ochar, uint32_t *olen)
2847 {
2848     uint32_t c;
2849 
2850     if (ilen < 4) {
2851         return 0;
2852     }
2853     c = cpu_ldl_data_ra(env, addr, ra);
2854     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2855         /* invalid unicode character */
2856         return 2;
2857     }
2858 
2859     *ochar = c;
2860     *olen = 4;
2861     return -1;
2862 }
2863 
2864 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2865                        uintptr_t ra, uint32_t c, uint32_t *olen)
2866 {
2867     uint8_t d[4];
2868     uint32_t l, i;
2869 
2870     if (c <= 0x7f) {
2871         /* one byte character */
2872         l = 1;
2873         d[0] = c;
2874     } else if (c <= 0x7ff) {
2875         /* two byte character */
2876         l = 2;
2877         d[1] = 0x80 | extract32(c, 0, 6);
2878         d[0] = 0xc0 | extract32(c, 6, 5);
2879     } else if (c <= 0xffff) {
2880         /* three byte character */
2881         l = 3;
2882         d[2] = 0x80 | extract32(c, 0, 6);
2883         d[1] = 0x80 | extract32(c, 6, 6);
2884         d[0] = 0xe0 | extract32(c, 12, 4);
2885     } else {
2886         /* four byte character */
2887         l = 4;
2888         d[3] = 0x80 | extract32(c, 0, 6);
2889         d[2] = 0x80 | extract32(c, 6, 6);
2890         d[1] = 0x80 | extract32(c, 12, 6);
2891         d[0] = 0xf0 | extract32(c, 18, 3);
2892     }
2893 
2894     if (ilen < l) {
2895         return 1;
2896     }
2897     for (i = 0; i < l; ++i) {
2898         cpu_stb_data_ra(env, addr + i, d[i], ra);
2899     }
2900 
2901     *olen = l;
2902     return -1;
2903 }
2904 
2905 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2906                         uintptr_t ra, uint32_t c, uint32_t *olen)
2907 {
2908     uint16_t d0, d1;
2909 
2910     if (c <= 0xffff) {
2911         /* one word character */
2912         if (ilen < 2) {
2913             return 1;
2914         }
2915         cpu_stw_data_ra(env, addr, c, ra);
2916         *olen = 2;
2917     } else {
2918         /* two word character */
2919         if (ilen < 4) {
2920             return 1;
2921         }
2922         d1 = 0xdc00 | extract32(c, 0, 10);
2923         d0 = 0xd800 | extract32(c, 10, 6);
2924         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2925         cpu_stw_data_ra(env, addr + 0, d0, ra);
2926         cpu_stw_data_ra(env, addr + 2, d1, ra);
2927         *olen = 4;
2928     }
2929 
2930     return -1;
2931 }
2932 
2933 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2934                         uintptr_t ra, uint32_t c, uint32_t *olen)
2935 {
2936     if (ilen < 4) {
2937         return 1;
2938     }
2939     cpu_stl_data_ra(env, addr, c, ra);
2940     *olen = 4;
2941     return -1;
2942 }
2943 
2944 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2945                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2946                                        decode_unicode_fn decode,
2947                                        encode_unicode_fn encode)
2948 {
2949     uint64_t dst = get_address(env, r1);
2950     uint64_t dlen = get_length(env, r1 + 1);
2951     uint64_t src = get_address(env, r2);
2952     uint64_t slen = get_length(env, r2 + 1);
2953     bool enh_check = m3 & 1;
2954     int cc, i;
2955 
2956     /* Lest we fail to service interrupts in a timely manner, limit the
2957        amount of work we're willing to do.  For now, let's cap at 256.  */
2958     for (i = 0; i < 256; ++i) {
2959         uint32_t c, ilen, olen;
2960 
2961         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2962         if (unlikely(cc >= 0)) {
2963             break;
2964         }
2965         cc = encode(env, dst, dlen, ra, c, &olen);
2966         if (unlikely(cc >= 0)) {
2967             break;
2968         }
2969 
2970         src += ilen;
2971         slen -= ilen;
2972         dst += olen;
2973         dlen -= olen;
2974         cc = 3;
2975     }
2976 
2977     set_address(env, r1, dst);
2978     set_length(env, r1 + 1, dlen);
2979     set_address(env, r2, src);
2980     set_length(env, r2 + 1, slen);
2981 
2982     return cc;
2983 }
2984 
2985 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2986 {
2987     return convert_unicode(env, r1, r2, m3, GETPC(),
2988                            decode_utf8, encode_utf16);
2989 }
2990 
2991 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2992 {
2993     return convert_unicode(env, r1, r2, m3, GETPC(),
2994                            decode_utf8, encode_utf32);
2995 }
2996 
2997 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2998 {
2999     return convert_unicode(env, r1, r2, m3, GETPC(),
3000                            decode_utf16, encode_utf8);
3001 }
3002 
3003 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3004 {
3005     return convert_unicode(env, r1, r2, m3, GETPC(),
3006                            decode_utf16, encode_utf32);
3007 }
3008 
3009 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3010 {
3011     return convert_unicode(env, r1, r2, m3, GETPC(),
3012                            decode_utf32, encode_utf8);
3013 }
3014 
3015 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3016 {
3017     return convert_unicode(env, r1, r2, m3, GETPC(),
3018                            decode_utf32, encode_utf16);
3019 }
3020 
3021 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3022                         uintptr_t ra)
3023 {
3024     /* test the actual access, not just any access to the page due to LAP */
3025     while (len) {
3026         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3027         const uint64_t curlen = MIN(pagelen, len);
3028 
3029         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3030         addr = wrap_address(env, addr + curlen);
3031         len -= curlen;
3032     }
3033 }
3034 
3035 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3036 {
3037     probe_write_access(env, addr, len, GETPC());
3038 }
3039