xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision bbadfb2e)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/cpu_ldst.h"
29 #include "hw/core/tcg-cpu-ops.h"
30 #include "qemu/int128.h"
31 #include "qemu/atomic128.h"
32 #include "trace.h"
33 
34 #if !defined(CONFIG_USER_ONLY)
35 #include "hw/s390x/storage-keys.h"
36 #include "hw/boards.h"
37 #endif
38 
/*
 * user_or_likely(X): in CONFIG_USER_ONLY builds this expands to the constant
 * true (X is not evaluated); otherwise it expands to likely(X).
 */
#ifdef CONFIG_USER_ONLY
# define user_or_likely(X)    true
#else
# define user_or_likely(X)    likely(X)
#endif
44 
45 /*****************************************************************************/
46 /* Softmmu support */
47 
/* #define DEBUG_HELPER */
/*
 * HELPER_LOG() forwards its arguments to qemu_log() when DEBUG_HELPER is
 * defined and expands to nothing otherwise.
 */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif
54 
55 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
56 {
57     uint16_t pkm = env->cregs[3] >> 16;
58 
59     if (env->psw.mask & PSW_MASK_PSTATE) {
60         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
61         return pkm & (0x8000 >> psw_key);
62     }
63     return true;
64 }
65 
66 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
67                                    uint64_t src, uint32_t len)
68 {
69     if (!len || src == dest) {
70         return false;
71     }
72     /* Take care of wrapping at the end of address space. */
73     if (unlikely(wrap_address(env, src + len - 1) < src)) {
74         return dest > src || dest <= wrap_address(env, src + len - 1);
75     }
76     return dest > src && dest <= src + len - 1;
77 }
78 
79 /* Trigger a SPECIFICATION exception if an address or a length is not
80    naturally aligned.  */
81 static inline void check_alignment(CPUS390XState *env, uint64_t v,
82                                    int wordsize, uintptr_t ra)
83 {
84     if (v % wordsize) {
85         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
86     }
87 }
88 
89 /* Load a value from memory according to its size.  */
90 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
91                                            int wordsize, uintptr_t ra)
92 {
93     switch (wordsize) {
94     case 1:
95         return cpu_ldub_data_ra(env, addr, ra);
96     case 2:
97         return cpu_lduw_data_ra(env, addr, ra);
98     default:
99         abort();
100     }
101 }
102 
103 /* Store a to memory according to its size.  */
104 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
105                                       uint64_t value, int wordsize,
106                                       uintptr_t ra)
107 {
108     switch (wordsize) {
109     case 1:
110         cpu_stb_data_ra(env, addr, value, ra);
111         break;
112     case 2:
113         cpu_stw_data_ra(env, addr, value, ra);
114         break;
115     default:
116         abort();
117     }
118 }
119 
/*
 * An access covers at most 4096 bytes and therefore at most two pages.
 *
 * The access is described as up to two fragments: fragment 1 starts at the
 * requested address, fragment 2 (size2 != 0) covers the remainder on the
 * following page.
 */
typedef struct S390Access {
    /* Guest virtual address of each fragment. */
    target_ulong vaddr1;
    target_ulong vaddr2;
    /* Host pointer for each fragment as filled in by the probe; may be NULL. */
    void *haddr1;
    void *haddr2;
    /* Length in bytes of each fragment; size2 is 0 for single-page accesses. */
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
136 
/*
 * Probe a guest access at @addr and report the program exception it would
 * raise.
 *
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 *
 * @size is only used for the watchpoint check; the probe itself is issued
 * with a length of 0.  On success *@phost is filled in by
 * probe_access_flags().
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        /* A valid page that failed the probe is a protection fault. */
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
175 
176 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
177                              bool nonfault, vaddr vaddr1, int size,
178                              MMUAccessType access_type,
179                              int mmu_idx, uintptr_t ra)
180 {
181     int size1, size2, exc;
182 
183     assert(size > 0 && size <= 4096);
184 
185     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
186     size2 = size - size1;
187 
188     memset(access, 0, sizeof(*access));
189     access->vaddr1 = vaddr1;
190     access->size1 = size1;
191     access->size2 = size2;
192     access->mmu_idx = mmu_idx;
193 
194     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
195                             &access->haddr1, ra);
196     if (unlikely(exc)) {
197         return exc;
198     }
199     if (unlikely(size2)) {
200         /* The access crosses page boundaries. */
201         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
202 
203         access->vaddr2 = vaddr2;
204         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
205                                 nonfault, &access->haddr2, ra);
206         if (unlikely(exc)) {
207             return exc;
208         }
209     }
210     return 0;
211 }
212 
213 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
214                                   vaddr vaddr, int size,
215                                   MMUAccessType access_type, int mmu_idx,
216                                   uintptr_t ra)
217 {
218     int exc = access_prepare_nf(ret, env, false, vaddr, size,
219                                 access_type, mmu_idx, ra);
220     assert(!exc);
221 }
222 
223 /* Helper to handle memset on a single page. */
224 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
225                              uint8_t byte, uint16_t size, int mmu_idx,
226                              uintptr_t ra)
227 {
228 #ifdef CONFIG_USER_ONLY
229     memset(haddr, byte, size);
230 #else
231     if (likely(haddr)) {
232         memset(haddr, byte, size);
233     } else {
234         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
235         for (int i = 0; i < size; i++) {
236             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
237         }
238     }
239 #endif
240 }
241 
242 static void access_memset(CPUS390XState *env, S390Access *desta,
243                           uint8_t byte, uintptr_t ra)
244 {
245 
246     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
247                      desta->mmu_idx, ra);
248     if (likely(!desta->size2)) {
249         return;
250     }
251     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
252                      desta->mmu_idx, ra);
253 }
254 
255 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
256                                int offset, uintptr_t ra)
257 {
258     target_ulong vaddr = access->vaddr1;
259     void *haddr = access->haddr1;
260 
261     if (unlikely(offset >= access->size1)) {
262         offset -= access->size1;
263         vaddr = access->vaddr2;
264         haddr = access->haddr2;
265     }
266 
267     if (user_or_likely(haddr)) {
268         return ldub_p(haddr + offset);
269     } else {
270         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
271         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
272     }
273 }
274 
275 static void access_set_byte(CPUS390XState *env, S390Access *access,
276                             int offset, uint8_t byte, uintptr_t ra)
277 {
278     target_ulong vaddr = access->vaddr1;
279     void *haddr = access->haddr1;
280 
281     if (unlikely(offset >= access->size1)) {
282         offset -= access->size1;
283         vaddr = access->vaddr2;
284         haddr = access->haddr2;
285     }
286 
287     if (user_or_likely(haddr)) {
288         stb_p(haddr + offset, byte);
289     } else {
290         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
291         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
292     }
293 }
294 
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 *
 * Both accesses must describe the same total length (asserted).  Each may be
 * split across two pages at a different offset, so the copy is done in up
 * to three pieces aligned to whichever page boundary comes first.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;
    int diff;

    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    /* diff > 0: the source crosses its page boundary first; < 0: the dest. */
    diff = desta->size1 - srca->size1;
    if (likely(diff == 0)) {
        /* Both accesses split at the same offset: copy fragment by fragment. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (diff > 0) {
        /* The tail of dest fragment 1 is fed from the start of src fragment 2. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /* The start of dest fragment 2 is fed from the tail of src fragment 1. */
        diff = -diff;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
341 
342 static int mmu_idx_from_as(uint8_t as)
343 {
344     switch (as) {
345     case AS_PRIMARY:
346         return MMU_PRIMARY_IDX;
347     case AS_SECONDARY:
348         return MMU_SECONDARY_IDX;
349     case AS_HOME:
350         return MMU_HOME_IDX;
351     default:
352         /* FIXME AS_ACCREG */
353         g_assert_not_reached();
354     }
355 }
356 
357 /* and on array */
358 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
359                              uint64_t src, uintptr_t ra)
360 {
361     const int mmu_idx = cpu_mmu_index(env, false);
362     S390Access srca1, srca2, desta;
363     uint32_t i;
364     uint8_t c = 0;
365 
366     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
367                __func__, l, dest, src);
368 
369     /* NC always processes one more byte than specified - maximum is 256 */
370     l++;
371 
372     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
373     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
374     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
375     for (i = 0; i < l; i++) {
376         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
377                           access_get_byte(env, &srca2, i, ra);
378 
379         c |= x;
380         access_set_byte(env, &desta, i, x, ra);
381     }
382     return c != 0;
383 }
384 
385 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
386                     uint64_t src)
387 {
388     return do_helper_nc(env, l, dest, src, GETPC());
389 }
390 
391 /* xor on array */
392 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
393                              uint64_t src, uintptr_t ra)
394 {
395     const int mmu_idx = cpu_mmu_index(env, false);
396     S390Access srca1, srca2, desta;
397     uint32_t i;
398     uint8_t c = 0;
399 
400     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
401                __func__, l, dest, src);
402 
403     /* XC always processes one more byte than specified - maximum is 256 */
404     l++;
405 
406     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
407     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
408     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
409 
410     /* xor with itself is the same as memset(0) */
411     if (src == dest) {
412         access_memset(env, &desta, 0, ra);
413         return 0;
414     }
415 
416     for (i = 0; i < l; i++) {
417         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
418                           access_get_byte(env, &srca2, i, ra);
419 
420         c |= x;
421         access_set_byte(env, &desta, i, x, ra);
422     }
423     return c != 0;
424 }
425 
426 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
427                     uint64_t src)
428 {
429     return do_helper_xc(env, l, dest, src, GETPC());
430 }
431 
432 /* or on array */
433 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
434                              uint64_t src, uintptr_t ra)
435 {
436     const int mmu_idx = cpu_mmu_index(env, false);
437     S390Access srca1, srca2, desta;
438     uint32_t i;
439     uint8_t c = 0;
440 
441     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
442                __func__, l, dest, src);
443 
444     /* OC always processes one more byte than specified - maximum is 256 */
445     l++;
446 
447     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
448     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
449     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
450     for (i = 0; i < l; i++) {
451         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
452                           access_get_byte(env, &srca2, i, ra);
453 
454         c |= x;
455         access_set_byte(env, &desta, i, x, ra);
456     }
457     return c != 0;
458 }
459 
460 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
461                     uint64_t src)
462 {
463     return do_helper_oc(env, l, dest, src, GETPC());
464 }
465 
466 /* memmove */
467 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
468                               uint64_t src, uintptr_t ra)
469 {
470     const int mmu_idx = cpu_mmu_index(env, false);
471     S390Access srca, desta;
472     uint32_t i;
473 
474     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
475                __func__, l, dest, src);
476 
477     /* MVC always copies one more byte than specified - maximum is 256 */
478     l++;
479 
480     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
481     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
482 
483     /*
484      * "When the operands overlap, the result is obtained as if the operands
485      * were processed one byte at a time". Only non-destructive overlaps
486      * behave like memmove().
487      */
488     if (dest == src + 1) {
489         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
490     } else if (!is_destructive_overlap(env, dest, src, l)) {
491         access_memmove(env, &desta, &srca, ra);
492     } else {
493         for (i = 0; i < l; i++) {
494             uint8_t byte = access_get_byte(env, &srca, i, ra);
495 
496             access_set_byte(env, &desta, i, byte, ra);
497         }
498     }
499 
500     return env->cc_op;
501 }
502 
503 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
504 {
505     do_helper_mvc(env, l, dest, src, GETPC());
506 }
507 
508 /* move right to left */
509 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
510 {
511     const int mmu_idx = cpu_mmu_index(env, false);
512     const uint64_t ra = GETPC();
513     S390Access srca, desta;
514     int32_t i;
515 
516     /* MVCRL always copies one more byte than specified - maximum is 256 */
517     l++;
518 
519     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
520     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
521 
522     for (i = l - 1; i >= 0; i--) {
523         uint8_t byte = access_get_byte(env, &srca, i, ra);
524         access_set_byte(env, &desta, i, byte, ra);
525     }
526 }
527 
528 /* move inverse  */
529 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
530 {
531     const int mmu_idx = cpu_mmu_index(env, false);
532     S390Access srca, desta;
533     uintptr_t ra = GETPC();
534     int i;
535 
536     /* MVCIN always copies one more byte than specified - maximum is 256 */
537     l++;
538 
539     src = wrap_address(env, src - l + 1);
540     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
541     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
542     for (i = 0; i < l; i++) {
543         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
544 
545         access_set_byte(env, &desta, i, x, ra);
546     }
547 }
548 
549 /* move numerics  */
550 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
551 {
552     const int mmu_idx = cpu_mmu_index(env, false);
553     S390Access srca1, srca2, desta;
554     uintptr_t ra = GETPC();
555     int i;
556 
557     /* MVN always copies one more byte than specified - maximum is 256 */
558     l++;
559 
560     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
561     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
562     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
563     for (i = 0; i < l; i++) {
564         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
565                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
566 
567         access_set_byte(env, &desta, i, x, ra);
568     }
569 }
570 
/*
 * move with offset
 *
 * @l packs two lengths: the high nibble is the destination length minus one,
 * the low nibble the source length minus one.  The source is written into
 * the destination shifted left by one nibble, working from the rightmost
 * byte to the left; the low nibble of the rightmost destination byte is
 * preserved and exhausted source bytes are replaced by zero.
 */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    /* Keep the destination's low nibble, take the source's low nibble above. */
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* High nibble of the previous source byte becomes the low nibble. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
604 
605 /* move zones  */
606 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
607 {
608     const int mmu_idx = cpu_mmu_index(env, false);
609     S390Access srca1, srca2, desta;
610     uintptr_t ra = GETPC();
611     int i;
612 
613     /* MVZ always copies one more byte than specified - maximum is 256 */
614     l++;
615 
616     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
617     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
618     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
619     for (i = 0; i < l; i++) {
620         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
621                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
622 
623         access_set_byte(env, &desta, i, x, ra);
624     }
625 }
626 
627 /* compare unsigned byte arrays */
628 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
629                               uint64_t s2, uintptr_t ra)
630 {
631     uint32_t i;
632     uint32_t cc = 0;
633 
634     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
635                __func__, l, s1, s2);
636 
637     for (i = 0; i <= l; i++) {
638         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
639         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
640         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
641         if (x < y) {
642             cc = 1;
643             break;
644         } else if (x > y) {
645             cc = 2;
646             break;
647         }
648     }
649 
650     HELPER_LOG("\n");
651     return cc;
652 }
653 
654 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
655 {
656     return do_helper_clc(env, l, s1, s2, GETPC());
657 }
658 
659 /* compare logical under mask */
660 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
661                      uint64_t addr)
662 {
663     uintptr_t ra = GETPC();
664     uint32_t cc = 0;
665 
666     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
667                mask, addr);
668 
669     while (mask) {
670         if (mask & 8) {
671             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
672             uint8_t r = extract32(r1, 24, 8);
673             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
674                        addr);
675             if (r < d) {
676                 cc = 1;
677                 break;
678             } else if (r > d) {
679                 cc = 2;
680                 break;
681             }
682             addr++;
683         }
684         mask = (mask << 1) & 0xf;
685         r1 <<= 8;
686     }
687 
688     HELPER_LOG("\n");
689     return cc;
690 }
691 
692 static inline uint64_t get_address(CPUS390XState *env, int reg)
693 {
694     return wrap_address(env, env->regs[reg]);
695 }
696 
697 /*
698  * Store the address to the given register, zeroing out unused leftmost
699  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
700  */
701 static inline void set_address_zero(CPUS390XState *env, int reg,
702                                     uint64_t address)
703 {
704     if (env->psw.mask & PSW_MASK_64) {
705         env->regs[reg] = address;
706     } else {
707         if (!(env->psw.mask & PSW_MASK_32)) {
708             address &= 0x00ffffff;
709         } else {
710             address &= 0x7fffffff;
711         }
712         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
713     }
714 }
715 
716 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
717 {
718     if (env->psw.mask & PSW_MASK_64) {
719         /* 64-Bit mode */
720         env->regs[reg] = address;
721     } else {
722         if (!(env->psw.mask & PSW_MASK_32)) {
723             /* 24-Bit mode. According to the PoO it is implementation
724             dependent if bits 32-39 remain unchanged or are set to
725             zeros.  Choose the former so that the function can also be
726             used for TRT.  */
727             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
728         } else {
729             /* 31-Bit mode. According to the PoO it is implementation
730             dependent if bit 32 remains unchanged or is set to zero.
731             Choose the latter so that the function can also be used for
732             TRT.  */
733             address &= 0x7fffffff;
734             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
735         }
736     }
737 }
738 
739 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
740 {
741     if (!(env->psw.mask & PSW_MASK_64)) {
742         return (uint32_t)length;
743     }
744     return length;
745 }
746 
747 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
748 {
749     if (!(env->psw.mask & PSW_MASK_64)) {
750         /* 24-Bit and 31-Bit mode */
751         length &= 0x7fffffff;
752     }
753     return length;
754 }
755 
756 static inline uint64_t get_length(CPUS390XState *env, int reg)
757 {
758     return wrap_length31(env, env->regs[reg]);
759 }
760 
761 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
762 {
763     if (env->psw.mask & PSW_MASK_64) {
764         /* 64-Bit mode */
765         env->regs[reg] = length;
766     } else {
767         /* 24-Bit and 31-Bit mode */
768         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
769     }
770 }
771 
772 /* search string (c is byte to search, r2 is string, r1 end of string) */
773 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
774 {
775     uintptr_t ra = GETPC();
776     uint64_t end, str;
777     uint32_t len;
778     uint8_t v, c = env->regs[0];
779 
780     /* Bits 32-55 must contain all 0.  */
781     if (env->regs[0] & 0xffffff00u) {
782         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
783     }
784 
785     str = get_address(env, r2);
786     end = get_address(env, r1);
787 
788     /* Lest we fail to service interrupts in a timely manner, limit the
789        amount of work we're willing to do.  For now, let's cap at 8k.  */
790     for (len = 0; len < 0x2000; ++len) {
791         if (str + len == end) {
792             /* Character not found.  R1 & R2 are unmodified.  */
793             env->cc_op = 2;
794             return;
795         }
796         v = cpu_ldub_data_ra(env, str + len, ra);
797         if (v == c) {
798             /* Character found.  Set R1 to the location; R2 is unmodified.  */
799             env->cc_op = 1;
800             set_address(env, r1, str + len);
801             return;
802         }
803     }
804 
805     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
806     env->cc_op = 3;
807     set_address(env, r2, str + len);
808 }
809 
810 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
811 {
812     uintptr_t ra = GETPC();
813     uint32_t len;
814     uint16_t v, c = env->regs[0];
815     uint64_t end, str, adj_end;
816 
817     /* Bits 32-47 of R0 must be zero.  */
818     if (env->regs[0] & 0xffff0000u) {
819         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
820     }
821 
822     str = get_address(env, r2);
823     end = get_address(env, r1);
824 
825     /* If the LSB of the two addresses differ, use one extra byte.  */
826     adj_end = end + ((str ^ end) & 1);
827 
828     /* Lest we fail to service interrupts in a timely manner, limit the
829        amount of work we're willing to do.  For now, let's cap at 8k.  */
830     for (len = 0; len < 0x2000; len += 2) {
831         if (str + len == adj_end) {
832             /* End of input found.  */
833             env->cc_op = 2;
834             return;
835         }
836         v = cpu_lduw_data_ra(env, str + len, ra);
837         if (v == c) {
838             /* Character found.  Set R1 to the location; R2 is unmodified.  */
839             env->cc_op = 1;
840             set_address(env, r1, str + len);
841             return;
842         }
843     }
844 
845     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
846     env->cc_op = 3;
847     set_address(env, r2, str + len);
848 }
849 
850 /* unsigned string compare (c is string terminator) */
851 Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
852 {
853     uintptr_t ra = GETPC();
854     uint32_t len;
855 
856     c = c & 0xff;
857     s1 = wrap_address(env, s1);
858     s2 = wrap_address(env, s2);
859 
860     /* Lest we fail to service interrupts in a timely manner, limit the
861        amount of work we're willing to do.  For now, let's cap at 8k.  */
862     for (len = 0; len < 0x2000; ++len) {
863         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
864         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
865         if (v1 == v2) {
866             if (v1 == c) {
867                 /* Equal.  CC=0, and don't advance the registers.  */
868                 env->cc_op = 0;
869                 return int128_make128(s2, s1);
870             }
871         } else {
872             /* Unequal.  CC={1,2}, and advance the registers.  Note that
873                the terminator need not be zero, but the string that contains
874                the terminator is by definition "low".  */
875             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
876             return int128_make128(s2 + len, s1 + len);
877         }
878     }
879 
880     /* CPU-determined bytes equal; advance the registers.  */
881     env->cc_op = 3;
882     return int128_make128(s2 + len, s1 + len);
883 }
884 
885 /* move page */
886 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
887 {
888     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
889     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
890     const int mmu_idx = cpu_mmu_index(env, false);
891     const bool f = extract64(r0, 11, 1);
892     const bool s = extract64(r0, 10, 1);
893     const bool cco = extract64(r0, 8, 1);
894     uintptr_t ra = GETPC();
895     S390Access srca, desta;
896     int exc;
897 
898     if ((f && s) || extract64(r0, 12, 4)) {
899         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
900     }
901 
902     /*
903      * We always manually handle exceptions such that we can properly store
904      * r1/r2 to the lowcore on page-translation exceptions.
905      *
906      * TODO: Access key handling
907      */
908     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
909                             MMU_DATA_LOAD, mmu_idx, ra);
910     if (exc) {
911         if (cco) {
912             return 2;
913         }
914         goto inject_exc;
915     }
916     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
917                             MMU_DATA_STORE, mmu_idx, ra);
918     if (exc) {
919         if (cco && exc != PGM_PROTECTION) {
920             return 1;
921         }
922         goto inject_exc;
923     }
924     access_memmove(env, &desta, &srca, ra);
925     return 0; /* data moved */
926 inject_exc:
927 #if !defined(CONFIG_USER_ONLY)
928     if (exc != PGM_ADDRESSING) {
929         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
930                  env->tlb_fill_tec);
931     }
932     if (exc == PGM_PAGE_TRANS) {
933         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
934                  r1 << 4 | r2);
935     }
936 #endif
937     tcg_s390_program_interrupt(env, exc, ra);
938 }
939 
940 /* string copy */
941 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
942 {
943     const int mmu_idx = cpu_mmu_index(env, false);
944     const uint64_t d = get_address(env, r1);
945     const uint64_t s = get_address(env, r2);
946     const uint8_t c = env->regs[0];
947     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
948     S390Access srca, desta;
949     uintptr_t ra = GETPC();
950     int i;
951 
952     if (env->regs[0] & 0xffffff00ull) {
953         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
954     }
955 
956     /*
957      * Our access should not exceed single pages, as we must not report access
958      * exceptions exceeding the actually copied range (which we don't know at
959      * this point). We might over-indicate watchpoints within the pages
960      * (if we ever care, we have to limit processing to a single byte).
961      */
962     access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
963     access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
964     for (i = 0; i < len; i++) {
965         const uint8_t v = access_get_byte(env, &srca, i, ra);
966 
967         access_set_byte(env, &desta, i, v, ra);
968         if (v == c) {
969             set_address_zero(env, r1, d + i);
970             return 1;
971         }
972     }
973     set_address_zero(env, r1, d + len);
974     set_address_zero(env, r2, s + len);
975     return 3;
976 }
977 
978 /* load access registers r1 to r3 from memory at a2 */
979 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
980 {
981     uintptr_t ra = GETPC();
982     int i;
983 
984     if (a2 & 0x3) {
985         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
986     }
987 
988     for (i = r1;; i = (i + 1) % 16) {
989         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
990         a2 += 4;
991 
992         if (i == r3) {
993             break;
994         }
995     }
996 }
997 
998 /* store access registers r1 to r3 in memory at a2 */
999 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1000 {
1001     uintptr_t ra = GETPC();
1002     int i;
1003 
1004     if (a2 & 0x3) {
1005         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1006     }
1007 
1008     for (i = r1;; i = (i + 1) % 16) {
1009         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1010         a2 += 4;
1011 
1012         if (i == r3) {
1013             break;
1014         }
1015     }
1016 }
1017 
1018 /* move long helper */
1019 static inline uint32_t do_mvcl(CPUS390XState *env,
1020                                uint64_t *dest, uint64_t *destlen,
1021                                uint64_t *src, uint64_t *srclen,
1022                                uint16_t pad, int wordsize, uintptr_t ra)
1023 {
1024     const int mmu_idx = cpu_mmu_index(env, false);
1025     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1026     S390Access srca, desta;
1027     int i, cc;
1028 
1029     if (*destlen == *srclen) {
1030         cc = 0;
1031     } else if (*destlen < *srclen) {
1032         cc = 1;
1033     } else {
1034         cc = 2;
1035     }
1036 
1037     if (!*destlen) {
1038         return cc;
1039     }
1040 
1041     /*
1042      * Only perform one type of type of operation (move/pad) at a time.
1043      * Stay within single pages.
1044      */
1045     if (*srclen) {
1046         /* Copy the src array */
1047         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1048         *destlen -= len;
1049         *srclen -= len;
1050         access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1051         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1052         access_memmove(env, &desta, &srca, ra);
1053         *src = wrap_address(env, *src + len);
1054         *dest = wrap_address(env, *dest + len);
1055     } else if (wordsize == 1) {
1056         /* Pad the remaining area */
1057         *destlen -= len;
1058         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1059         access_memset(env, &desta, pad, ra);
1060         *dest = wrap_address(env, *dest + len);
1061     } else {
1062         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1063 
1064         /* The remaining length selects the padding byte. */
1065         for (i = 0; i < len; (*destlen)--, i++) {
1066             if (*destlen & 1) {
1067                 access_set_byte(env, &desta, i, pad, ra);
1068             } else {
1069                 access_set_byte(env, &desta, i, pad >> 8, ra);
1070             }
1071         }
1072         *dest = wrap_address(env, *dest + len);
1073     }
1074 
1075     return *destlen ? 3 : cc;
1076 }
1077 
1078 /* move long */
1079 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1080 {
1081     const int mmu_idx = cpu_mmu_index(env, false);
1082     uintptr_t ra = GETPC();
1083     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1084     uint64_t dest = get_address(env, r1);
1085     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1086     uint64_t src = get_address(env, r2);
1087     uint8_t pad = env->regs[r2 + 1] >> 24;
1088     CPUState *cs = env_cpu(env);
1089     S390Access srca, desta;
1090     uint32_t cc, cur_len;
1091 
1092     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1093         cc = 3;
1094     } else if (srclen == destlen) {
1095         cc = 0;
1096     } else if (destlen < srclen) {
1097         cc = 1;
1098     } else {
1099         cc = 2;
1100     }
1101 
1102     /* We might have to zero-out some bits even if there was no action. */
1103     if (unlikely(!destlen || cc == 3)) {
1104         set_address_zero(env, r2, src);
1105         set_address_zero(env, r1, dest);
1106         return cc;
1107     } else if (!srclen) {
1108         set_address_zero(env, r2, src);
1109     }
1110 
1111     /*
1112      * Only perform one type of type of operation (move/pad) in one step.
1113      * Stay within single pages.
1114      */
1115     while (destlen) {
1116         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1117         if (!srclen) {
1118             access_prepare(&desta, env, dest, cur_len,
1119                            MMU_DATA_STORE, mmu_idx, ra);
1120             access_memset(env, &desta, pad, ra);
1121         } else {
1122             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1123 
1124             access_prepare(&srca, env, src, cur_len,
1125                            MMU_DATA_LOAD, mmu_idx, ra);
1126             access_prepare(&desta, env, dest, cur_len,
1127                            MMU_DATA_STORE, mmu_idx, ra);
1128             access_memmove(env, &desta, &srca, ra);
1129             src = wrap_address(env, src + cur_len);
1130             srclen -= cur_len;
1131             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1132             set_address_zero(env, r2, src);
1133         }
1134         dest = wrap_address(env, dest + cur_len);
1135         destlen -= cur_len;
1136         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1137         set_address_zero(env, r1, dest);
1138 
1139         /*
1140          * MVCL is interruptible. Return to the main loop if requested after
1141          * writing back all state to registers. If no interrupt will get
1142          * injected, we'll end up back in this handler and continue processing
1143          * the remaining parts.
1144          */
1145         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1146             cpu_loop_exit_restore(cs, ra);
1147         }
1148     }
1149     return cc;
1150 }
1151 
1152 /* move long extended */
1153 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1154                        uint32_t r3)
1155 {
1156     uintptr_t ra = GETPC();
1157     uint64_t destlen = get_length(env, r1 + 1);
1158     uint64_t dest = get_address(env, r1);
1159     uint64_t srclen = get_length(env, r3 + 1);
1160     uint64_t src = get_address(env, r3);
1161     uint8_t pad = a2;
1162     uint32_t cc;
1163 
1164     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1165 
1166     set_length(env, r1 + 1, destlen);
1167     set_length(env, r3 + 1, srclen);
1168     set_address(env, r1, dest);
1169     set_address(env, r3, src);
1170 
1171     return cc;
1172 }
1173 
1174 /* move long unicode */
1175 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1176                        uint32_t r3)
1177 {
1178     uintptr_t ra = GETPC();
1179     uint64_t destlen = get_length(env, r1 + 1);
1180     uint64_t dest = get_address(env, r1);
1181     uint64_t srclen = get_length(env, r3 + 1);
1182     uint64_t src = get_address(env, r3);
1183     uint16_t pad = a2;
1184     uint32_t cc;
1185 
1186     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1187 
1188     set_length(env, r1 + 1, destlen);
1189     set_length(env, r3 + 1, srclen);
1190     set_address(env, r1, dest);
1191     set_address(env, r3, src);
1192 
1193     return cc;
1194 }
1195 
1196 /* compare logical long helper */
1197 static inline uint32_t do_clcl(CPUS390XState *env,
1198                                uint64_t *src1, uint64_t *src1len,
1199                                uint64_t *src3, uint64_t *src3len,
1200                                uint16_t pad, uint64_t limit,
1201                                int wordsize, uintptr_t ra)
1202 {
1203     uint64_t len = MAX(*src1len, *src3len);
1204     uint32_t cc = 0;
1205 
1206     check_alignment(env, *src1len | *src3len, wordsize, ra);
1207 
1208     if (!len) {
1209         return cc;
1210     }
1211 
1212     /* Lest we fail to service interrupts in a timely manner, limit the
1213        amount of work we're willing to do.  */
1214     if (len > limit) {
1215         len = limit;
1216         cc = 3;
1217     }
1218 
1219     for (; len; len -= wordsize) {
1220         uint16_t v1 = pad;
1221         uint16_t v3 = pad;
1222 
1223         if (*src1len) {
1224             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1225         }
1226         if (*src3len) {
1227             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1228         }
1229 
1230         if (v1 != v3) {
1231             cc = (v1 < v3) ? 1 : 2;
1232             break;
1233         }
1234 
1235         if (*src1len) {
1236             *src1 += wordsize;
1237             *src1len -= wordsize;
1238         }
1239         if (*src3len) {
1240             *src3 += wordsize;
1241             *src3len -= wordsize;
1242         }
1243     }
1244 
1245     return cc;
1246 }
1247 
1248 
1249 /* compare logical long */
1250 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1251 {
1252     uintptr_t ra = GETPC();
1253     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1254     uint64_t src1 = get_address(env, r1);
1255     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1256     uint64_t src3 = get_address(env, r2);
1257     uint8_t pad = env->regs[r2 + 1] >> 24;
1258     uint32_t cc;
1259 
1260     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1261 
1262     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1263     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1264     set_address(env, r1, src1);
1265     set_address(env, r2, src3);
1266 
1267     return cc;
1268 }
1269 
1270 /* compare logical long extended memcompare insn with padding */
1271 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1272                        uint32_t r3)
1273 {
1274     uintptr_t ra = GETPC();
1275     uint64_t src1len = get_length(env, r1 + 1);
1276     uint64_t src1 = get_address(env, r1);
1277     uint64_t src3len = get_length(env, r3 + 1);
1278     uint64_t src3 = get_address(env, r3);
1279     uint8_t pad = a2;
1280     uint32_t cc;
1281 
1282     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1283 
1284     set_length(env, r1 + 1, src1len);
1285     set_length(env, r3 + 1, src3len);
1286     set_address(env, r1, src1);
1287     set_address(env, r3, src3);
1288 
1289     return cc;
1290 }
1291 
1292 /* compare logical long unicode memcompare insn with padding */
1293 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1294                        uint32_t r3)
1295 {
1296     uintptr_t ra = GETPC();
1297     uint64_t src1len = get_length(env, r1 + 1);
1298     uint64_t src1 = get_address(env, r1);
1299     uint64_t src3len = get_length(env, r3 + 1);
1300     uint64_t src3 = get_address(env, r3);
1301     uint16_t pad = a2;
1302     uint32_t cc = 0;
1303 
1304     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1305 
1306     set_length(env, r1 + 1, src1len);
1307     set_length(env, r3 + 1, src3len);
1308     set_address(env, r1, src1);
1309     set_address(env, r3, src3);
1310 
1311     return cc;
1312 }
1313 
1314 /* checksum */
1315 Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1316                     uint64_t src, uint64_t src_len)
1317 {
1318     uintptr_t ra = GETPC();
1319     uint64_t max_len, len;
1320     uint64_t cksm = (uint32_t)r1;
1321 
1322     /* Lest we fail to service interrupts in a timely manner, limit the
1323        amount of work we're willing to do.  For now, let's cap at 8k.  */
1324     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1325 
1326     /* Process full words as available.  */
1327     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1328         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1329     }
1330 
1331     switch (max_len - len) {
1332     case 1:
1333         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1334         len += 1;
1335         break;
1336     case 2:
1337         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1338         len += 2;
1339         break;
1340     case 3:
1341         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1342         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1343         len += 3;
1344         break;
1345     }
1346 
1347     /* Fold the carry from the checksum.  Note that we can see carry-out
1348        during folding more than once (but probably not more than twice).  */
1349     while (cksm > 0xffffffffull) {
1350         cksm = (uint32_t)cksm + (cksm >> 32);
1351     }
1352 
1353     /* Indicate whether or not we've processed everything.  */
1354     env->cc_op = (len == src_len ? 0 : 3);
1355 
1356     /* Return both cksm and processed length.  */
1357     return int128_make128(cksm, len);
1358 }
1359 
1360 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1361 {
1362     uintptr_t ra = GETPC();
1363     int len_dest = len >> 4;
1364     int len_src = len & 0xf;
1365     uint8_t b;
1366 
1367     dest += len_dest;
1368     src += len_src;
1369 
1370     /* last byte is special, it only flips the nibbles */
1371     b = cpu_ldub_data_ra(env, src, ra);
1372     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1373     src--;
1374     len_src--;
1375 
1376     /* now pack every value */
1377     while (len_dest > 0) {
1378         b = 0;
1379 
1380         if (len_src >= 0) {
1381             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1382             src--;
1383             len_src--;
1384         }
1385         if (len_src >= 0) {
1386             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1387             src--;
1388             len_src--;
1389         }
1390 
1391         len_dest--;
1392         dest--;
1393         cpu_stb_data_ra(env, dest, b, ra);
1394     }
1395 }
1396 
1397 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1398                            uint32_t srclen, int ssize, uintptr_t ra)
1399 {
1400     int i;
1401     /* The destination operand is always 16 bytes long.  */
1402     const int destlen = 16;
1403 
1404     /* The operands are processed from right to left.  */
1405     src += srclen - 1;
1406     dest += destlen - 1;
1407 
1408     for (i = 0; i < destlen; i++) {
1409         uint8_t b = 0;
1410 
1411         /* Start with a positive sign */
1412         if (i == 0) {
1413             b = 0xc;
1414         } else if (srclen > ssize) {
1415             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1416             src -= ssize;
1417             srclen -= ssize;
1418         }
1419 
1420         if (srclen > ssize) {
1421             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1422             src -= ssize;
1423             srclen -= ssize;
1424         }
1425 
1426         cpu_stb_data_ra(env, dest, b, ra);
1427         dest--;
1428     }
1429 }
1430 
1431 
1432 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1433                  uint32_t srclen)
1434 {
1435     do_pkau(env, dest, src, srclen, 1, GETPC());
1436 }
1437 
1438 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1439                  uint32_t srclen)
1440 {
1441     do_pkau(env, dest, src, srclen, 2, GETPC());
1442 }
1443 
1444 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1445                   uint64_t src)
1446 {
1447     uintptr_t ra = GETPC();
1448     int len_dest = len >> 4;
1449     int len_src = len & 0xf;
1450     uint8_t b;
1451     int second_nibble = 0;
1452 
1453     dest += len_dest;
1454     src += len_src;
1455 
1456     /* last byte is special, it only flips the nibbles */
1457     b = cpu_ldub_data_ra(env, src, ra);
1458     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1459     src--;
1460     len_src--;
1461 
1462     /* now pad every nibble with 0xf0 */
1463 
1464     while (len_dest > 0) {
1465         uint8_t cur_byte = 0;
1466 
1467         if (len_src > 0) {
1468             cur_byte = cpu_ldub_data_ra(env, src, ra);
1469         }
1470 
1471         len_dest--;
1472         dest--;
1473 
1474         /* only advance one nibble at a time */
1475         if (second_nibble) {
1476             cur_byte >>= 4;
1477             len_src--;
1478             src--;
1479         }
1480         second_nibble = !second_nibble;
1481 
1482         /* digit */
1483         cur_byte = (cur_byte & 0xf);
1484         /* zone bits */
1485         cur_byte |= 0xf0;
1486 
1487         cpu_stb_data_ra(env, dest, cur_byte, ra);
1488     }
1489 }
1490 
1491 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1492                                  uint32_t destlen, int dsize, uint64_t src,
1493                                  uintptr_t ra)
1494 {
1495     int i;
1496     uint32_t cc;
1497     uint8_t b;
1498     /* The source operand is always 16 bytes long.  */
1499     const int srclen = 16;
1500 
1501     /* The operands are processed from right to left.  */
1502     src += srclen - 1;
1503     dest += destlen - dsize;
1504 
1505     /* Check for the sign.  */
1506     b = cpu_ldub_data_ra(env, src, ra);
1507     src--;
1508     switch (b & 0xf) {
1509     case 0xa:
1510     case 0xc:
1511     case 0xe ... 0xf:
1512         cc = 0;  /* plus */
1513         break;
1514     case 0xb:
1515     case 0xd:
1516         cc = 1;  /* minus */
1517         break;
1518     default:
1519     case 0x0 ... 0x9:
1520         cc = 3;  /* invalid */
1521         break;
1522     }
1523 
1524     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1525     for (i = 0; i < destlen; i += dsize) {
1526         if (i == (31 * dsize)) {
1527             /* If length is 32/64 bytes, the leftmost byte is 0. */
1528             b = 0;
1529         } else if (i % (2 * dsize)) {
1530             b = cpu_ldub_data_ra(env, src, ra);
1531             src--;
1532         } else {
1533             b >>= 4;
1534         }
1535         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1536         dest -= dsize;
1537     }
1538 
1539     return cc;
1540 }
1541 
1542 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1543                        uint64_t src)
1544 {
1545     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1546 }
1547 
1548 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1549                        uint64_t src)
1550 {
1551     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1552 }
1553 
1554 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1555 {
1556     uintptr_t ra = GETPC();
1557     uint32_t cc = 0;
1558     int i;
1559 
1560     for (i = 0; i < destlen; i++) {
1561         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1562         /* digit */
1563         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1564 
1565         if (i == (destlen - 1)) {
1566             /* sign */
1567             cc |= (b & 0xf) < 0xa ? 1 : 0;
1568         } else {
1569             /* digit */
1570             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1571         }
1572     }
1573 
1574     return cc;
1575 }
1576 
1577 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1578                              uint64_t trans, uintptr_t ra)
1579 {
1580     uint32_t i;
1581 
1582     for (i = 0; i <= len; i++) {
1583         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1584         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1585         cpu_stb_data_ra(env, array + i, new_byte, ra);
1586     }
1587 
1588     return env->cc_op;
1589 }
1590 
1591 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1592                 uint64_t trans)
1593 {
1594     do_helper_tr(env, len, array, trans, GETPC());
1595 }
1596 
1597 Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
1598                    uint64_t len, uint64_t trans)
1599 {
1600     uintptr_t ra = GETPC();
1601     uint8_t end = env->regs[0] & 0xff;
1602     uint64_t l = len;
1603     uint64_t i;
1604     uint32_t cc = 0;
1605 
1606     if (!(env->psw.mask & PSW_MASK_64)) {
1607         array &= 0x7fffffff;
1608         l = (uint32_t)l;
1609     }
1610 
1611     /* Lest we fail to service interrupts in a timely manner, limit the
1612        amount of work we're willing to do.  For now, let's cap at 8k.  */
1613     if (l > 0x2000) {
1614         l = 0x2000;
1615         cc = 3;
1616     }
1617 
1618     for (i = 0; i < l; i++) {
1619         uint8_t byte, new_byte;
1620 
1621         byte = cpu_ldub_data_ra(env, array + i, ra);
1622 
1623         if (byte == end) {
1624             cc = 1;
1625             break;
1626         }
1627 
1628         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1629         cpu_stb_data_ra(env, array + i, new_byte, ra);
1630     }
1631 
1632     env->cc_op = cc;
1633     return int128_make128(len - i, array + i);
1634 }
1635 
1636 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1637                                      uint64_t array, uint64_t trans,
1638                                      int inc, uintptr_t ra)
1639 {
1640     int i;
1641 
1642     for (i = 0; i <= len; i++) {
1643         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1644         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1645 
1646         if (sbyte != 0) {
1647             set_address(env, 1, array + i * inc);
1648             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1649             return (i == len) ? 2 : 1;
1650         }
1651     }
1652 
1653     return 0;
1654 }
1655 
1656 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1657                                   uint64_t array, uint64_t trans,
1658                                   uintptr_t ra)
1659 {
1660     return do_helper_trt(env, len, array, trans, 1, ra);
1661 }
1662 
1663 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1664                      uint64_t trans)
1665 {
1666     return do_helper_trt(env, len, array, trans, 1, GETPC());
1667 }
1668 
1669 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1670                                    uint64_t array, uint64_t trans,
1671                                    uintptr_t ra)
1672 {
1673     return do_helper_trt(env, len, array, trans, -1, ra);
1674 }
1675 
1676 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1677                       uint64_t trans)
1678 {
1679     return do_helper_trt(env, len, array, trans, -1, GETPC());
1680 }
1681 
1682 /* Translate one/two to one/two */
1683 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1684                       uint32_t tst, uint32_t sizes)
1685 {
1686     uintptr_t ra = GETPC();
1687     int dsize = (sizes & 1) ? 1 : 2;
1688     int ssize = (sizes & 2) ? 1 : 2;
1689     uint64_t tbl = get_address(env, 1);
1690     uint64_t dst = get_address(env, r1);
1691     uint64_t len = get_length(env, r1 + 1);
1692     uint64_t src = get_address(env, r2);
1693     uint32_t cc = 3;
1694     int i;
1695 
1696     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1697        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1698        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1699     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1700         tbl &= -4096;
1701     } else {
1702         tbl &= -8;
1703     }
1704 
1705     check_alignment(env, len, ssize, ra);
1706 
1707     /* Lest we fail to service interrupts in a timely manner, */
1708     /* limit the amount of work we're willing to do.   */
1709     for (i = 0; i < 0x2000; i++) {
1710         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1711         uint64_t tble = tbl + (sval * dsize);
1712         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1713         if (dval == tst) {
1714             cc = 1;
1715             break;
1716         }
1717         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1718 
1719         len -= ssize;
1720         src += ssize;
1721         dst += dsize;
1722 
1723         if (len == 0) {
1724             cc = 0;
1725             break;
1726         }
1727     }
1728 
1729     set_address(env, r1, dst);
1730     set_length(env, r1 + 1, len);
1731     set_address(env, r2, src);
1732 
1733     return cc;
1734 }
1735 
1736 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1737                         uint64_t a2, bool parallel)
1738 {
1739     uint32_t mem_idx = cpu_mmu_index(env, false);
1740     uintptr_t ra = GETPC();
1741     uint32_t fc = extract32(env->regs[0], 0, 8);
1742     uint32_t sc = extract32(env->regs[0], 8, 8);
1743     uint64_t pl = get_address(env, 1) & -16;
1744     uint64_t svh, svl;
1745     uint32_t cc;
1746 
1747     /* Sanity check the function code and storage characteristic.  */
1748     if (fc > 1 || sc > 3) {
1749         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1750             goto spec_exception;
1751         }
1752         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1753             goto spec_exception;
1754         }
1755     }
1756 
1757     /* Sanity check the alignments.  */
1758     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1759         goto spec_exception;
1760     }
1761 
1762     /* Sanity check writability of the store address.  */
1763     probe_write(env, a2, 1 << sc, mem_idx, ra);
1764 
1765     /*
1766      * Note that the compare-and-swap is atomic, and the store is atomic,
1767      * but the complete operation is not.  Therefore we do not need to
1768      * assert serial context in order to implement this.  That said,
1769      * restart early if we can't support either operation that is supposed
1770      * to be atomic.
1771      */
1772     if (parallel) {
1773         uint32_t max = 2;
1774 #ifdef CONFIG_ATOMIC64
1775         max = 3;
1776 #endif
1777         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1778             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1779             cpu_loop_exit_atomic(env_cpu(env), ra);
1780         }
1781     }
1782 
1783     /* All loads happen before all stores.  For simplicity, load the entire
1784        store value area from the parameter list.  */
1785     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1786     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1787 
1788     switch (fc) {
1789     case 0:
1790         {
1791             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1792             uint32_t cv = env->regs[r3];
1793             uint32_t ov;
1794 
1795             if (parallel) {
1796 #ifdef CONFIG_USER_ONLY
1797                 uint32_t *haddr = g2h(env_cpu(env), a1);
1798                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1799 #else
1800                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1801                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1802 #endif
1803             } else {
1804                 ov = cpu_ldl_data_ra(env, a1, ra);
1805                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1806             }
1807             cc = (ov != cv);
1808             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1809         }
1810         break;
1811 
1812     case 1:
1813         {
1814             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1815             uint64_t cv = env->regs[r3];
1816             uint64_t ov;
1817 
1818             if (parallel) {
1819 #ifdef CONFIG_ATOMIC64
1820                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1821                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1822 #else
1823                 /* Note that we asserted !parallel above.  */
1824                 g_assert_not_reached();
1825 #endif
1826             } else {
1827                 ov = cpu_ldq_data_ra(env, a1, ra);
1828                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1829             }
1830             cc = (ov != cv);
1831             env->regs[r3] = ov;
1832         }
1833         break;
1834 
1835     case 2:
1836         {
1837             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1838             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1839             Int128 nv = int128_make128(nvl, nvh);
1840             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1841             Int128 ov;
1842 
1843             if (!parallel) {
1844                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1845                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1846 
1847                 ov = int128_make128(ol, oh);
1848                 cc = !int128_eq(ov, cv);
1849                 if (cc) {
1850                     nv = ov;
1851                 }
1852 
1853                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1854                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1855             } else if (HAVE_CMPXCHG128) {
1856                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1857                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1858                 cc = !int128_eq(ov, cv);
1859             } else {
1860                 /* Note that we asserted !parallel above.  */
1861                 g_assert_not_reached();
1862             }
1863 
1864             env->regs[r3 + 0] = int128_gethi(ov);
1865             env->regs[r3 + 1] = int128_getlo(ov);
1866         }
1867         break;
1868 
1869     default:
1870         g_assert_not_reached();
1871     }
1872 
1873     /* Store only if the comparison succeeded.  Note that above we use a pair
1874        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1875        from the most-significant bits of svh.  */
1876     if (cc == 0) {
1877         switch (sc) {
1878         case 0:
1879             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1880             break;
1881         case 1:
1882             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1883             break;
1884         case 2:
1885             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1886             break;
1887         case 3:
1888             cpu_stq_data_ra(env, a2, svh, ra);
1889             break;
1890         case 4:
1891             if (!parallel) {
1892                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1893                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1894             } else if (HAVE_ATOMIC128) {
1895                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1896                 Int128 sv = int128_make128(svl, svh);
1897                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1898             } else {
1899                 /* Note that we asserted !parallel above.  */
1900                 g_assert_not_reached();
1901             }
1902             break;
1903         default:
1904             g_assert_not_reached();
1905         }
1906     }
1907 
1908     return cc;
1909 
1910  spec_exception:
1911     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1912 }
1913 
1914 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1915 {
1916     return do_csst(env, r3, a1, a2, false);
1917 }
1918 
1919 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1920                                uint64_t a2)
1921 {
1922     return do_csst(env, r3, a1, a2, true);
1923 }
1924 
1925 #if !defined(CONFIG_USER_ONLY)
1926 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1927 {
1928     uintptr_t ra = GETPC();
1929     bool PERchanged = false;
1930     uint64_t src = a2;
1931     uint32_t i;
1932 
1933     if (src & 0x7) {
1934         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1935     }
1936 
1937     for (i = r1;; i = (i + 1) % 16) {
1938         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1939         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1940             PERchanged = true;
1941         }
1942         env->cregs[i] = val;
1943         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1944                    i, src, val);
1945         src += sizeof(uint64_t);
1946 
1947         if (i == r3) {
1948             break;
1949         }
1950     }
1951 
1952     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1953         s390_cpu_recompute_watchpoints(env_cpu(env));
1954     }
1955 
1956     tlb_flush(env_cpu(env));
1957 }
1958 
1959 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1960 {
1961     uintptr_t ra = GETPC();
1962     bool PERchanged = false;
1963     uint64_t src = a2;
1964     uint32_t i;
1965 
1966     if (src & 0x3) {
1967         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1968     }
1969 
1970     for (i = r1;; i = (i + 1) % 16) {
1971         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1972         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1973             PERchanged = true;
1974         }
1975         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1976         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1977         src += sizeof(uint32_t);
1978 
1979         if (i == r3) {
1980             break;
1981         }
1982     }
1983 
1984     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1985         s390_cpu_recompute_watchpoints(env_cpu(env));
1986     }
1987 
1988     tlb_flush(env_cpu(env));
1989 }
1990 
1991 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1992 {
1993     uintptr_t ra = GETPC();
1994     uint64_t dest = a2;
1995     uint32_t i;
1996 
1997     if (dest & 0x7) {
1998         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1999     }
2000 
2001     for (i = r1;; i = (i + 1) % 16) {
2002         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2003         dest += sizeof(uint64_t);
2004 
2005         if (i == r3) {
2006             break;
2007         }
2008     }
2009 }
2010 
2011 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2012 {
2013     uintptr_t ra = GETPC();
2014     uint64_t dest = a2;
2015     uint32_t i;
2016 
2017     if (dest & 0x3) {
2018         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2019     }
2020 
2021     for (i = r1;; i = (i + 1) % 16) {
2022         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2023         dest += sizeof(uint32_t);
2024 
2025         if (i == r3) {
2026             break;
2027         }
2028     }
2029 }
2030 
2031 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2032 {
2033     uintptr_t ra = GETPC();
2034     int i;
2035 
2036     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2037 
2038     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2039         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2040     }
2041 
2042     return 0;
2043 }
2044 
2045 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2046 {
2047     S390CPU *cpu = env_archcpu(env);
2048     CPUState *cs = env_cpu(env);
2049 
2050     /*
2051      * TODO: we currently don't handle all access protection types
2052      * (including access-list and key-controlled) as well as AR mode.
2053      */
2054     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2055         /* Fetching permitted; storing permitted */
2056         return 0;
2057     }
2058 
2059     if (env->int_pgm_code == PGM_PROTECTION) {
2060         /* retry if reading is possible */
2061         cs->exception_index = -1;
2062         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2063             /* Fetching permitted; storing not permitted */
2064             return 1;
2065         }
2066     }
2067 
2068     switch (env->int_pgm_code) {
2069     case PGM_PROTECTION:
2070         /* Fetching not permitted; storing not permitted */
2071         cs->exception_index = -1;
2072         return 2;
2073     case PGM_ADDRESSING:
2074     case PGM_TRANS_SPEC:
2075         /* exceptions forwarded to the guest */
2076         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2077         return 0;
2078     }
2079 
2080     /* Translation not available */
2081     cs->exception_index = -1;
2082     return 3;
2083 }
2084 
2085 /* insert storage key extended */
2086 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2087 {
2088     static S390SKeysState *ss;
2089     static S390SKeysClass *skeyclass;
2090     uint64_t addr = wrap_address(env, r2);
2091     uint8_t key;
2092     int rc;
2093 
2094     addr = mmu_real2abs(env, addr);
2095     if (!mmu_absolute_addr_valid(addr, false)) {
2096         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2097     }
2098 
2099     if (unlikely(!ss)) {
2100         ss = s390_get_skeys_device();
2101         skeyclass = S390_SKEYS_GET_CLASS(ss);
2102         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2103             tlb_flush_all_cpus_synced(env_cpu(env));
2104         }
2105     }
2106 
2107     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2108     if (rc) {
2109         trace_get_skeys_nonzero(rc);
2110         return 0;
2111     }
2112     return key;
2113 }
2114 
2115 /* set storage key extended */
2116 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2117 {
2118     static S390SKeysState *ss;
2119     static S390SKeysClass *skeyclass;
2120     uint64_t addr = wrap_address(env, r2);
2121     uint8_t key;
2122     int rc;
2123 
2124     addr = mmu_real2abs(env, addr);
2125     if (!mmu_absolute_addr_valid(addr, false)) {
2126         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2127     }
2128 
2129     if (unlikely(!ss)) {
2130         ss = s390_get_skeys_device();
2131         skeyclass = S390_SKEYS_GET_CLASS(ss);
2132         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2133             tlb_flush_all_cpus_synced(env_cpu(env));
2134         }
2135     }
2136 
2137     key = r1 & 0xfe;
2138     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2139     if (rc) {
2140         trace_set_skeys_nonzero(rc);
2141     }
2142    /*
2143     * As we can only flush by virtual address and not all the entries
2144     * that point to a physical address we have to flush the whole TLB.
2145     */
2146     tlb_flush_all_cpus_synced(env_cpu(env));
2147 }
2148 
2149 /* reset reference bit extended */
2150 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2151 {
2152     uint64_t addr = wrap_address(env, r2);
2153     static S390SKeysState *ss;
2154     static S390SKeysClass *skeyclass;
2155     uint8_t re, key;
2156     int rc;
2157 
2158     addr = mmu_real2abs(env, addr);
2159     if (!mmu_absolute_addr_valid(addr, false)) {
2160         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2161     }
2162 
2163     if (unlikely(!ss)) {
2164         ss = s390_get_skeys_device();
2165         skeyclass = S390_SKEYS_GET_CLASS(ss);
2166         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2167             tlb_flush_all_cpus_synced(env_cpu(env));
2168         }
2169     }
2170 
2171     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2172     if (rc) {
2173         trace_get_skeys_nonzero(rc);
2174         return 0;
2175     }
2176 
2177     re = key & (SK_R | SK_C);
2178     key &= ~SK_R;
2179 
2180     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2181     if (rc) {
2182         trace_set_skeys_nonzero(rc);
2183         return 0;
2184     }
2185    /*
2186     * As we can only flush by virtual address and not all the entries
2187     * that point to a physical address we have to flush the whole TLB.
2188     */
2189     tlb_flush_all_cpus_synced(env_cpu(env));
2190 
2191     /*
2192      * cc
2193      *
2194      * 0  Reference bit zero; change bit zero
2195      * 1  Reference bit zero; change bit one
2196      * 2  Reference bit one; change bit zero
2197      * 3  Reference bit one; change bit one
2198      */
2199 
2200     return re >> 1;
2201 }
2202 
2203 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2204                       uint64_t key)
2205 {
2206     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2207     S390Access srca, desta;
2208     uintptr_t ra = GETPC();
2209     int cc = 0;
2210 
2211     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2212                __func__, l, a1, a2);
2213 
2214     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2215         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2216         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2217     }
2218 
2219     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2220         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2221     }
2222 
2223     l = wrap_length32(env, l);
2224     if (l > 256) {
2225         /* max 256 */
2226         l = 256;
2227         cc = 3;
2228     } else if (!l) {
2229         return cc;
2230     }
2231 
2232     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2233     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2234     access_memmove(env, &desta, &srca, ra);
2235     return cc;
2236 }
2237 
2238 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2239                       uint64_t key)
2240 {
2241     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2242     S390Access srca, desta;
2243     uintptr_t ra = GETPC();
2244     int cc = 0;
2245 
2246     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2247                __func__, l, a1, a2);
2248 
2249     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2250         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2251         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2252     }
2253 
2254     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2255         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2256     }
2257 
2258     l = wrap_length32(env, l);
2259     if (l > 256) {
2260         /* max 256 */
2261         l = 256;
2262         cc = 3;
2263     } else if (!l) {
2264         return cc;
2265     }
2266     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2267     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2268     access_memmove(env, &desta, &srca, ra);
2269     return cc;
2270 }
2271 
2272 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2273 {
2274     CPUState *cs = env_cpu(env);
2275     const uintptr_t ra = GETPC();
2276     uint64_t table, entry, raddr;
2277     uint16_t entries, i, index = 0;
2278 
2279     if (r2 & 0xff000) {
2280         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2281     }
2282 
2283     if (!(r2 & 0x800)) {
2284         /* invalidation-and-clearing operation */
2285         table = r1 & ASCE_ORIGIN;
2286         entries = (r2 & 0x7ff) + 1;
2287 
2288         switch (r1 & ASCE_TYPE_MASK) {
2289         case ASCE_TYPE_REGION1:
2290             index = (r2 >> 53) & 0x7ff;
2291             break;
2292         case ASCE_TYPE_REGION2:
2293             index = (r2 >> 42) & 0x7ff;
2294             break;
2295         case ASCE_TYPE_REGION3:
2296             index = (r2 >> 31) & 0x7ff;
2297             break;
2298         case ASCE_TYPE_SEGMENT:
2299             index = (r2 >> 20) & 0x7ff;
2300             break;
2301         }
2302         for (i = 0; i < entries; i++) {
2303             /* addresses are not wrapped in 24/31bit mode but table index is */
2304             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2305             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2306             if (!(entry & REGION_ENTRY_I)) {
2307                 /* we are allowed to not store if already invalid */
2308                 entry |= REGION_ENTRY_I;
2309                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2310             }
2311         }
2312     }
2313 
2314     /* We simply flush the complete tlb, therefore we can ignore r3. */
2315     if (m4 & 1) {
2316         tlb_flush(cs);
2317     } else {
2318         tlb_flush_all_cpus_synced(cs);
2319     }
2320 }
2321 
2322 /* invalidate pte */
2323 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2324                   uint32_t m4)
2325 {
2326     CPUState *cs = env_cpu(env);
2327     const uintptr_t ra = GETPC();
2328     uint64_t page = vaddr & TARGET_PAGE_MASK;
2329     uint64_t pte_addr, pte;
2330 
2331     /* Compute the page table entry address */
2332     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2333     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2334 
2335     /* Mark the page table entry as invalid */
2336     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2337     pte |= PAGE_ENTRY_I;
2338     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2339 
2340     /* XXX we exploit the fact that Linux passes the exact virtual
2341        address here - it's not obliged to! */
2342     if (m4 & 1) {
2343         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2344             tlb_flush_page(cs, page);
2345             /* XXX 31-bit hack */
2346             tlb_flush_page(cs, page ^ 0x80000000);
2347         } else {
2348             /* looks like we don't have a valid virtual address */
2349             tlb_flush(cs);
2350         }
2351     } else {
2352         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2353             tlb_flush_page_all_cpus_synced(cs, page);
2354             /* XXX 31-bit hack */
2355             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2356         } else {
2357             /* looks like we don't have a valid virtual address */
2358             tlb_flush_all_cpus_synced(cs);
2359         }
2360     }
2361 }
2362 
2363 /* flush local tlb */
2364 void HELPER(ptlb)(CPUS390XState *env)
2365 {
2366     tlb_flush(env_cpu(env));
2367 }
2368 
2369 /* flush global tlb */
2370 void HELPER(purge)(CPUS390XState *env)
2371 {
2372     tlb_flush_all_cpus_synced(env_cpu(env));
2373 }
2374 
2375 /* load real address */
2376 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2377 {
2378     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2379     uint64_t ret, tec;
2380     int flags, exc, cc;
2381 
2382     /* XXX incomplete - has more corner cases */
2383     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2384         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2385     }
2386 
2387     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2388     if (exc) {
2389         cc = 3;
2390         ret = exc | 0x80000000;
2391     } else {
2392         cc = 0;
2393         ret |= addr & ~TARGET_PAGE_MASK;
2394     }
2395 
2396     env->cc_op = cc;
2397     return ret;
2398 }
2399 #endif
2400 
2401 /* load pair from quadword */
2402 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2403 {
2404     uintptr_t ra = GETPC();
2405     uint64_t hi, lo;
2406 
2407     check_alignment(env, addr, 16, ra);
2408     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2409     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2410 
2411     env->retxl = lo;
2412     return hi;
2413 }
2414 
2415 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2416 {
2417     uintptr_t ra = GETPC();
2418     uint64_t hi, lo;
2419     int mem_idx;
2420     MemOpIdx oi;
2421     Int128 v;
2422 
2423     assert(HAVE_ATOMIC128);
2424 
2425     mem_idx = cpu_mmu_index(env, false);
2426     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2427     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2428     hi = int128_gethi(v);
2429     lo = int128_getlo(v);
2430 
2431     env->retxl = lo;
2432     return hi;
2433 }
2434 
2435 /* store pair to quadword */
2436 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2437                   uint64_t low, uint64_t high)
2438 {
2439     uintptr_t ra = GETPC();
2440 
2441     check_alignment(env, addr, 16, ra);
2442     cpu_stq_data_ra(env, addr + 0, high, ra);
2443     cpu_stq_data_ra(env, addr + 8, low, ra);
2444 }
2445 
2446 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2447                            uint64_t low, uint64_t high)
2448 {
2449     uintptr_t ra = GETPC();
2450     int mem_idx;
2451     MemOpIdx oi;
2452     Int128 v;
2453 
2454     assert(HAVE_ATOMIC128);
2455 
2456     mem_idx = cpu_mmu_index(env, false);
2457     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2458     v = int128_make128(low, high);
2459     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2460 }
2461 
2462 /* Execute instruction.  This instruction executes an insn modified with
2463    the contents of r1.  It does not change the executed instruction in memory;
2464    it does not change the program counter.
2465 
2466    Perform this by recording the modified instruction in env->ex_value.
2467    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2468 */
2469 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2470 {
2471     uint64_t insn;
2472     uint8_t opc;
2473 
2474     /* EXECUTE targets must be at even addresses.  */
2475     if (addr & 1) {
2476         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
2477     }
2478 
2479     insn = cpu_lduw_code(env, addr);
2480     opc = insn >> 8;
2481 
2482     /* Or in the contents of R1[56:63].  */
2483     insn |= r1 & 0xff;
2484 
2485     /* Load the rest of the instruction.  */
2486     insn <<= 48;
2487     switch (get_ilen(opc)) {
2488     case 2:
2489         break;
2490     case 4:
2491         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2492         break;
2493     case 6:
2494         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2495         break;
2496     default:
2497         g_assert_not_reached();
2498     }
2499 
2500     /* The very most common cases can be sped up by avoiding a new TB.  */
2501     if ((opc & 0xf0) == 0xd0) {
2502         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2503                                       uint64_t, uintptr_t);
2504         static const dx_helper dx[16] = {
2505             [0x0] = do_helper_trt_bkwd,
2506             [0x2] = do_helper_mvc,
2507             [0x4] = do_helper_nc,
2508             [0x5] = do_helper_clc,
2509             [0x6] = do_helper_oc,
2510             [0x7] = do_helper_xc,
2511             [0xc] = do_helper_tr,
2512             [0xd] = do_helper_trt_fwd,
2513         };
2514         dx_helper helper = dx[opc & 0xf];
2515 
2516         if (helper) {
2517             uint32_t l = extract64(insn, 48, 8);
2518             uint32_t b1 = extract64(insn, 44, 4);
2519             uint32_t d1 = extract64(insn, 32, 12);
2520             uint32_t b2 = extract64(insn, 28, 4);
2521             uint32_t d2 = extract64(insn, 16, 12);
2522             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2523             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2524 
2525             env->cc_op = helper(env, l, a1, a2, 0);
2526             env->psw.addr += ilen;
2527             return;
2528         }
2529     } else if (opc == 0x0a) {
2530         env->int_svc_code = extract64(insn, 48, 8);
2531         env->int_svc_ilen = ilen;
2532         helper_exception(env, EXCP_SVC);
2533         g_assert_not_reached();
2534     }
2535 
2536     /* Record the insn we want to execute as well as the ilen to use
2537        during the execution of the target insn.  This will also ensure
2538        that ex_value is non-zero, which flags that we are in a state
2539        that requires such execution.  */
2540     env->ex_value = insn | ilen;
2541     env->ex_target = addr;
2542 }
2543 
2544 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2545                        uint64_t len)
2546 {
2547     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2548     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2549     const uint64_t r0 = env->regs[0];
2550     const uintptr_t ra = GETPC();
2551     uint8_t dest_key, dest_as, dest_k, dest_a;
2552     uint8_t src_key, src_as, src_k, src_a;
2553     uint64_t val;
2554     int cc = 0;
2555 
2556     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2557                __func__, dest, src, len);
2558 
2559     if (!(env->psw.mask & PSW_MASK_DAT)) {
2560         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2561     }
2562 
2563     /* OAC (operand access control) for the first operand -> dest */
2564     val = (r0 & 0xffff0000ULL) >> 16;
2565     dest_key = (val >> 12) & 0xf;
2566     dest_as = (val >> 6) & 0x3;
2567     dest_k = (val >> 1) & 0x1;
2568     dest_a = val & 0x1;
2569 
2570     /* OAC (operand access control) for the second operand -> src */
2571     val = (r0 & 0x0000ffffULL);
2572     src_key = (val >> 12) & 0xf;
2573     src_as = (val >> 6) & 0x3;
2574     src_k = (val >> 1) & 0x1;
2575     src_a = val & 0x1;
2576 
2577     if (!dest_k) {
2578         dest_key = psw_key;
2579     }
2580     if (!src_k) {
2581         src_key = psw_key;
2582     }
2583     if (!dest_a) {
2584         dest_as = psw_as;
2585     }
2586     if (!src_a) {
2587         src_as = psw_as;
2588     }
2589 
2590     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2591         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2592     }
2593     if (!(env->cregs[0] & CR0_SECONDARY) &&
2594         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2595         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2596     }
2597     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2598         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2599     }
2600 
2601     len = wrap_length32(env, len);
2602     if (len > 4096) {
2603         cc = 3;
2604         len = 4096;
2605     }
2606 
2607     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2608     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2609         (env->psw.mask & PSW_MASK_PSTATE)) {
2610         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2611                       __func__);
2612         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2613     }
2614 
2615     /* FIXME: Access using correct keys and AR-mode */
2616     if (len) {
2617         S390Access srca, desta;
2618 
2619         access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
2620                        mmu_idx_from_as(src_as), ra);
2621         access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
2622                        mmu_idx_from_as(dest_as), ra);
2623 
2624         access_memmove(env, &desta, &srca, ra);
2625     }
2626 
2627     return cc;
2628 }
2629 
2630 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2631    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2632    value >= 0 indicates failure, and the CC value to be returned.  */
2633 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2634                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2635                                  uint32_t *ochar, uint32_t *olen);
2636 
2637 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2638    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2639    indicates failure, and the CC value to be returned.  */
2640 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2641                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2642                                  uint32_t *olen);
2643 
2644 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2645                        bool enh_check, uintptr_t ra,
2646                        uint32_t *ochar, uint32_t *olen)
2647 {
2648     uint8_t s0, s1, s2, s3;
2649     uint32_t c, l;
2650 
2651     if (ilen < 1) {
2652         return 0;
2653     }
2654     s0 = cpu_ldub_data_ra(env, addr, ra);
2655     if (s0 <= 0x7f) {
2656         /* one byte character */
2657         l = 1;
2658         c = s0;
2659     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2660         /* invalid character */
2661         return 2;
2662     } else if (s0 <= 0xdf) {
2663         /* two byte character */
2664         l = 2;
2665         if (ilen < 2) {
2666             return 0;
2667         }
2668         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2669         c = s0 & 0x1f;
2670         c = (c << 6) | (s1 & 0x3f);
2671         if (enh_check && (s1 & 0xc0) != 0x80) {
2672             return 2;
2673         }
2674     } else if (s0 <= 0xef) {
2675         /* three byte character */
2676         l = 3;
2677         if (ilen < 3) {
2678             return 0;
2679         }
2680         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2681         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2682         c = s0 & 0x0f;
2683         c = (c << 6) | (s1 & 0x3f);
2684         c = (c << 6) | (s2 & 0x3f);
2685         /* Fold the byte-by-byte range descriptions in the PoO into
2686            tests against the complete value.  It disallows encodings
2687            that could be smaller, and the UTF-16 surrogates.  */
2688         if (enh_check
2689             && ((s1 & 0xc0) != 0x80
2690                 || (s2 & 0xc0) != 0x80
2691                 || c < 0x1000
2692                 || (c >= 0xd800 && c <= 0xdfff))) {
2693             return 2;
2694         }
2695     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2696         /* four byte character */
2697         l = 4;
2698         if (ilen < 4) {
2699             return 0;
2700         }
2701         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2702         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2703         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2704         c = s0 & 0x07;
2705         c = (c << 6) | (s1 & 0x3f);
2706         c = (c << 6) | (s2 & 0x3f);
2707         c = (c << 6) | (s3 & 0x3f);
2708         /* See above.  */
2709         if (enh_check
2710             && ((s1 & 0xc0) != 0x80
2711                 || (s2 & 0xc0) != 0x80
2712                 || (s3 & 0xc0) != 0x80
2713                 || c < 0x010000
2714                 || c > 0x10ffff)) {
2715             return 2;
2716         }
2717     } else {
2718         /* invalid character */
2719         return 2;
2720     }
2721 
2722     *ochar = c;
2723     *olen = l;
2724     return -1;
2725 }
2726 
2727 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2728                         bool enh_check, uintptr_t ra,
2729                         uint32_t *ochar, uint32_t *olen)
2730 {
2731     uint16_t s0, s1;
2732     uint32_t c, l;
2733 
2734     if (ilen < 2) {
2735         return 0;
2736     }
2737     s0 = cpu_lduw_data_ra(env, addr, ra);
2738     if ((s0 & 0xfc00) != 0xd800) {
2739         /* one word character */
2740         l = 2;
2741         c = s0;
2742     } else {
2743         /* two word character */
2744         l = 4;
2745         if (ilen < 4) {
2746             return 0;
2747         }
2748         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2749         c = extract32(s0, 6, 4) + 1;
2750         c = (c << 6) | (s0 & 0x3f);
2751         c = (c << 10) | (s1 & 0x3ff);
2752         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2753             /* invalid surrogate character */
2754             return 2;
2755         }
2756     }
2757 
2758     *ochar = c;
2759     *olen = l;
2760     return -1;
2761 }
2762 
2763 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2764                         bool enh_check, uintptr_t ra,
2765                         uint32_t *ochar, uint32_t *olen)
2766 {
2767     uint32_t c;
2768 
2769     if (ilen < 4) {
2770         return 0;
2771     }
2772     c = cpu_ldl_data_ra(env, addr, ra);
2773     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2774         /* invalid unicode character */
2775         return 2;
2776     }
2777 
2778     *ochar = c;
2779     *olen = 4;
2780     return -1;
2781 }
2782 
2783 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2784                        uintptr_t ra, uint32_t c, uint32_t *olen)
2785 {
2786     uint8_t d[4];
2787     uint32_t l, i;
2788 
2789     if (c <= 0x7f) {
2790         /* one byte character */
2791         l = 1;
2792         d[0] = c;
2793     } else if (c <= 0x7ff) {
2794         /* two byte character */
2795         l = 2;
2796         d[1] = 0x80 | extract32(c, 0, 6);
2797         d[0] = 0xc0 | extract32(c, 6, 5);
2798     } else if (c <= 0xffff) {
2799         /* three byte character */
2800         l = 3;
2801         d[2] = 0x80 | extract32(c, 0, 6);
2802         d[1] = 0x80 | extract32(c, 6, 6);
2803         d[0] = 0xe0 | extract32(c, 12, 4);
2804     } else {
2805         /* four byte character */
2806         l = 4;
2807         d[3] = 0x80 | extract32(c, 0, 6);
2808         d[2] = 0x80 | extract32(c, 6, 6);
2809         d[1] = 0x80 | extract32(c, 12, 6);
2810         d[0] = 0xf0 | extract32(c, 18, 3);
2811     }
2812 
2813     if (ilen < l) {
2814         return 1;
2815     }
2816     for (i = 0; i < l; ++i) {
2817         cpu_stb_data_ra(env, addr + i, d[i], ra);
2818     }
2819 
2820     *olen = l;
2821     return -1;
2822 }
2823 
2824 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2825                         uintptr_t ra, uint32_t c, uint32_t *olen)
2826 {
2827     uint16_t d0, d1;
2828 
2829     if (c <= 0xffff) {
2830         /* one word character */
2831         if (ilen < 2) {
2832             return 1;
2833         }
2834         cpu_stw_data_ra(env, addr, c, ra);
2835         *olen = 2;
2836     } else {
2837         /* two word character */
2838         if (ilen < 4) {
2839             return 1;
2840         }
2841         d1 = 0xdc00 | extract32(c, 0, 10);
2842         d0 = 0xd800 | extract32(c, 10, 6);
2843         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2844         cpu_stw_data_ra(env, addr + 0, d0, ra);
2845         cpu_stw_data_ra(env, addr + 2, d1, ra);
2846         *olen = 4;
2847     }
2848 
2849     return -1;
2850 }
2851 
2852 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2853                         uintptr_t ra, uint32_t c, uint32_t *olen)
2854 {
2855     if (ilen < 4) {
2856         return 1;
2857     }
2858     cpu_stl_data_ra(env, addr, c, ra);
2859     *olen = 4;
2860     return -1;
2861 }
2862 
2863 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2864                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2865                                        decode_unicode_fn decode,
2866                                        encode_unicode_fn encode)
2867 {
2868     uint64_t dst = get_address(env, r1);
2869     uint64_t dlen = get_length(env, r1 + 1);
2870     uint64_t src = get_address(env, r2);
2871     uint64_t slen = get_length(env, r2 + 1);
2872     bool enh_check = m3 & 1;
2873     int cc, i;
2874 
2875     /* Lest we fail to service interrupts in a timely manner, limit the
2876        amount of work we're willing to do.  For now, let's cap at 256.  */
2877     for (i = 0; i < 256; ++i) {
2878         uint32_t c, ilen, olen;
2879 
2880         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2881         if (unlikely(cc >= 0)) {
2882             break;
2883         }
2884         cc = encode(env, dst, dlen, ra, c, &olen);
2885         if (unlikely(cc >= 0)) {
2886             break;
2887         }
2888 
2889         src += ilen;
2890         slen -= ilen;
2891         dst += olen;
2892         dlen -= olen;
2893         cc = 3;
2894     }
2895 
2896     set_address(env, r1, dst);
2897     set_length(env, r1 + 1, dlen);
2898     set_address(env, r2, src);
2899     set_length(env, r2 + 1, slen);
2900 
2901     return cc;
2902 }
2903 
2904 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2905 {
2906     return convert_unicode(env, r1, r2, m3, GETPC(),
2907                            decode_utf8, encode_utf16);
2908 }
2909 
2910 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2911 {
2912     return convert_unicode(env, r1, r2, m3, GETPC(),
2913                            decode_utf8, encode_utf32);
2914 }
2915 
2916 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2917 {
2918     return convert_unicode(env, r1, r2, m3, GETPC(),
2919                            decode_utf16, encode_utf8);
2920 }
2921 
2922 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2923 {
2924     return convert_unicode(env, r1, r2, m3, GETPC(),
2925                            decode_utf16, encode_utf32);
2926 }
2927 
2928 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2929 {
2930     return convert_unicode(env, r1, r2, m3, GETPC(),
2931                            decode_utf32, encode_utf8);
2932 }
2933 
2934 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2935 {
2936     return convert_unicode(env, r1, r2, m3, GETPC(),
2937                            decode_utf32, encode_utf16);
2938 }
2939 
2940 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2941                         uintptr_t ra)
2942 {
2943     /* test the actual access, not just any access to the page due to LAP */
2944     while (len) {
2945         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2946         const uint64_t curlen = MIN(pagelen, len);
2947 
2948         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2949         addr = wrap_address(env, addr + curlen);
2950         len -= curlen;
2951     }
2952 }
2953 
2954 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2955 {
2956     probe_write_access(env, addr, len, GETPC());
2957 }
2958