xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 737308fe)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/page-protection.h"
29 #include "exec/cpu_ldst.h"
30 #include "hw/core/tcg-cpu-ops.h"
31 #include "qemu/int128.h"
32 #include "qemu/atomic128.h"
33 
34 #if !defined(CONFIG_USER_ONLY)
35 #include "hw/s390x/storage-keys.h"
36 #include "hw/boards.h"
37 #endif
38 
/*
 * user_or_likely(X): in CONFIG_USER_ONLY builds this is the constant true
 * (X is not evaluated); in all other builds it is likely(X).
 */
#ifndef CONFIG_USER_ONLY
# define user_or_likely(X)    likely(X)
#else
# define user_or_likely(X)    true
#endif
44 
45 /*****************************************************************************/
46 /* Softmmu support */
47 
/* #define DEBUG_HELPER */
/*
 * HELPER_LOG(fmt, ...) forwards its arguments to qemu_log() when
 * DEBUG_HELPER is defined and expands to nothing otherwise.
 */
#ifndef DEBUG_HELPER
#define HELPER_LOG(...)
#else
#define HELPER_LOG(...) qemu_log(__VA_ARGS__)
#endif
54 
55 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
56 {
57     uint16_t pkm = env->cregs[3] >> 16;
58 
59     if (env->psw.mask & PSW_MASK_PSTATE) {
60         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
61         return pkm & (0x8000 >> psw_key);
62     }
63     return true;
64 }
65 
66 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
67                                    uint64_t src, uint32_t len)
68 {
69     if (!len || src == dest) {
70         return false;
71     }
72     /* Take care of wrapping at the end of address space. */
73     if (unlikely(wrap_address(env, src + len - 1) < src)) {
74         return dest > src || dest <= wrap_address(env, src + len - 1);
75     }
76     return dest > src && dest <= src + len - 1;
77 }
78 
79 /* Trigger a SPECIFICATION exception if an address or a length is not
80    naturally aligned.  */
81 static inline void check_alignment(CPUS390XState *env, uint64_t v,
82                                    int wordsize, uintptr_t ra)
83 {
84     if (v % wordsize) {
85         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
86     }
87 }
88 
89 /* Load a value from memory according to its size.  */
90 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
91                                            int wordsize, uintptr_t ra)
92 {
93     switch (wordsize) {
94     case 1:
95         return cpu_ldub_data_ra(env, addr, ra);
96     case 2:
97         return cpu_lduw_data_ra(env, addr, ra);
98     default:
99         abort();
100     }
101 }
102 
103 /* Store a to memory according to its size.  */
104 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
105                                       uint64_t value, int wordsize,
106                                       uintptr_t ra)
107 {
108     switch (wordsize) {
109     case 1:
110         cpu_stb_data_ra(env, addr, value, ra);
111         break;
112     case 2:
113         cpu_stw_data_ra(env, addr, value, ra);
114         break;
115     default:
116         abort();
117     }
118 }
119 
120 /* An access covers at most 4096 bytes and therefore at most two pages. */
121 typedef struct S390Access {
122     target_ulong vaddr1;
123     target_ulong vaddr2;
124     void *haddr1;
125     void *haddr2;
126     uint16_t size1;
127     uint16_t size2;
128     /*
129      * If we can't access the host page directly, we'll have to do I/O access
130      * via ld/st helpers. These are internal details, so we store the
131      * mmu idx to do the access here instead of passing it around in the
132      * helpers.
133      */
134     int mmu_idx;
135 } S390Access;
136 
137 /*
138  * With nonfault=1, return the PGM_ exception that would have been injected
139  * into the guest; return 0 if no exception was detected.
140  *
141  * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec.
142  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
143  */
144 static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
145                                     int size, MMUAccessType access_type,
146                                     int mmu_idx, bool nonfault,
147                                     void **phost, uintptr_t ra)
148 {
149     int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
150                                    nonfault, phost, ra);
151 
152     if (unlikely(flags & TLB_INVALID_MASK)) {
153 #ifdef CONFIG_USER_ONLY
154         /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
155         env->__excp_addr = addr & TARGET_PAGE_MASK;
156         return (page_get_flags(addr) & PAGE_VALID
157                 ? PGM_PROTECTION : PGM_ADDRESSING);
158 #else
159         return env->tlb_fill_exc;
160 #endif
161     }
162 
163 #ifndef CONFIG_USER_ONLY
164     if (unlikely(flags & TLB_WATCHPOINT)) {
165         /* S390 does not presently use transaction attributes. */
166         cpu_check_watchpoint(env_cpu(env), addr, size,
167                              MEMTXATTRS_UNSPECIFIED,
168                              (access_type == MMU_DATA_STORE
169                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
170     }
171 #endif
172 
173     return 0;
174 }
175 
176 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
177                              bool nonfault, vaddr vaddr1, int size,
178                              MMUAccessType access_type,
179                              int mmu_idx, uintptr_t ra)
180 {
181     int size1, size2, exc;
182 
183     assert(size > 0 && size <= 4096);
184 
185     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
186     size2 = size - size1;
187 
188     memset(access, 0, sizeof(*access));
189     access->vaddr1 = vaddr1;
190     access->size1 = size1;
191     access->size2 = size2;
192     access->mmu_idx = mmu_idx;
193 
194     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
195                             &access->haddr1, ra);
196     if (unlikely(exc)) {
197         return exc;
198     }
199     if (unlikely(size2)) {
200         /* The access crosses page boundaries. */
201         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
202 
203         access->vaddr2 = vaddr2;
204         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
205                                 nonfault, &access->haddr2, ra);
206         if (unlikely(exc)) {
207             return exc;
208         }
209     }
210     return 0;
211 }
212 
213 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
214                                   vaddr vaddr, int size,
215                                   MMUAccessType access_type, int mmu_idx,
216                                   uintptr_t ra)
217 {
218     int exc = access_prepare_nf(ret, env, false, vaddr, size,
219                                 access_type, mmu_idx, ra);
220     assert(!exc);
221 }
222 
223 /* Helper to handle memset on a single page. */
224 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
225                              uint8_t byte, uint16_t size, int mmu_idx,
226                              uintptr_t ra)
227 {
228 #ifdef CONFIG_USER_ONLY
229     memset(haddr, byte, size);
230 #else
231     if (likely(haddr)) {
232         memset(haddr, byte, size);
233     } else {
234         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
235         for (int i = 0; i < size; i++) {
236             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
237         }
238     }
239 #endif
240 }
241 
242 static void access_memset(CPUS390XState *env, S390Access *desta,
243                           uint8_t byte, uintptr_t ra)
244 {
245 
246     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
247                      desta->mmu_idx, ra);
248     if (likely(!desta->size2)) {
249         return;
250     }
251     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
252                      desta->mmu_idx, ra);
253 }
254 
255 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
256                                int offset, uintptr_t ra)
257 {
258     target_ulong vaddr = access->vaddr1;
259     void *haddr = access->haddr1;
260 
261     if (unlikely(offset >= access->size1)) {
262         offset -= access->size1;
263         vaddr = access->vaddr2;
264         haddr = access->haddr2;
265     }
266 
267     if (user_or_likely(haddr)) {
268         return ldub_p(haddr + offset);
269     } else {
270         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
271         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
272     }
273 }
274 
275 static void access_set_byte(CPUS390XState *env, S390Access *access,
276                             int offset, uint8_t byte, uintptr_t ra)
277 {
278     target_ulong vaddr = access->vaddr1;
279     void *haddr = access->haddr1;
280 
281     if (unlikely(offset >= access->size1)) {
282         offset -= access->size1;
283         vaddr = access->vaddr2;
284         haddr = access->haddr2;
285     }
286 
287     if (user_or_likely(haddr)) {
288         stb_p(haddr + offset, byte);
289     } else {
290         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
291         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
292     }
293 }
294 
295 /*
296  * Move data with the same semantics as memmove() in case ranges don't overlap
297  * or src > dest. Undefined behavior on destructive overlaps.
298  */
299 static void access_memmove(CPUS390XState *env, S390Access *desta,
300                            S390Access *srca, uintptr_t ra)
301 {
302     int len = desta->size1 + desta->size2;
303     int diff;
304 
305     assert(len == srca->size1 + srca->size2);
306 
307     /* Fallback to slow access in case we don't have access to all host pages */
308     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
309                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
310         int i;
311 
312         for (i = 0; i < len; i++) {
313             uint8_t byte = access_get_byte(env, srca, i, ra);
314 
315             access_set_byte(env, desta, i, byte, ra);
316         }
317         return;
318     }
319 
320     diff = desta->size1 - srca->size1;
321     if (likely(diff == 0)) {
322         memmove(desta->haddr1, srca->haddr1, srca->size1);
323         if (unlikely(srca->size2)) {
324             memmove(desta->haddr2, srca->haddr2, srca->size2);
325         }
326     } else if (diff > 0) {
327         memmove(desta->haddr1, srca->haddr1, srca->size1);
328         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
329         if (likely(desta->size2)) {
330             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
331         }
332     } else {
333         diff = -diff;
334         memmove(desta->haddr1, srca->haddr1, desta->size1);
335         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
336         if (likely(srca->size2)) {
337             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
338         }
339     }
340 }
341 
342 static int mmu_idx_from_as(uint8_t as)
343 {
344     switch (as) {
345     case AS_PRIMARY:
346         return MMU_PRIMARY_IDX;
347     case AS_SECONDARY:
348         return MMU_SECONDARY_IDX;
349     case AS_HOME:
350         return MMU_HOME_IDX;
351     default:
352         /* FIXME AS_ACCREG */
353         g_assert_not_reached();
354     }
355 }
356 
357 /* and on array */
358 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
359                              uint64_t src, uintptr_t ra)
360 {
361     const int mmu_idx = s390x_env_mmu_index(env, false);
362     S390Access srca1, srca2, desta;
363     uint32_t i;
364     uint8_t c = 0;
365 
366     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
367                __func__, l, dest, src);
368 
369     /* NC always processes one more byte than specified - maximum is 256 */
370     l++;
371 
372     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
373     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
374     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
375     for (i = 0; i < l; i++) {
376         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
377                           access_get_byte(env, &srca2, i, ra);
378 
379         c |= x;
380         access_set_byte(env, &desta, i, x, ra);
381     }
382     return c != 0;
383 }
384 
385 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
386                     uint64_t src)
387 {
388     return do_helper_nc(env, l, dest, src, GETPC());
389 }
390 
391 /* xor on array */
392 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
393                              uint64_t src, uintptr_t ra)
394 {
395     const int mmu_idx = s390x_env_mmu_index(env, false);
396     S390Access srca1, srca2, desta;
397     uint32_t i;
398     uint8_t c = 0;
399 
400     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
401                __func__, l, dest, src);
402 
403     /* XC always processes one more byte than specified - maximum is 256 */
404     l++;
405 
406     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
407     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
408     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
409 
410     /* xor with itself is the same as memset(0) */
411     if (src == dest) {
412         access_memset(env, &desta, 0, ra);
413         return 0;
414     }
415 
416     for (i = 0; i < l; i++) {
417         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
418                           access_get_byte(env, &srca2, i, ra);
419 
420         c |= x;
421         access_set_byte(env, &desta, i, x, ra);
422     }
423     return c != 0;
424 }
425 
426 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
427                     uint64_t src)
428 {
429     return do_helper_xc(env, l, dest, src, GETPC());
430 }
431 
432 /* or on array */
433 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
434                              uint64_t src, uintptr_t ra)
435 {
436     const int mmu_idx = s390x_env_mmu_index(env, false);
437     S390Access srca1, srca2, desta;
438     uint32_t i;
439     uint8_t c = 0;
440 
441     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
442                __func__, l, dest, src);
443 
444     /* OC always processes one more byte than specified - maximum is 256 */
445     l++;
446 
447     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
448     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
449     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
450     for (i = 0; i < l; i++) {
451         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
452                           access_get_byte(env, &srca2, i, ra);
453 
454         c |= x;
455         access_set_byte(env, &desta, i, x, ra);
456     }
457     return c != 0;
458 }
459 
460 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
461                     uint64_t src)
462 {
463     return do_helper_oc(env, l, dest, src, GETPC());
464 }
465 
466 /* memmove */
467 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
468                               uint64_t src, uintptr_t ra)
469 {
470     const int mmu_idx = s390x_env_mmu_index(env, false);
471     S390Access srca, desta;
472     uint32_t i;
473 
474     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
475                __func__, l, dest, src);
476 
477     /* MVC always copies one more byte than specified - maximum is 256 */
478     l++;
479 
480     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
481     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
482 
483     /*
484      * "When the operands overlap, the result is obtained as if the operands
485      * were processed one byte at a time". Only non-destructive overlaps
486      * behave like memmove().
487      */
488     if (dest == src + 1) {
489         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
490     } else if (!is_destructive_overlap(env, dest, src, l)) {
491         access_memmove(env, &desta, &srca, ra);
492     } else {
493         for (i = 0; i < l; i++) {
494             uint8_t byte = access_get_byte(env, &srca, i, ra);
495 
496             access_set_byte(env, &desta, i, byte, ra);
497         }
498     }
499 
500     return env->cc_op;
501 }
502 
503 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
504 {
505     do_helper_mvc(env, l, dest, src, GETPC());
506 }
507 
508 /* move right to left */
509 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
510 {
511     const int mmu_idx = s390x_env_mmu_index(env, false);
512     const uint64_t ra = GETPC();
513     S390Access srca, desta;
514     int32_t i;
515 
516     /* MVCRL always copies one more byte than specified - maximum is 256 */
517     l &= 0xff;
518     l++;
519 
520     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
521     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
522 
523     for (i = l - 1; i >= 0; i--) {
524         uint8_t byte = access_get_byte(env, &srca, i, ra);
525         access_set_byte(env, &desta, i, byte, ra);
526     }
527 }
528 
529 /* move inverse  */
530 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
531 {
532     const int mmu_idx = s390x_env_mmu_index(env, false);
533     S390Access srca, desta;
534     uintptr_t ra = GETPC();
535     int i;
536 
537     /* MVCIN always copies one more byte than specified - maximum is 256 */
538     l++;
539 
540     src = wrap_address(env, src - l + 1);
541     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
542     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
543     for (i = 0; i < l; i++) {
544         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
545 
546         access_set_byte(env, &desta, i, x, ra);
547     }
548 }
549 
550 /* move numerics  */
551 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
552 {
553     const int mmu_idx = s390x_env_mmu_index(env, false);
554     S390Access srca1, srca2, desta;
555     uintptr_t ra = GETPC();
556     int i;
557 
558     /* MVN always copies one more byte than specified - maximum is 256 */
559     l++;
560 
561     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
562     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
563     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
564     for (i = 0; i < l; i++) {
565         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
566                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
567 
568         access_set_byte(env, &desta, i, x, ra);
569     }
570 }
571 
572 /* move with offset  */
573 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
574 {
575     const int mmu_idx = s390x_env_mmu_index(env, false);
576     /* MVO always processes one more byte than specified - maximum is 16 */
577     const int len_dest = (l >> 4) + 1;
578     const int len_src = (l & 0xf) + 1;
579     uintptr_t ra = GETPC();
580     uint8_t byte_dest, byte_src;
581     S390Access srca, desta;
582     int i, j;
583 
584     access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
585     access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
586 
587     /* Handle rightmost byte */
588     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
589     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
590     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
591     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
592 
593     /* Process remaining bytes from right to left */
594     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
595         byte_dest = byte_src >> 4;
596         if (j >= 0) {
597             byte_src = access_get_byte(env, &srca, j, ra);
598         } else {
599             byte_src = 0;
600         }
601         byte_dest |= byte_src << 4;
602         access_set_byte(env, &desta, i, byte_dest, ra);
603     }
604 }
605 
606 /* move zones  */
607 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
608 {
609     const int mmu_idx = s390x_env_mmu_index(env, false);
610     S390Access srca1, srca2, desta;
611     uintptr_t ra = GETPC();
612     int i;
613 
614     /* MVZ always copies one more byte than specified - maximum is 256 */
615     l++;
616 
617     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
618     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
619     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
620     for (i = 0; i < l; i++) {
621         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
622                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
623 
624         access_set_byte(env, &desta, i, x, ra);
625     }
626 }
627 
628 /* compare unsigned byte arrays */
629 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
630                               uint64_t s2, uintptr_t ra)
631 {
632     uint32_t i;
633     uint32_t cc = 0;
634 
635     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
636                __func__, l, s1, s2);
637 
638     for (i = 0; i <= l; i++) {
639         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
640         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
641         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
642         if (x < y) {
643             cc = 1;
644             break;
645         } else if (x > y) {
646             cc = 2;
647             break;
648         }
649     }
650 
651     HELPER_LOG("\n");
652     return cc;
653 }
654 
655 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
656 {
657     return do_helper_clc(env, l, s1, s2, GETPC());
658 }
659 
660 /* compare logical under mask */
661 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
662                      uint64_t addr)
663 {
664     uintptr_t ra = GETPC();
665     uint32_t cc = 0;
666 
667     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
668                mask, addr);
669 
670     if (!mask) {
671         /* Recognize access exceptions for the first byte */
672         probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
673     }
674 
675     while (mask) {
676         if (mask & 8) {
677             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
678             uint8_t r = extract32(r1, 24, 8);
679             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
680                        addr);
681             if (r < d) {
682                 cc = 1;
683                 break;
684             } else if (r > d) {
685                 cc = 2;
686                 break;
687             }
688             addr++;
689         }
690         mask = (mask << 1) & 0xf;
691         r1 <<= 8;
692     }
693 
694     HELPER_LOG("\n");
695     return cc;
696 }
697 
698 static inline uint64_t get_address(CPUS390XState *env, int reg)
699 {
700     return wrap_address(env, env->regs[reg]);
701 }
702 
703 /*
704  * Store the address to the given register, zeroing out unused leftmost
705  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
706  */
707 static inline void set_address_zero(CPUS390XState *env, int reg,
708                                     uint64_t address)
709 {
710     if (env->psw.mask & PSW_MASK_64) {
711         env->regs[reg] = address;
712     } else {
713         if (!(env->psw.mask & PSW_MASK_32)) {
714             address &= 0x00ffffff;
715         } else {
716             address &= 0x7fffffff;
717         }
718         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
719     }
720 }
721 
722 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
723 {
724     if (env->psw.mask & PSW_MASK_64) {
725         /* 64-Bit mode */
726         env->regs[reg] = address;
727     } else {
728         if (!(env->psw.mask & PSW_MASK_32)) {
729             /* 24-Bit mode. According to the PoO it is implementation
730             dependent if bits 32-39 remain unchanged or are set to
731             zeros.  Choose the former so that the function can also be
732             used for TRT.  */
733             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
734         } else {
735             /* 31-Bit mode. According to the PoO it is implementation
736             dependent if bit 32 remains unchanged or is set to zero.
737             Choose the latter so that the function can also be used for
738             TRT.  */
739             address &= 0x7fffffff;
740             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
741         }
742     }
743 }
744 
745 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
746 {
747     if (!(env->psw.mask & PSW_MASK_64)) {
748         return (uint32_t)length;
749     }
750     return length;
751 }
752 
753 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
754 {
755     if (!(env->psw.mask & PSW_MASK_64)) {
756         /* 24-Bit and 31-Bit mode */
757         length &= 0x7fffffff;
758     }
759     return length;
760 }
761 
762 static inline uint64_t get_length(CPUS390XState *env, int reg)
763 {
764     return wrap_length31(env, env->regs[reg]);
765 }
766 
767 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
768 {
769     if (env->psw.mask & PSW_MASK_64) {
770         /* 64-Bit mode */
771         env->regs[reg] = length;
772     } else {
773         /* 24-Bit and 31-Bit mode */
774         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
775     }
776 }
777 
778 /* search string (c is byte to search, r2 is string, r1 end of string) */
779 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
780 {
781     uintptr_t ra = GETPC();
782     uint64_t end, str;
783     uint32_t len;
784     uint8_t v, c = env->regs[0];
785 
786     /* Bits 32-55 must contain all 0.  */
787     if (env->regs[0] & 0xffffff00u) {
788         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
789     }
790 
791     str = get_address(env, r2);
792     end = get_address(env, r1);
793 
794     /* Lest we fail to service interrupts in a timely manner, limit the
795        amount of work we're willing to do.  For now, let's cap at 8k.  */
796     for (len = 0; len < 0x2000; ++len) {
797         if (str + len == end) {
798             /* Character not found.  R1 & R2 are unmodified.  */
799             env->cc_op = 2;
800             return;
801         }
802         v = cpu_ldub_data_ra(env, str + len, ra);
803         if (v == c) {
804             /* Character found.  Set R1 to the location; R2 is unmodified.  */
805             env->cc_op = 1;
806             set_address(env, r1, str + len);
807             return;
808         }
809     }
810 
811     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
812     env->cc_op = 3;
813     set_address(env, r2, str + len);
814 }
815 
816 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
817 {
818     uintptr_t ra = GETPC();
819     uint32_t len;
820     uint16_t v, c = env->regs[0];
821     uint64_t end, str, adj_end;
822 
823     /* Bits 32-47 of R0 must be zero.  */
824     if (env->regs[0] & 0xffff0000u) {
825         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
826     }
827 
828     str = get_address(env, r2);
829     end = get_address(env, r1);
830 
831     /* If the LSB of the two addresses differ, use one extra byte.  */
832     adj_end = end + ((str ^ end) & 1);
833 
834     /* Lest we fail to service interrupts in a timely manner, limit the
835        amount of work we're willing to do.  For now, let's cap at 8k.  */
836     for (len = 0; len < 0x2000; len += 2) {
837         if (str + len == adj_end) {
838             /* End of input found.  */
839             env->cc_op = 2;
840             return;
841         }
842         v = cpu_lduw_data_ra(env, str + len, ra);
843         if (v == c) {
844             /* Character found.  Set R1 to the location; R2 is unmodified.  */
845             env->cc_op = 1;
846             set_address(env, r1, str + len);
847             return;
848         }
849     }
850 
851     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
852     env->cc_op = 3;
853     set_address(env, r2, str + len);
854 }
855 
856 /* unsigned string compare (c is string terminator) */
857 Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
858 {
859     uintptr_t ra = GETPC();
860     uint32_t len;
861 
862     c = c & 0xff;
863     s1 = wrap_address(env, s1);
864     s2 = wrap_address(env, s2);
865 
866     /* Lest we fail to service interrupts in a timely manner, limit the
867        amount of work we're willing to do.  For now, let's cap at 8k.  */
868     for (len = 0; len < 0x2000; ++len) {
869         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
870         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
871         if (v1 == v2) {
872             if (v1 == c) {
873                 /* Equal.  CC=0, and don't advance the registers.  */
874                 env->cc_op = 0;
875                 return int128_make128(s2, s1);
876             }
877         } else {
878             /* Unequal.  CC={1,2}, and advance the registers.  Note that
879                the terminator need not be zero, but the string that contains
880                the terminator is by definition "low".  */
881             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
882             return int128_make128(s2 + len, s1 + len);
883         }
884     }
885 
886     /* CPU-determined bytes equal; advance the registers.  */
887     env->cc_op = 3;
888     return int128_make128(s2 + len, s1 + len);
889 }
890 
891 /* move page */
892 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
893 {
894     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
895     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
896     const int mmu_idx = s390x_env_mmu_index(env, false);
897     const bool f = extract64(r0, 11, 1);
898     const bool s = extract64(r0, 10, 1);
899     const bool cco = extract64(r0, 8, 1);
900     uintptr_t ra = GETPC();
901     S390Access srca, desta;
902     int exc;
903 
904     if ((f && s) || extract64(r0, 12, 4)) {
905         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
906     }
907 
908     /*
909      * We always manually handle exceptions such that we can properly store
910      * r1/r2 to the lowcore on page-translation exceptions.
911      *
912      * TODO: Access key handling
913      */
914     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
915                             MMU_DATA_LOAD, mmu_idx, ra);
916     if (exc) {
917         if (cco) {
918             return 2;
919         }
920         goto inject_exc;
921     }
922     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
923                             MMU_DATA_STORE, mmu_idx, ra);
924     if (exc) {
925         if (cco && exc != PGM_PROTECTION) {
926             return 1;
927         }
928         goto inject_exc;
929     }
930     access_memmove(env, &desta, &srca, ra);
931     return 0; /* data moved */
932 inject_exc:
933 #if !defined(CONFIG_USER_ONLY)
934     if (exc != PGM_ADDRESSING) {
935         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
936                  env->tlb_fill_tec);
937     }
938     if (exc == PGM_PAGE_TRANS) {
939         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
940                  r1 << 4 | r2);
941     }
942 #endif
943     tcg_s390_program_interrupt(env, exc, ra);
944 }
945 
946 /* string copy */
947 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
948 {
949     const int mmu_idx = s390x_env_mmu_index(env, false);
950     const uint64_t d = get_address(env, r1);
951     const uint64_t s = get_address(env, r2);
952     const uint8_t c = env->regs[0];
953     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
954     S390Access srca, desta;
955     uintptr_t ra = GETPC();
956     int i;
957 
958     if (env->regs[0] & 0xffffff00ull) {
959         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
960     }
961 
962     /*
963      * Our access should not exceed single pages, as we must not report access
964      * exceptions exceeding the actually copied range (which we don't know at
965      * this point). We might over-indicate watchpoints within the pages
966      * (if we ever care, we have to limit processing to a single byte).
967      */
968     access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
969     access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
970     for (i = 0; i < len; i++) {
971         const uint8_t v = access_get_byte(env, &srca, i, ra);
972 
973         access_set_byte(env, &desta, i, v, ra);
974         if (v == c) {
975             set_address_zero(env, r1, d + i);
976             return 1;
977         }
978     }
979     set_address_zero(env, r1, d + len);
980     set_address_zero(env, r2, s + len);
981     return 3;
982 }
983 
984 /* load access registers r1 to r3 from memory at a2 */
985 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
986 {
987     uintptr_t ra = GETPC();
988     int i;
989 
990     if (a2 & 0x3) {
991         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
992     }
993 
994     for (i = r1;; i = (i + 1) % 16) {
995         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
996         a2 += 4;
997 
998         if (i == r3) {
999             break;
1000         }
1001     }
1002 }
1003 
1004 /* store access registers r1 to r3 in memory at a2 */
1005 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1006 {
1007     uintptr_t ra = GETPC();
1008     int i;
1009 
1010     if (a2 & 0x3) {
1011         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1012     }
1013 
1014     for (i = r1;; i = (i + 1) % 16) {
1015         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1016         a2 += 4;
1017 
1018         if (i == r3) {
1019             break;
1020         }
1021     }
1022 }
1023 
1024 /* move long helper */
1025 static inline uint32_t do_mvcl(CPUS390XState *env,
1026                                uint64_t *dest, uint64_t *destlen,
1027                                uint64_t *src, uint64_t *srclen,
1028                                uint16_t pad, int wordsize, uintptr_t ra)
1029 {
1030     const int mmu_idx = s390x_env_mmu_index(env, false);
1031     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1032     S390Access srca, desta;
1033     int i, cc;
1034 
1035     if (*destlen == *srclen) {
1036         cc = 0;
1037     } else if (*destlen < *srclen) {
1038         cc = 1;
1039     } else {
1040         cc = 2;
1041     }
1042 
1043     if (!*destlen) {
1044         return cc;
1045     }
1046 
1047     /*
1048      * Only perform one type of type of operation (move/pad) at a time.
1049      * Stay within single pages.
1050      */
1051     if (*srclen) {
1052         /* Copy the src array */
1053         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1054         *destlen -= len;
1055         *srclen -= len;
1056         access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1057         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1058         access_memmove(env, &desta, &srca, ra);
1059         *src = wrap_address(env, *src + len);
1060         *dest = wrap_address(env, *dest + len);
1061     } else if (wordsize == 1) {
1062         /* Pad the remaining area */
1063         *destlen -= len;
1064         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1065         access_memset(env, &desta, pad, ra);
1066         *dest = wrap_address(env, *dest + len);
1067     } else {
1068         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1069 
1070         /* The remaining length selects the padding byte. */
1071         for (i = 0; i < len; (*destlen)--, i++) {
1072             if (*destlen & 1) {
1073                 access_set_byte(env, &desta, i, pad, ra);
1074             } else {
1075                 access_set_byte(env, &desta, i, pad >> 8, ra);
1076             }
1077         }
1078         *dest = wrap_address(env, *dest + len);
1079     }
1080 
1081     return *destlen ? 3 : cc;
1082 }
1083 
1084 /* move long */
1085 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1086 {
1087     const int mmu_idx = s390x_env_mmu_index(env, false);
1088     uintptr_t ra = GETPC();
1089     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1090     uint64_t dest = get_address(env, r1);
1091     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1092     uint64_t src = get_address(env, r2);
1093     uint8_t pad = env->regs[r2 + 1] >> 24;
1094     CPUState *cs = env_cpu(env);
1095     S390Access srca, desta;
1096     uint32_t cc, cur_len;
1097 
1098     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1099         cc = 3;
1100     } else if (srclen == destlen) {
1101         cc = 0;
1102     } else if (destlen < srclen) {
1103         cc = 1;
1104     } else {
1105         cc = 2;
1106     }
1107 
1108     /* We might have to zero-out some bits even if there was no action. */
1109     if (unlikely(!destlen || cc == 3)) {
1110         set_address_zero(env, r2, src);
1111         set_address_zero(env, r1, dest);
1112         return cc;
1113     } else if (!srclen) {
1114         set_address_zero(env, r2, src);
1115     }
1116 
1117     /*
1118      * Only perform one type of type of operation (move/pad) in one step.
1119      * Stay within single pages.
1120      */
1121     while (destlen) {
1122         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1123         if (!srclen) {
1124             access_prepare(&desta, env, dest, cur_len,
1125                            MMU_DATA_STORE, mmu_idx, ra);
1126             access_memset(env, &desta, pad, ra);
1127         } else {
1128             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1129 
1130             access_prepare(&srca, env, src, cur_len,
1131                            MMU_DATA_LOAD, mmu_idx, ra);
1132             access_prepare(&desta, env, dest, cur_len,
1133                            MMU_DATA_STORE, mmu_idx, ra);
1134             access_memmove(env, &desta, &srca, ra);
1135             src = wrap_address(env, src + cur_len);
1136             srclen -= cur_len;
1137             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1138             set_address_zero(env, r2, src);
1139         }
1140         dest = wrap_address(env, dest + cur_len);
1141         destlen -= cur_len;
1142         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1143         set_address_zero(env, r1, dest);
1144 
1145         /*
1146          * MVCL is interruptible. Return to the main loop if requested after
1147          * writing back all state to registers. If no interrupt will get
1148          * injected, we'll end up back in this handler and continue processing
1149          * the remaining parts.
1150          */
1151         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1152             cpu_loop_exit_restore(cs, ra);
1153         }
1154     }
1155     return cc;
1156 }
1157 
1158 /* move long extended */
1159 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1160                        uint32_t r3)
1161 {
1162     uintptr_t ra = GETPC();
1163     uint64_t destlen = get_length(env, r1 + 1);
1164     uint64_t dest = get_address(env, r1);
1165     uint64_t srclen = get_length(env, r3 + 1);
1166     uint64_t src = get_address(env, r3);
1167     uint8_t pad = a2;
1168     uint32_t cc;
1169 
1170     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1171 
1172     set_length(env, r1 + 1, destlen);
1173     set_length(env, r3 + 1, srclen);
1174     set_address(env, r1, dest);
1175     set_address(env, r3, src);
1176 
1177     return cc;
1178 }
1179 
1180 /* move long unicode */
1181 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1182                        uint32_t r3)
1183 {
1184     uintptr_t ra = GETPC();
1185     uint64_t destlen = get_length(env, r1 + 1);
1186     uint64_t dest = get_address(env, r1);
1187     uint64_t srclen = get_length(env, r3 + 1);
1188     uint64_t src = get_address(env, r3);
1189     uint16_t pad = a2;
1190     uint32_t cc;
1191 
1192     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1193 
1194     set_length(env, r1 + 1, destlen);
1195     set_length(env, r3 + 1, srclen);
1196     set_address(env, r1, dest);
1197     set_address(env, r3, src);
1198 
1199     return cc;
1200 }
1201 
1202 /* compare logical long helper */
1203 static inline uint32_t do_clcl(CPUS390XState *env,
1204                                uint64_t *src1, uint64_t *src1len,
1205                                uint64_t *src3, uint64_t *src3len,
1206                                uint16_t pad, uint64_t limit,
1207                                int wordsize, uintptr_t ra)
1208 {
1209     uint64_t len = MAX(*src1len, *src3len);
1210     uint32_t cc = 0;
1211 
1212     check_alignment(env, *src1len | *src3len, wordsize, ra);
1213 
1214     if (!len) {
1215         return cc;
1216     }
1217 
1218     /* Lest we fail to service interrupts in a timely manner, limit the
1219        amount of work we're willing to do.  */
1220     if (len > limit) {
1221         len = limit;
1222         cc = 3;
1223     }
1224 
1225     for (; len; len -= wordsize) {
1226         uint16_t v1 = pad;
1227         uint16_t v3 = pad;
1228 
1229         if (*src1len) {
1230             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1231         }
1232         if (*src3len) {
1233             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1234         }
1235 
1236         if (v1 != v3) {
1237             cc = (v1 < v3) ? 1 : 2;
1238             break;
1239         }
1240 
1241         if (*src1len) {
1242             *src1 += wordsize;
1243             *src1len -= wordsize;
1244         }
1245         if (*src3len) {
1246             *src3 += wordsize;
1247             *src3len -= wordsize;
1248         }
1249     }
1250 
1251     return cc;
1252 }
1253 
1254 
1255 /* compare logical long */
1256 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1257 {
1258     uintptr_t ra = GETPC();
1259     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1260     uint64_t src1 = get_address(env, r1);
1261     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1262     uint64_t src3 = get_address(env, r2);
1263     uint8_t pad = env->regs[r2 + 1] >> 24;
1264     uint32_t cc;
1265 
1266     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1267 
1268     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1269     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1270     set_address(env, r1, src1);
1271     set_address(env, r2, src3);
1272 
1273     return cc;
1274 }
1275 
1276 /* compare logical long extended memcompare insn with padding */
1277 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1278                        uint32_t r3)
1279 {
1280     uintptr_t ra = GETPC();
1281     uint64_t src1len = get_length(env, r1 + 1);
1282     uint64_t src1 = get_address(env, r1);
1283     uint64_t src3len = get_length(env, r3 + 1);
1284     uint64_t src3 = get_address(env, r3);
1285     uint8_t pad = a2;
1286     uint32_t cc;
1287 
1288     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1289 
1290     set_length(env, r1 + 1, src1len);
1291     set_length(env, r3 + 1, src3len);
1292     set_address(env, r1, src1);
1293     set_address(env, r3, src3);
1294 
1295     return cc;
1296 }
1297 
1298 /* compare logical long unicode memcompare insn with padding */
1299 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1300                        uint32_t r3)
1301 {
1302     uintptr_t ra = GETPC();
1303     uint64_t src1len = get_length(env, r1 + 1);
1304     uint64_t src1 = get_address(env, r1);
1305     uint64_t src3len = get_length(env, r3 + 1);
1306     uint64_t src3 = get_address(env, r3);
1307     uint16_t pad = a2;
1308     uint32_t cc = 0;
1309 
1310     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1311 
1312     set_length(env, r1 + 1, src1len);
1313     set_length(env, r3 + 1, src3len);
1314     set_address(env, r1, src1);
1315     set_address(env, r3, src3);
1316 
1317     return cc;
1318 }
1319 
1320 /* checksum */
1321 Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1322                     uint64_t src, uint64_t src_len)
1323 {
1324     uintptr_t ra = GETPC();
1325     uint64_t max_len, len;
1326     uint64_t cksm = (uint32_t)r1;
1327 
1328     /* Lest we fail to service interrupts in a timely manner, limit the
1329        amount of work we're willing to do.  For now, let's cap at 8k.  */
1330     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1331 
1332     /* Process full words as available.  */
1333     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1334         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1335     }
1336 
1337     switch (max_len - len) {
1338     case 1:
1339         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1340         len += 1;
1341         break;
1342     case 2:
1343         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1344         len += 2;
1345         break;
1346     case 3:
1347         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1348         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1349         len += 3;
1350         break;
1351     }
1352 
1353     /* Fold the carry from the checksum.  Note that we can see carry-out
1354        during folding more than once (but probably not more than twice).  */
1355     while (cksm > 0xffffffffull) {
1356         cksm = (uint32_t)cksm + (cksm >> 32);
1357     }
1358 
1359     /* Indicate whether or not we've processed everything.  */
1360     env->cc_op = (len == src_len ? 0 : 3);
1361 
1362     /* Return both cksm and processed length.  */
1363     return int128_make128(cksm, len);
1364 }
1365 
1366 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1367 {
1368     uintptr_t ra = GETPC();
1369     int len_dest = len >> 4;
1370     int len_src = len & 0xf;
1371     uint8_t b;
1372 
1373     dest += len_dest;
1374     src += len_src;
1375 
1376     /* last byte is special, it only flips the nibbles */
1377     b = cpu_ldub_data_ra(env, src, ra);
1378     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1379     src--;
1380     len_src--;
1381 
1382     /* now pack every value */
1383     while (len_dest > 0) {
1384         b = 0;
1385 
1386         if (len_src >= 0) {
1387             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1388             src--;
1389             len_src--;
1390         }
1391         if (len_src >= 0) {
1392             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1393             src--;
1394             len_src--;
1395         }
1396 
1397         len_dest--;
1398         dest--;
1399         cpu_stb_data_ra(env, dest, b, ra);
1400     }
1401 }
1402 
1403 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1404                            uint32_t srclen, int ssize, uintptr_t ra)
1405 {
1406     int i;
1407     /* The destination operand is always 16 bytes long.  */
1408     const int destlen = 16;
1409 
1410     /* The operands are processed from right to left.  */
1411     src += srclen - 1;
1412     dest += destlen - 1;
1413 
1414     for (i = 0; i < destlen; i++) {
1415         uint8_t b = 0;
1416 
1417         /* Start with a positive sign */
1418         if (i == 0) {
1419             b = 0xc;
1420         } else if (srclen > ssize) {
1421             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1422             src -= ssize;
1423             srclen -= ssize;
1424         }
1425 
1426         if (srclen > ssize) {
1427             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1428             src -= ssize;
1429             srclen -= ssize;
1430         }
1431 
1432         cpu_stb_data_ra(env, dest, b, ra);
1433         dest--;
1434     }
1435 }
1436 
1437 
1438 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1439                  uint32_t srclen)
1440 {
1441     do_pkau(env, dest, src, srclen, 1, GETPC());
1442 }
1443 
1444 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1445                  uint32_t srclen)
1446 {
1447     do_pkau(env, dest, src, srclen, 2, GETPC());
1448 }
1449 
1450 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1451                   uint64_t src)
1452 {
1453     uintptr_t ra = GETPC();
1454     int len_dest = len >> 4;
1455     int len_src = len & 0xf;
1456     uint8_t b;
1457     int second_nibble = 0;
1458 
1459     dest += len_dest;
1460     src += len_src;
1461 
1462     /* last byte is special, it only flips the nibbles */
1463     b = cpu_ldub_data_ra(env, src, ra);
1464     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1465     src--;
1466     len_src--;
1467 
1468     /* now pad every nibble with 0xf0 */
1469 
1470     while (len_dest > 0) {
1471         uint8_t cur_byte = 0;
1472 
1473         if (len_src > 0) {
1474             cur_byte = cpu_ldub_data_ra(env, src, ra);
1475         }
1476 
1477         len_dest--;
1478         dest--;
1479 
1480         /* only advance one nibble at a time */
1481         if (second_nibble) {
1482             cur_byte >>= 4;
1483             len_src--;
1484             src--;
1485         }
1486         second_nibble = !second_nibble;
1487 
1488         /* digit */
1489         cur_byte = (cur_byte & 0xf);
1490         /* zone bits */
1491         cur_byte |= 0xf0;
1492 
1493         cpu_stb_data_ra(env, dest, cur_byte, ra);
1494     }
1495 }
1496 
1497 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1498                                  uint32_t destlen, int dsize, uint64_t src,
1499                                  uintptr_t ra)
1500 {
1501     int i;
1502     uint32_t cc;
1503     uint8_t b;
1504     /* The source operand is always 16 bytes long.  */
1505     const int srclen = 16;
1506 
1507     /* The operands are processed from right to left.  */
1508     src += srclen - 1;
1509     dest += destlen - dsize;
1510 
1511     /* Check for the sign.  */
1512     b = cpu_ldub_data_ra(env, src, ra);
1513     src--;
1514     switch (b & 0xf) {
1515     case 0xa:
1516     case 0xc:
1517     case 0xe ... 0xf:
1518         cc = 0;  /* plus */
1519         break;
1520     case 0xb:
1521     case 0xd:
1522         cc = 1;  /* minus */
1523         break;
1524     default:
1525     case 0x0 ... 0x9:
1526         cc = 3;  /* invalid */
1527         break;
1528     }
1529 
1530     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1531     for (i = 0; i < destlen; i += dsize) {
1532         if (i == (31 * dsize)) {
1533             /* If length is 32/64 bytes, the leftmost byte is 0. */
1534             b = 0;
1535         } else if (i % (2 * dsize)) {
1536             b = cpu_ldub_data_ra(env, src, ra);
1537             src--;
1538         } else {
1539             b >>= 4;
1540         }
1541         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1542         dest -= dsize;
1543     }
1544 
1545     return cc;
1546 }
1547 
1548 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1549                        uint64_t src)
1550 {
1551     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1552 }
1553 
1554 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1555                        uint64_t src)
1556 {
1557     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1558 }
1559 
1560 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1561 {
1562     uintptr_t ra = GETPC();
1563     uint32_t cc = 0;
1564     int i;
1565 
1566     for (i = 0; i < destlen; i++) {
1567         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1568         /* digit */
1569         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1570 
1571         if (i == (destlen - 1)) {
1572             /* sign */
1573             cc |= (b & 0xf) < 0xa ? 1 : 0;
1574         } else {
1575             /* digit */
1576             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1577         }
1578     }
1579 
1580     return cc;
1581 }
1582 
1583 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1584                              uint64_t trans, uintptr_t ra)
1585 {
1586     uint32_t i;
1587 
1588     for (i = 0; i <= len; i++) {
1589         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1590         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1591         cpu_stb_data_ra(env, array + i, new_byte, ra);
1592     }
1593 
1594     return env->cc_op;
1595 }
1596 
1597 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1598                 uint64_t trans)
1599 {
1600     do_helper_tr(env, len, array, trans, GETPC());
1601 }
1602 
1603 Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
1604                    uint64_t len, uint64_t trans)
1605 {
1606     uintptr_t ra = GETPC();
1607     uint8_t end = env->regs[0] & 0xff;
1608     uint64_t l = len;
1609     uint64_t i;
1610     uint32_t cc = 0;
1611 
1612     if (!(env->psw.mask & PSW_MASK_64)) {
1613         array &= 0x7fffffff;
1614         l = (uint32_t)l;
1615     }
1616 
1617     /* Lest we fail to service interrupts in a timely manner, limit the
1618        amount of work we're willing to do.  For now, let's cap at 8k.  */
1619     if (l > 0x2000) {
1620         l = 0x2000;
1621         cc = 3;
1622     }
1623 
1624     for (i = 0; i < l; i++) {
1625         uint8_t byte, new_byte;
1626 
1627         byte = cpu_ldub_data_ra(env, array + i, ra);
1628 
1629         if (byte == end) {
1630             cc = 1;
1631             break;
1632         }
1633 
1634         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1635         cpu_stb_data_ra(env, array + i, new_byte, ra);
1636     }
1637 
1638     env->cc_op = cc;
1639     return int128_make128(len - i, array + i);
1640 }
1641 
1642 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1643                                      uint64_t array, uint64_t trans,
1644                                      int inc, uintptr_t ra)
1645 {
1646     int i;
1647 
1648     for (i = 0; i <= len; i++) {
1649         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1650         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1651 
1652         if (sbyte != 0) {
1653             set_address(env, 1, array + i * inc);
1654             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1655             return (i == len) ? 2 : 1;
1656         }
1657     }
1658 
1659     return 0;
1660 }
1661 
1662 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1663                                   uint64_t array, uint64_t trans,
1664                                   uintptr_t ra)
1665 {
1666     return do_helper_trt(env, len, array, trans, 1, ra);
1667 }
1668 
1669 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1670                      uint64_t trans)
1671 {
1672     return do_helper_trt(env, len, array, trans, 1, GETPC());
1673 }
1674 
1675 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1676                                    uint64_t array, uint64_t trans,
1677                                    uintptr_t ra)
1678 {
1679     return do_helper_trt(env, len, array, trans, -1, ra);
1680 }
1681 
1682 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1683                       uint64_t trans)
1684 {
1685     return do_helper_trt(env, len, array, trans, -1, GETPC());
1686 }
1687 
1688 /* Translate one/two to one/two */
1689 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1690                       uint32_t tst, uint32_t sizes)
1691 {
1692     uintptr_t ra = GETPC();
1693     int dsize = (sizes & 1) ? 1 : 2;
1694     int ssize = (sizes & 2) ? 1 : 2;
1695     uint64_t tbl = get_address(env, 1);
1696     uint64_t dst = get_address(env, r1);
1697     uint64_t len = get_length(env, r1 + 1);
1698     uint64_t src = get_address(env, r2);
1699     uint32_t cc = 3;
1700     int i;
1701 
1702     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1703        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1704        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1705     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1706         tbl &= -4096;
1707     } else {
1708         tbl &= -8;
1709     }
1710 
1711     check_alignment(env, len, ssize, ra);
1712 
1713     /* Lest we fail to service interrupts in a timely manner, */
1714     /* limit the amount of work we're willing to do.   */
1715     for (i = 0; i < 0x2000; i++) {
1716         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1717         uint64_t tble = tbl + (sval * dsize);
1718         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1719         if (dval == tst) {
1720             cc = 1;
1721             break;
1722         }
1723         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1724 
1725         len -= ssize;
1726         src += ssize;
1727         dst += dsize;
1728 
1729         if (len == 0) {
1730             cc = 0;
1731             break;
1732         }
1733     }
1734 
1735     set_address(env, r1, dst);
1736     set_length(env, r1 + 1, len);
1737     set_address(env, r2, src);
1738 
1739     return cc;
1740 }
1741 
1742 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1743                         uint64_t a2, bool parallel)
1744 {
1745     uint32_t mem_idx = s390x_env_mmu_index(env, false);
1746     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1747     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1748     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1749     MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1750     MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1751     uintptr_t ra = GETPC();
1752     uint32_t fc = extract32(env->regs[0], 0, 8);
1753     uint32_t sc = extract32(env->regs[0], 8, 8);
1754     uint64_t pl = get_address(env, 1) & -16;
1755     uint64_t svh, svl;
1756     uint32_t cc;
1757 
1758     /* Sanity check the function code and storage characteristic.  */
1759     if (fc > 1 || sc > 3) {
1760         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1761             goto spec_exception;
1762         }
1763         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1764             goto spec_exception;
1765         }
1766     }
1767 
1768     /* Sanity check the alignments.  */
1769     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1770         goto spec_exception;
1771     }
1772 
1773     /* Sanity check writability of the store address.  */
1774     probe_write(env, a2, 1 << sc, mem_idx, ra);
1775 
1776     /*
1777      * Note that the compare-and-swap is atomic, and the store is atomic,
1778      * but the complete operation is not.  Therefore we do not need to
1779      * assert serial context in order to implement this.  That said,
1780      * restart early if we can't support either operation that is supposed
1781      * to be atomic.
1782      */
1783     if (parallel) {
1784         uint32_t max = 2;
1785 #ifdef CONFIG_ATOMIC64
1786         max = 3;
1787 #endif
1788         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1789             (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1790             cpu_loop_exit_atomic(env_cpu(env), ra);
1791         }
1792     }
1793 
1794     /*
1795      * All loads happen before all stores.  For simplicity, load the entire
1796      * store value area from the parameter list.
1797      */
1798     svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1799     svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1800 
1801     switch (fc) {
1802     case 0:
1803         {
1804             uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1805             uint32_t cv = env->regs[r3];
1806             uint32_t ov;
1807 
1808             if (parallel) {
1809                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1810             } else {
1811                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
1812                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1813             }
1814             cc = (ov != cv);
1815             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1816         }
1817         break;
1818 
1819     case 1:
1820         {
1821             uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1822             uint64_t cv = env->regs[r3];
1823             uint64_t ov;
1824 
1825             if (parallel) {
1826 #ifdef CONFIG_ATOMIC64
1827                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1828 #else
1829                 /* Note that we asserted !parallel above.  */
1830                 g_assert_not_reached();
1831 #endif
1832             } else {
1833                 ov = cpu_ldq_mmu(env, a1, oi8, ra);
1834                 cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1835             }
1836             cc = (ov != cv);
1837             env->regs[r3] = ov;
1838         }
1839         break;
1840 
1841     case 2:
1842         {
1843             Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1844             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1845             Int128 ov;
1846 
1847             if (!parallel) {
1848                 ov = cpu_ld16_mmu(env, a1, oi16, ra);
1849                 cc = !int128_eq(ov, cv);
1850                 if (cc) {
1851                     nv = ov;
1852                 }
1853                 cpu_st16_mmu(env, a1, nv, oi16, ra);
1854             } else if (HAVE_CMPXCHG128) {
1855                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1856                 cc = !int128_eq(ov, cv);
1857             } else {
1858                 /* Note that we asserted !parallel above.  */
1859                 g_assert_not_reached();
1860             }
1861 
1862             env->regs[r3 + 0] = int128_gethi(ov);
1863             env->regs[r3 + 1] = int128_getlo(ov);
1864         }
1865         break;
1866 
1867     default:
1868         g_assert_not_reached();
1869     }
1870 
1871     /* Store only if the comparison succeeded.  Note that above we use a pair
1872        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1873        from the most-significant bits of svh.  */
1874     if (cc == 0) {
1875         switch (sc) {
1876         case 0:
1877             cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1878             break;
1879         case 1:
1880             cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1881             break;
1882         case 2:
1883             cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1884             break;
1885         case 3:
1886             cpu_stq_mmu(env, a2, svh, oi8, ra);
1887             break;
1888         case 4:
1889             cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1890             break;
1891         default:
1892             g_assert_not_reached();
1893         }
1894     }
1895 
1896     return cc;
1897 
1898  spec_exception:
1899     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1900 }
1901 
1902 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1903 {
1904     return do_csst(env, r3, a1, a2, false);
1905 }
1906 
1907 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1908                                uint64_t a2)
1909 {
1910     return do_csst(env, r3, a1, a2, true);
1911 }
1912 
1913 #if !defined(CONFIG_USER_ONLY)
1914 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1915 {
1916     uintptr_t ra = GETPC();
1917     bool PERchanged = false;
1918     uint64_t src = a2;
1919     uint32_t i;
1920 
1921     if (src & 0x7) {
1922         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1923     }
1924 
1925     for (i = r1;; i = (i + 1) % 16) {
1926         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1927         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1928             PERchanged = true;
1929         }
1930         env->cregs[i] = val;
1931         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1932                    i, src, val);
1933         src += sizeof(uint64_t);
1934 
1935         if (i == r3) {
1936             break;
1937         }
1938     }
1939 
1940     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1941         s390_cpu_recompute_watchpoints(env_cpu(env));
1942     }
1943 
1944     tlb_flush(env_cpu(env));
1945 }
1946 
1947 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1948 {
1949     uintptr_t ra = GETPC();
1950     bool PERchanged = false;
1951     uint64_t src = a2;
1952     uint32_t i;
1953 
1954     if (src & 0x3) {
1955         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1956     }
1957 
1958     for (i = r1;; i = (i + 1) % 16) {
1959         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1960         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1961             PERchanged = true;
1962         }
1963         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1964         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1965         src += sizeof(uint32_t);
1966 
1967         if (i == r3) {
1968             break;
1969         }
1970     }
1971 
1972     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1973         s390_cpu_recompute_watchpoints(env_cpu(env));
1974     }
1975 
1976     tlb_flush(env_cpu(env));
1977 }
1978 
1979 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1980 {
1981     uintptr_t ra = GETPC();
1982     uint64_t dest = a2;
1983     uint32_t i;
1984 
1985     if (dest & 0x7) {
1986         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1987     }
1988 
1989     for (i = r1;; i = (i + 1) % 16) {
1990         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
1991         dest += sizeof(uint64_t);
1992 
1993         if (i == r3) {
1994             break;
1995         }
1996     }
1997 }
1998 
1999 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2000 {
2001     uintptr_t ra = GETPC();
2002     uint64_t dest = a2;
2003     uint32_t i;
2004 
2005     if (dest & 0x3) {
2006         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2007     }
2008 
2009     for (i = r1;; i = (i + 1) % 16) {
2010         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2011         dest += sizeof(uint32_t);
2012 
2013         if (i == r3) {
2014             break;
2015         }
2016     }
2017 }
2018 
2019 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2020 {
2021     uintptr_t ra = GETPC();
2022     int i;
2023 
2024     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2025 
2026     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2027         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2028     }
2029 
2030     return 0;
2031 }
2032 
/*
 * Probe the accessibility of address a1 and report it as a value 0..3:
 *   0  write check succeeded (fetching and storing permitted)
 *   1  write check failed with PGM_PROTECTION, read check succeeded
 *   2  both checks failed with PGM_PROTECTION
 *   3  any other failure (translation not available)
 * PGM_ADDRESSING and PGM_TRANS_SPEC failures are not reported through the
 * return value but handed to s390_cpu_virt_mem_handle_exc().
 *
 * a2 is not referenced by this function.
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    /* A zero return from the check is treated as "access would succeed". */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /*
     * The failed check is expected to have left its program-interruption
     * code in env->int_pgm_code; the decisions below are based on it.
     */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        /* Reset exception_index so the failed probe leaves nothing pending. */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    /* int_pgm_code now reflects the most recent failed check. */
    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        /* NOTE(review): presumably does not return here - confirm. */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2072 
2073 /* insert storage key extended */
2074 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2075 {
2076     static S390SKeysState *ss;
2077     static S390SKeysClass *skeyclass;
2078     uint64_t addr = wrap_address(env, r2);
2079     uint8_t key;
2080     int rc;
2081 
2082     addr = mmu_real2abs(env, addr);
2083     if (!mmu_absolute_addr_valid(addr, false)) {
2084         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2085     }
2086 
2087     if (unlikely(!ss)) {
2088         ss = s390_get_skeys_device();
2089         skeyclass = S390_SKEYS_GET_CLASS(ss);
2090         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2091             tlb_flush_all_cpus_synced(env_cpu(env));
2092         }
2093     }
2094 
2095     rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2096     if (rc) {
2097         return 0;
2098     }
2099     return key;
2100 }
2101 
2102 /* set storage key extended */
2103 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2104 {
2105     static S390SKeysState *ss;
2106     static S390SKeysClass *skeyclass;
2107     uint64_t addr = wrap_address(env, r2);
2108     uint8_t key;
2109 
2110     addr = mmu_real2abs(env, addr);
2111     if (!mmu_absolute_addr_valid(addr, false)) {
2112         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2113     }
2114 
2115     if (unlikely(!ss)) {
2116         ss = s390_get_skeys_device();
2117         skeyclass = S390_SKEYS_GET_CLASS(ss);
2118         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2119             tlb_flush_all_cpus_synced(env_cpu(env));
2120         }
2121     }
2122 
2123     key = r1 & 0xfe;
2124     s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2125    /*
2126     * As we can only flush by virtual address and not all the entries
2127     * that point to a physical address we have to flush the whole TLB.
2128     */
2129     tlb_flush_all_cpus_synced(env_cpu(env));
2130 }
2131 
2132 /* reset reference bit extended */
2133 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2134 {
2135     uint64_t addr = wrap_address(env, r2);
2136     static S390SKeysState *ss;
2137     static S390SKeysClass *skeyclass;
2138     uint8_t re, key;
2139     int rc;
2140 
2141     addr = mmu_real2abs(env, addr);
2142     if (!mmu_absolute_addr_valid(addr, false)) {
2143         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2144     }
2145 
2146     if (unlikely(!ss)) {
2147         ss = s390_get_skeys_device();
2148         skeyclass = S390_SKEYS_GET_CLASS(ss);
2149         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2150             tlb_flush_all_cpus_synced(env_cpu(env));
2151         }
2152     }
2153 
2154     rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2155     if (rc) {
2156         return 0;
2157     }
2158 
2159     re = key & (SK_R | SK_C);
2160     key &= ~SK_R;
2161 
2162     rc = s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2163     if (rc) {
2164         return 0;
2165     }
2166    /*
2167     * As we can only flush by virtual address and not all the entries
2168     * that point to a physical address we have to flush the whole TLB.
2169     */
2170     tlb_flush_all_cpus_synced(env_cpu(env));
2171 
2172     /*
2173      * cc
2174      *
2175      * 0  Reference bit zero; change bit zero
2176      * 1  Reference bit zero; change bit one
2177      * 2  Reference bit one; change bit zero
2178      * 3  Reference bit one; change bit one
2179      */
2180 
2181     return re >> 1;
2182 }
2183 
2184 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2185                       uint64_t key)
2186 {
2187     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2188     S390Access srca, desta;
2189     uintptr_t ra = GETPC();
2190     int cc = 0;
2191 
2192     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2193                __func__, l, a1, a2);
2194 
2195     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2196         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2197         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2198     }
2199 
2200     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2201         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2202     }
2203 
2204     l = wrap_length32(env, l);
2205     if (l > 256) {
2206         /* max 256 */
2207         l = 256;
2208         cc = 3;
2209     } else if (!l) {
2210         return cc;
2211     }
2212 
2213     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2214     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2215     access_memmove(env, &desta, &srca, ra);
2216     return cc;
2217 }
2218 
2219 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2220                       uint64_t key)
2221 {
2222     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2223     S390Access srca, desta;
2224     uintptr_t ra = GETPC();
2225     int cc = 0;
2226 
2227     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2228                __func__, l, a1, a2);
2229 
2230     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2231         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2232         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2233     }
2234 
2235     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2236         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2237     }
2238 
2239     l = wrap_length32(env, l);
2240     if (l > 256) {
2241         /* max 256 */
2242         l = 256;
2243         cc = 3;
2244     } else if (!l) {
2245         return cc;
2246     }
2247     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2248     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2249     access_memmove(env, &desta, &srca, ra);
2250     return cc;
2251 }
2252 
2253 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2254 {
2255     CPUState *cs = env_cpu(env);
2256     const uintptr_t ra = GETPC();
2257     uint64_t table, entry, raddr;
2258     uint16_t entries, i, index = 0;
2259 
2260     if (r2 & 0xff000) {
2261         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2262     }
2263 
2264     if (!(r2 & 0x800)) {
2265         /* invalidation-and-clearing operation */
2266         table = r1 & ASCE_ORIGIN;
2267         entries = (r2 & 0x7ff) + 1;
2268 
2269         switch (r1 & ASCE_TYPE_MASK) {
2270         case ASCE_TYPE_REGION1:
2271             index = (r2 >> 53) & 0x7ff;
2272             break;
2273         case ASCE_TYPE_REGION2:
2274             index = (r2 >> 42) & 0x7ff;
2275             break;
2276         case ASCE_TYPE_REGION3:
2277             index = (r2 >> 31) & 0x7ff;
2278             break;
2279         case ASCE_TYPE_SEGMENT:
2280             index = (r2 >> 20) & 0x7ff;
2281             break;
2282         }
2283         for (i = 0; i < entries; i++) {
2284             /* addresses are not wrapped in 24/31bit mode but table index is */
2285             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2286             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2287             if (!(entry & REGION_ENTRY_I)) {
2288                 /* we are allowed to not store if already invalid */
2289                 entry |= REGION_ENTRY_I;
2290                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2291             }
2292         }
2293     }
2294 
2295     /* We simply flush the complete tlb, therefore we can ignore r3. */
2296     if (m4 & 1) {
2297         tlb_flush(cs);
2298     } else {
2299         tlb_flush_all_cpus_synced(cs);
2300     }
2301 }
2302 
2303 /* invalidate pte */
2304 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2305                   uint32_t m4)
2306 {
2307     CPUState *cs = env_cpu(env);
2308     const uintptr_t ra = GETPC();
2309     uint64_t page = vaddr & TARGET_PAGE_MASK;
2310     uint64_t pte_addr, pte;
2311 
2312     /* Compute the page table entry address */
2313     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2314     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2315 
2316     /* Mark the page table entry as invalid */
2317     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2318     pte |= PAGE_ENTRY_I;
2319     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2320 
2321     /* XXX we exploit the fact that Linux passes the exact virtual
2322        address here - it's not obliged to! */
2323     if (m4 & 1) {
2324         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2325             tlb_flush_page(cs, page);
2326             /* XXX 31-bit hack */
2327             tlb_flush_page(cs, page ^ 0x80000000);
2328         } else {
2329             /* looks like we don't have a valid virtual address */
2330             tlb_flush(cs);
2331         }
2332     } else {
2333         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2334             tlb_flush_page_all_cpus_synced(cs, page);
2335             /* XXX 31-bit hack */
2336             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2337         } else {
2338             /* looks like we don't have a valid virtual address */
2339             tlb_flush_all_cpus_synced(cs);
2340         }
2341     }
2342 }
2343 
2344 /* flush local tlb */
2345 void HELPER(ptlb)(CPUS390XState *env)
2346 {
2347     tlb_flush(env_cpu(env));
2348 }
2349 
2350 /* flush global tlb */
2351 void HELPER(purge)(CPUS390XState *env)
2352 {
2353     tlb_flush_all_cpus_synced(env_cpu(env));
2354 }
2355 
2356 /* load real address */
2357 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
2358 {
2359     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2360     uint64_t ret, tec;
2361     int flags, exc, cc;
2362 
2363     /* XXX incomplete - has more corner cases */
2364     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2365         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2366     }
2367 
2368     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2369     if (exc) {
2370         cc = 3;
2371         ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
2372     } else {
2373         cc = 0;
2374         ret |= addr & ~TARGET_PAGE_MASK;
2375     }
2376 
2377     env->cc_op = cc;
2378     return ret;
2379 }
2380 #endif
2381 
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.

   ILEN is added to the PSW address on the fast path and is stored alongside
   the instruction in env->ex_value (or in env->int_svc_ilen for opcode
   0x0a).  R1 supplies the byte that is ORed into the second byte of the
   target instruction.  ADDR is the address of the target instruction; an
   odd address raises PGM_SPECIFICATION.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn;
    uint8_t opc;

    /* EXECUTE targets must be at even addresses.  */
    if (addr & 1) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /* First halfword of the target: opcode in the high byte.  */
    insn = cpu_lduw_code(env, addr);
    opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    /* The insn is kept left-justified: halfword 0 ends up in bits 48..63,
       leaving the low 16 bits clear for every length handled below.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        /* Indexed by the low nibble of the 0xd? opcode; unlisted slots are
           NULL and fall through to the generic path.  */
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode length, base and displacement fields out of the
               left-justified instruction image.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            /* A base register number of 0 contributes 0, not regs[0].  */
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            /* NOTE(review): the trailing 0 is the uintptr_t argument,
               presumably the return address - confirm.  */
            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* Opcode 0x0a: record the byte following the opcode and the length,
           then raise EXCP_SVC; helper_exception() does not return.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
    env->ex_target = addr;
}
2463 
2464 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2465                        uint64_t len)
2466 {
2467     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2468     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2469     const uint64_t r0 = env->regs[0];
2470     const uintptr_t ra = GETPC();
2471     uint8_t dest_key, dest_as, dest_k, dest_a;
2472     uint8_t src_key, src_as, src_k, src_a;
2473     uint64_t val;
2474     int cc = 0;
2475 
2476     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2477                __func__, dest, src, len);
2478 
2479     if (!(env->psw.mask & PSW_MASK_DAT)) {
2480         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2481     }
2482 
2483     /* OAC (operand access control) for the first operand -> dest */
2484     val = (r0 & 0xffff0000ULL) >> 16;
2485     dest_key = (val >> 12) & 0xf;
2486     dest_as = (val >> 6) & 0x3;
2487     dest_k = (val >> 1) & 0x1;
2488     dest_a = val & 0x1;
2489 
2490     /* OAC (operand access control) for the second operand -> src */
2491     val = (r0 & 0x0000ffffULL);
2492     src_key = (val >> 12) & 0xf;
2493     src_as = (val >> 6) & 0x3;
2494     src_k = (val >> 1) & 0x1;
2495     src_a = val & 0x1;
2496 
2497     if (!dest_k) {
2498         dest_key = psw_key;
2499     }
2500     if (!src_k) {
2501         src_key = psw_key;
2502     }
2503     if (!dest_a) {
2504         dest_as = psw_as;
2505     }
2506     if (!src_a) {
2507         src_as = psw_as;
2508     }
2509 
2510     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2511         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2512     }
2513     if (!(env->cregs[0] & CR0_SECONDARY) &&
2514         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2515         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2516     }
2517     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2518         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2519     }
2520 
2521     len = wrap_length32(env, len);
2522     if (len > 4096) {
2523         cc = 3;
2524         len = 4096;
2525     }
2526 
2527     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2528     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2529         (env->psw.mask & PSW_MASK_PSTATE)) {
2530         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2531                       __func__);
2532         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2533     }
2534 
2535     /* FIXME: Access using correct keys and AR-mode */
2536     if (len) {
2537         S390Access srca, desta;
2538 
2539         access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
2540                        mmu_idx_from_as(src_as), ra);
2541         access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
2542                        mmu_idx_from_as(dest_as), ra);
2543 
2544         access_memmove(env, &desta, &srca, ra);
2545     }
2546 
2547     return cc;
2548 }
2549 
/* Decode one Unicode character from guest memory at ADDR, of which ILEN
   bytes remain.  A return value < 0 indicates success, storing the UTF-32
   result into OCHAR and the number of input bytes consumed into OLEN.
   A return value >= 0 indicates failure, and is the CC value to be
   returned: the decoders in this file use 0 when fewer than the required
   bytes remain and 2 for an invalid character.  ENH_CHECK requests the
   additional well-formedness checks; RA is the return address handed to
   the guest memory accessors.  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);
2556 
/* Encode the Unicode character C into guest memory at ADDR, where ILEN
   bytes of room remain.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure, and is the CC value to be returned: the encoders in
   this file use 1 when the remaining room is too small.  RA is the return
   address handed to the guest memory accessors.  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2563 
2564 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2565                        bool enh_check, uintptr_t ra,
2566                        uint32_t *ochar, uint32_t *olen)
2567 {
2568     uint8_t s0, s1, s2, s3;
2569     uint32_t c, l;
2570 
2571     if (ilen < 1) {
2572         return 0;
2573     }
2574     s0 = cpu_ldub_data_ra(env, addr, ra);
2575     if (s0 <= 0x7f) {
2576         /* one byte character */
2577         l = 1;
2578         c = s0;
2579     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2580         /* invalid character */
2581         return 2;
2582     } else if (s0 <= 0xdf) {
2583         /* two byte character */
2584         l = 2;
2585         if (ilen < 2) {
2586             return 0;
2587         }
2588         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2589         c = s0 & 0x1f;
2590         c = (c << 6) | (s1 & 0x3f);
2591         if (enh_check && (s1 & 0xc0) != 0x80) {
2592             return 2;
2593         }
2594     } else if (s0 <= 0xef) {
2595         /* three byte character */
2596         l = 3;
2597         if (ilen < 3) {
2598             return 0;
2599         }
2600         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2601         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2602         c = s0 & 0x0f;
2603         c = (c << 6) | (s1 & 0x3f);
2604         c = (c << 6) | (s2 & 0x3f);
2605         /* Fold the byte-by-byte range descriptions in the PoO into
2606            tests against the complete value.  It disallows encodings
2607            that could be smaller, and the UTF-16 surrogates.  */
2608         if (enh_check
2609             && ((s1 & 0xc0) != 0x80
2610                 || (s2 & 0xc0) != 0x80
2611                 || c < 0x1000
2612                 || (c >= 0xd800 && c <= 0xdfff))) {
2613             return 2;
2614         }
2615     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2616         /* four byte character */
2617         l = 4;
2618         if (ilen < 4) {
2619             return 0;
2620         }
2621         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2622         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2623         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2624         c = s0 & 0x07;
2625         c = (c << 6) | (s1 & 0x3f);
2626         c = (c << 6) | (s2 & 0x3f);
2627         c = (c << 6) | (s3 & 0x3f);
2628         /* See above.  */
2629         if (enh_check
2630             && ((s1 & 0xc0) != 0x80
2631                 || (s2 & 0xc0) != 0x80
2632                 || (s3 & 0xc0) != 0x80
2633                 || c < 0x010000
2634                 || c > 0x10ffff)) {
2635             return 2;
2636         }
2637     } else {
2638         /* invalid character */
2639         return 2;
2640     }
2641 
2642     *ochar = c;
2643     *olen = l;
2644     return -1;
2645 }
2646 
2647 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2648                         bool enh_check, uintptr_t ra,
2649                         uint32_t *ochar, uint32_t *olen)
2650 {
2651     uint16_t s0, s1;
2652     uint32_t c, l;
2653 
2654     if (ilen < 2) {
2655         return 0;
2656     }
2657     s0 = cpu_lduw_data_ra(env, addr, ra);
2658     if ((s0 & 0xfc00) != 0xd800) {
2659         /* one word character */
2660         l = 2;
2661         c = s0;
2662     } else {
2663         /* two word character */
2664         l = 4;
2665         if (ilen < 4) {
2666             return 0;
2667         }
2668         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2669         c = extract32(s0, 6, 4) + 1;
2670         c = (c << 6) | (s0 & 0x3f);
2671         c = (c << 10) | (s1 & 0x3ff);
2672         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2673             /* invalid surrogate character */
2674             return 2;
2675         }
2676     }
2677 
2678     *ochar = c;
2679     *olen = l;
2680     return -1;
2681 }
2682 
2683 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2684                         bool enh_check, uintptr_t ra,
2685                         uint32_t *ochar, uint32_t *olen)
2686 {
2687     uint32_t c;
2688 
2689     if (ilen < 4) {
2690         return 0;
2691     }
2692     c = cpu_ldl_data_ra(env, addr, ra);
2693     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2694         /* invalid unicode character */
2695         return 2;
2696     }
2697 
2698     *ochar = c;
2699     *olen = 4;
2700     return -1;
2701 }
2702 
2703 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2704                        uintptr_t ra, uint32_t c, uint32_t *olen)
2705 {
2706     uint8_t d[4];
2707     uint32_t l, i;
2708 
2709     if (c <= 0x7f) {
2710         /* one byte character */
2711         l = 1;
2712         d[0] = c;
2713     } else if (c <= 0x7ff) {
2714         /* two byte character */
2715         l = 2;
2716         d[1] = 0x80 | extract32(c, 0, 6);
2717         d[0] = 0xc0 | extract32(c, 6, 5);
2718     } else if (c <= 0xffff) {
2719         /* three byte character */
2720         l = 3;
2721         d[2] = 0x80 | extract32(c, 0, 6);
2722         d[1] = 0x80 | extract32(c, 6, 6);
2723         d[0] = 0xe0 | extract32(c, 12, 4);
2724     } else {
2725         /* four byte character */
2726         l = 4;
2727         d[3] = 0x80 | extract32(c, 0, 6);
2728         d[2] = 0x80 | extract32(c, 6, 6);
2729         d[1] = 0x80 | extract32(c, 12, 6);
2730         d[0] = 0xf0 | extract32(c, 18, 3);
2731     }
2732 
2733     if (ilen < l) {
2734         return 1;
2735     }
2736     for (i = 0; i < l; ++i) {
2737         cpu_stb_data_ra(env, addr + i, d[i], ra);
2738     }
2739 
2740     *olen = l;
2741     return -1;
2742 }
2743 
2744 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2745                         uintptr_t ra, uint32_t c, uint32_t *olen)
2746 {
2747     uint16_t d0, d1;
2748 
2749     if (c <= 0xffff) {
2750         /* one word character */
2751         if (ilen < 2) {
2752             return 1;
2753         }
2754         cpu_stw_data_ra(env, addr, c, ra);
2755         *olen = 2;
2756     } else {
2757         /* two word character */
2758         if (ilen < 4) {
2759             return 1;
2760         }
2761         d1 = 0xdc00 | extract32(c, 0, 10);
2762         d0 = 0xd800 | extract32(c, 10, 6);
2763         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2764         cpu_stw_data_ra(env, addr + 0, d0, ra);
2765         cpu_stw_data_ra(env, addr + 2, d1, ra);
2766         *olen = 4;
2767     }
2768 
2769     return -1;
2770 }
2771 
2772 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2773                         uintptr_t ra, uint32_t c, uint32_t *olen)
2774 {
2775     if (ilen < 4) {
2776         return 1;
2777     }
2778     cpu_stl_data_ra(env, addr, c, ra);
2779     *olen = 4;
2780     return -1;
2781 }
2782 
2783 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2784                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2785                                        decode_unicode_fn decode,
2786                                        encode_unicode_fn encode)
2787 {
2788     uint64_t dst = get_address(env, r1);
2789     uint64_t dlen = get_length(env, r1 + 1);
2790     uint64_t src = get_address(env, r2);
2791     uint64_t slen = get_length(env, r2 + 1);
2792     bool enh_check = m3 & 1;
2793     int cc, i;
2794 
2795     /* Lest we fail to service interrupts in a timely manner, limit the
2796        amount of work we're willing to do.  For now, let's cap at 256.  */
2797     for (i = 0; i < 256; ++i) {
2798         uint32_t c, ilen, olen;
2799 
2800         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2801         if (unlikely(cc >= 0)) {
2802             break;
2803         }
2804         cc = encode(env, dst, dlen, ra, c, &olen);
2805         if (unlikely(cc >= 0)) {
2806             break;
2807         }
2808 
2809         src += ilen;
2810         slen -= ilen;
2811         dst += olen;
2812         dlen -= olen;
2813         cc = 3;
2814     }
2815 
2816     set_address(env, r1, dst);
2817     set_length(env, r1 + 1, dlen);
2818     set_address(env, r2, src);
2819     set_length(env, r2 + 1, slen);
2820 
2821     return cc;
2822 }
2823 
2824 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2825 {
2826     return convert_unicode(env, r1, r2, m3, GETPC(),
2827                            decode_utf8, encode_utf16);
2828 }
2829 
2830 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2831 {
2832     return convert_unicode(env, r1, r2, m3, GETPC(),
2833                            decode_utf8, encode_utf32);
2834 }
2835 
2836 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2837 {
2838     return convert_unicode(env, r1, r2, m3, GETPC(),
2839                            decode_utf16, encode_utf8);
2840 }
2841 
2842 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2843 {
2844     return convert_unicode(env, r1, r2, m3, GETPC(),
2845                            decode_utf16, encode_utf32);
2846 }
2847 
2848 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2849 {
2850     return convert_unicode(env, r1, r2, m3, GETPC(),
2851                            decode_utf32, encode_utf8);
2852 }
2853 
2854 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2855 {
2856     return convert_unicode(env, r1, r2, m3, GETPC(),
2857                            decode_utf32, encode_utf16);
2858 }
2859 
2860 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2861                         uintptr_t ra)
2862 {
2863     const int mmu_idx = s390x_env_mmu_index(env, false);
2864 
2865     /* test the actual access, not just any access to the page due to LAP */
2866     while (len) {
2867         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2868         const uint64_t curlen = MIN(pagelen, len);
2869 
2870         probe_write(env, addr, curlen, mmu_idx, ra);
2871         addr = wrap_address(env, addr + curlen);
2872         len -= curlen;
2873     }
2874 }
2875 
2876 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2877 {
2878     probe_write_access(env, addr, len, GETPC());
2879 }
2880