xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision f7230e09b1ccfb7055b79dfee981e18d444a118a)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/page-protection.h"
29 #include "exec/cpu_ldst.h"
30 #include "hw/core/tcg-cpu-ops.h"
31 #include "qemu/int128.h"
32 #include "qemu/atomic128.h"
33 
34 #if !defined(CONFIG_USER_ONLY)
35 #include "hw/s390x/storage-keys.h"
36 #include "hw/boards.h"
37 #endif
38 
/*
 * user_or_likely(X) expands to the constant true in CONFIG_USER_ONLY builds
 * (X is not evaluated there) and to likely(X) in all other builds.
 */
#ifndef CONFIG_USER_ONLY
# define user_or_likely(X)    likely(X)
#else
# define user_or_likely(X)    true
#endif
44 
45 /*****************************************************************************/
46 /* Softmmu support */
47 
/*
 * Define DEBUG_HELPER to forward HELPER_LOG() to qemu_log(); without it
 * HELPER_LOG() expands to nothing.
 */
/* #define DEBUG_HELPER */
#ifndef DEBUG_HELPER
#define HELPER_LOG(x...)
#else
#define HELPER_LOG(x...) qemu_log(x)
#endif
54 
55 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
56 {
57     uint16_t pkm = env->cregs[3] >> 16;
58 
59     if (env->psw.mask & PSW_MASK_PSTATE) {
60         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
61         return pkm & (0x8000 >> psw_key);
62     }
63     return true;
64 }
65 
66 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
67                                    uint64_t src, uint32_t len)
68 {
69     if (!len || src == dest) {
70         return false;
71     }
72     /* Take care of wrapping at the end of address space. */
73     if (unlikely(wrap_address(env, src + len - 1) < src)) {
74         return dest > src || dest <= wrap_address(env, src + len - 1);
75     }
76     return dest > src && dest <= src + len - 1;
77 }
78 
79 /* Trigger a SPECIFICATION exception if an address or a length is not
80    naturally aligned.  */
81 static inline void check_alignment(CPUS390XState *env, uint64_t v,
82                                    int wordsize, uintptr_t ra)
83 {
84     if (v % wordsize) {
85         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
86     }
87 }
88 
89 /* Load a value from memory according to its size.  */
90 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
91                                            int wordsize, uintptr_t ra)
92 {
93     switch (wordsize) {
94     case 1:
95         return cpu_ldub_data_ra(env, addr, ra);
96     case 2:
97         return cpu_lduw_data_ra(env, addr, ra);
98     default:
99         abort();
100     }
101 }
102 
103 /* Store a to memory according to its size.  */
104 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
105                                       uint64_t value, int wordsize,
106                                       uintptr_t ra)
107 {
108     switch (wordsize) {
109     case 1:
110         cpu_stb_data_ra(env, addr, value, ra);
111         break;
112     case 2:
113         cpu_stw_data_ra(env, addr, value, ra);
114         break;
115     default:
116         abort();
117     }
118 }
119 
120 /* An access covers at most 4096 bytes and therefore at most two pages. */
121 typedef struct S390Access {
122     target_ulong vaddr1;
123     target_ulong vaddr2;
124     void *haddr1;
125     void *haddr2;
126     uint16_t size1;
127     uint16_t size2;
128     /*
129      * If we can't access the host page directly, we'll have to do I/O access
130      * via ld/st helpers. These are internal details, so we store the
131      * mmu idx to do the access here instead of passing it around in the
132      * helpers.
133      */
134     int mmu_idx;
135 } S390Access;
136 
137 /*
138  * With nonfault=1, return the PGM_ exception that would have been injected
139  * into the guest; return 0 if no exception was detected.
140  *
141  * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec.
142  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
143  */
144 static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
145                                     int size, MMUAccessType access_type,
146                                     int mmu_idx, bool nonfault,
147                                     void **phost, uintptr_t ra)
148 {
149     int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
150                                    nonfault, phost, ra);
151 
152     if (unlikely(flags & TLB_INVALID_MASK)) {
153 #ifdef CONFIG_USER_ONLY
154         /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
155         env->__excp_addr = addr & TARGET_PAGE_MASK;
156         return (page_get_flags(addr) & PAGE_VALID
157                 ? PGM_PROTECTION : PGM_ADDRESSING);
158 #else
159         return env->tlb_fill_exc;
160 #endif
161     }
162 
163 #ifndef CONFIG_USER_ONLY
164     if (unlikely(flags & TLB_WATCHPOINT)) {
165         /* S390 does not presently use transaction attributes. */
166         cpu_check_watchpoint(env_cpu(env), addr, size,
167                              MEMTXATTRS_UNSPECIFIED,
168                              (access_type == MMU_DATA_STORE
169                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
170     }
171 #endif
172 
173     return 0;
174 }
175 
176 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
177                              bool nonfault, vaddr vaddr1, int size,
178                              MMUAccessType access_type,
179                              int mmu_idx, uintptr_t ra)
180 {
181     int size1, size2, exc;
182 
183     assert(size > 0 && size <= 4096);
184 
185     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
186     size2 = size - size1;
187 
188     memset(access, 0, sizeof(*access));
189     access->vaddr1 = vaddr1;
190     access->size1 = size1;
191     access->size2 = size2;
192     access->mmu_idx = mmu_idx;
193 
194     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
195                             &access->haddr1, ra);
196     if (unlikely(exc)) {
197         return exc;
198     }
199     if (unlikely(size2)) {
200         /* The access crosses page boundaries. */
201         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
202 
203         access->vaddr2 = vaddr2;
204         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
205                                 nonfault, &access->haddr2, ra);
206         if (unlikely(exc)) {
207             return exc;
208         }
209     }
210     return 0;
211 }
212 
213 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
214                                   vaddr vaddr, int size,
215                                   MMUAccessType access_type, int mmu_idx,
216                                   uintptr_t ra)
217 {
218     int exc = access_prepare_nf(ret, env, false, vaddr, size,
219                                 access_type, mmu_idx, ra);
220     assert(!exc);
221 }
222 
223 /* Helper to handle memset on a single page. */
224 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
225                              uint8_t byte, uint16_t size, int mmu_idx,
226                              uintptr_t ra)
227 {
228     if (user_or_likely(haddr)) {
229         memset(haddr, byte, size);
230     } else {
231         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
232         for (int i = 0; i < size; i++) {
233             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
234         }
235     }
236 }
237 
238 static void access_memset(CPUS390XState *env, S390Access *desta,
239                           uint8_t byte, uintptr_t ra)
240 {
241     set_helper_retaddr(ra);
242     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
243                      desta->mmu_idx, ra);
244     if (unlikely(desta->size2)) {
245         do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
246                          desta->size2, desta->mmu_idx, ra);
247     }
248     clear_helper_retaddr();
249 }
250 
251 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
252                                int offset, uintptr_t ra)
253 {
254     target_ulong vaddr = access->vaddr1;
255     void *haddr = access->haddr1;
256 
257     if (unlikely(offset >= access->size1)) {
258         offset -= access->size1;
259         vaddr = access->vaddr2;
260         haddr = access->haddr2;
261     }
262 
263     if (user_or_likely(haddr)) {
264         return ldub_p(haddr + offset);
265     } else {
266         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
267         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
268     }
269 }
270 
271 static void access_set_byte(CPUS390XState *env, S390Access *access,
272                             int offset, uint8_t byte, uintptr_t ra)
273 {
274     target_ulong vaddr = access->vaddr1;
275     void *haddr = access->haddr1;
276 
277     if (unlikely(offset >= access->size1)) {
278         offset -= access->size1;
279         vaddr = access->vaddr2;
280         haddr = access->haddr2;
281     }
282 
283     if (user_or_likely(haddr)) {
284         stb_p(haddr + offset, byte);
285     } else {
286         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
287         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
288     }
289 }
290 
291 /*
292  * Move data with the same semantics as memmove() in case ranges don't overlap
293  * or src > dest. Undefined behavior on destructive overlaps.
294  */
295 static void access_memmove(CPUS390XState *env, S390Access *desta,
296                            S390Access *srca, uintptr_t ra)
297 {
298     int len = desta->size1 + desta->size2;
299 
300     assert(len == srca->size1 + srca->size2);
301 
302     /* Fallback to slow access in case we don't have access to all host pages */
303     if (user_or_likely(desta->haddr1 &&
304                        srca->haddr1 &&
305                        (!desta->size2 || desta->haddr2) &&
306                        (!srca->size2 || srca->haddr2))) {
307         int diff = desta->size1 - srca->size1;
308 
309         if (likely(diff == 0)) {
310             memmove(desta->haddr1, srca->haddr1, srca->size1);
311             if (unlikely(srca->size2)) {
312                 memmove(desta->haddr2, srca->haddr2, srca->size2);
313             }
314         } else if (diff > 0) {
315             memmove(desta->haddr1, srca->haddr1, srca->size1);
316             memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
317             if (likely(desta->size2)) {
318                 memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
319             }
320         } else {
321             diff = -diff;
322             memmove(desta->haddr1, srca->haddr1, desta->size1);
323             memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
324             if (likely(srca->size2)) {
325                 memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
326             }
327         }
328     } else {
329         for (int i = 0; i < len; i++) {
330             uint8_t byte = access_get_byte(env, srca, i, ra);
331             access_set_byte(env, desta, i, byte, ra);
332         }
333     }
334 }
335 
336 static int mmu_idx_from_as(uint8_t as)
337 {
338     switch (as) {
339     case AS_PRIMARY:
340         return MMU_PRIMARY_IDX;
341     case AS_SECONDARY:
342         return MMU_SECONDARY_IDX;
343     case AS_HOME:
344         return MMU_HOME_IDX;
345     default:
346         /* FIXME AS_ACCREG */
347         g_assert_not_reached();
348     }
349 }
350 
351 /* and on array */
352 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
353                              uint64_t src, uintptr_t ra)
354 {
355     const int mmu_idx = s390x_env_mmu_index(env, false);
356     S390Access srca1, srca2, desta;
357     uint32_t i;
358     uint8_t c = 0;
359 
360     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
361                __func__, l, dest, src);
362 
363     /* NC always processes one more byte than specified - maximum is 256 */
364     l++;
365 
366     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
367     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
368     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
369     set_helper_retaddr(ra);
370 
371     for (i = 0; i < l; i++) {
372         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
373                           access_get_byte(env, &srca2, i, ra);
374 
375         c |= x;
376         access_set_byte(env, &desta, i, x, ra);
377     }
378 
379     clear_helper_retaddr();
380     return c != 0;
381 }
382 
383 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
384                     uint64_t src)
385 {
386     return do_helper_nc(env, l, dest, src, GETPC());
387 }
388 
389 /* xor on array */
390 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
391                              uint64_t src, uintptr_t ra)
392 {
393     const int mmu_idx = s390x_env_mmu_index(env, false);
394     S390Access srca1, srca2, desta;
395     uint32_t i;
396     uint8_t c = 0;
397 
398     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
399                __func__, l, dest, src);
400 
401     /* XC always processes one more byte than specified - maximum is 256 */
402     l++;
403 
404     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
405     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
406     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
407 
408     /* xor with itself is the same as memset(0) */
409     if (src == dest) {
410         access_memset(env, &desta, 0, ra);
411         return 0;
412     }
413 
414     set_helper_retaddr(ra);
415     for (i = 0; i < l; i++) {
416         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
417                           access_get_byte(env, &srca2, i, ra);
418 
419         c |= x;
420         access_set_byte(env, &desta, i, x, ra);
421     }
422     clear_helper_retaddr();
423     return c != 0;
424 }
425 
426 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
427                     uint64_t src)
428 {
429     return do_helper_xc(env, l, dest, src, GETPC());
430 }
431 
432 /* or on array */
433 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
434                              uint64_t src, uintptr_t ra)
435 {
436     const int mmu_idx = s390x_env_mmu_index(env, false);
437     S390Access srca1, srca2, desta;
438     uint32_t i;
439     uint8_t c = 0;
440 
441     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
442                __func__, l, dest, src);
443 
444     /* OC always processes one more byte than specified - maximum is 256 */
445     l++;
446 
447     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
448     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
449     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
450     set_helper_retaddr(ra);
451 
452     for (i = 0; i < l; i++) {
453         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
454                           access_get_byte(env, &srca2, i, ra);
455 
456         c |= x;
457         access_set_byte(env, &desta, i, x, ra);
458     }
459 
460     clear_helper_retaddr();
461     return c != 0;
462 }
463 
464 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
465                     uint64_t src)
466 {
467     return do_helper_oc(env, l, dest, src, GETPC());
468 }
469 
470 /* memmove */
471 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
472                               uint64_t src, uintptr_t ra)
473 {
474     const int mmu_idx = s390x_env_mmu_index(env, false);
475     S390Access srca, desta;
476     uint32_t i;
477 
478     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
479                __func__, l, dest, src);
480 
481     /* MVC always copies one more byte than specified - maximum is 256 */
482     l++;
483 
484     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
485     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
486 
487     /*
488      * "When the operands overlap, the result is obtained as if the operands
489      * were processed one byte at a time". Only non-destructive overlaps
490      * behave like memmove().
491      */
492     if (dest == src + 1) {
493         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
494     } else if (!is_destructive_overlap(env, dest, src, l)) {
495         access_memmove(env, &desta, &srca, ra);
496     } else {
497         set_helper_retaddr(ra);
498         for (i = 0; i < l; i++) {
499             uint8_t byte = access_get_byte(env, &srca, i, ra);
500 
501             access_set_byte(env, &desta, i, byte, ra);
502         }
503         clear_helper_retaddr();
504     }
505 
506     return env->cc_op;
507 }
508 
509 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
510 {
511     do_helper_mvc(env, l, dest, src, GETPC());
512 }
513 
514 /* move right to left */
515 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
516 {
517     const int mmu_idx = s390x_env_mmu_index(env, false);
518     const uint64_t ra = GETPC();
519     S390Access srca, desta;
520     int32_t i;
521 
522     /* MVCRL always copies one more byte than specified - maximum is 256 */
523     l &= 0xff;
524     l++;
525 
526     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
527     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
528 
529     set_helper_retaddr(ra);
530     for (i = l - 1; i >= 0; i--) {
531         uint8_t byte = access_get_byte(env, &srca, i, ra);
532         access_set_byte(env, &desta, i, byte, ra);
533     }
534     clear_helper_retaddr();
535 }
536 
537 /* move inverse  */
538 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
539 {
540     const int mmu_idx = s390x_env_mmu_index(env, false);
541     S390Access srca, desta;
542     uintptr_t ra = GETPC();
543     int i;
544 
545     /* MVCIN always copies one more byte than specified - maximum is 256 */
546     l++;
547 
548     src = wrap_address(env, src - l + 1);
549     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
550     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
551 
552     set_helper_retaddr(ra);
553     for (i = 0; i < l; i++) {
554         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
555         access_set_byte(env, &desta, i, x, ra);
556     }
557     clear_helper_retaddr();
558 }
559 
560 /* move numerics  */
561 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
562 {
563     const int mmu_idx = s390x_env_mmu_index(env, false);
564     S390Access srca1, srca2, desta;
565     uintptr_t ra = GETPC();
566     int i;
567 
568     /* MVN always copies one more byte than specified - maximum is 256 */
569     l++;
570 
571     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
572     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
573     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
574 
575     set_helper_retaddr(ra);
576     for (i = 0; i < l; i++) {
577         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
578                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
579 
580         access_set_byte(env, &desta, i, x, ra);
581     }
582     clear_helper_retaddr();
583 }
584 
585 /* move with offset  */
586 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
587 {
588     const int mmu_idx = s390x_env_mmu_index(env, false);
589     /* MVO always processes one more byte than specified - maximum is 16 */
590     const int len_dest = (l >> 4) + 1;
591     const int len_src = (l & 0xf) + 1;
592     uintptr_t ra = GETPC();
593     uint8_t byte_dest, byte_src;
594     S390Access srca, desta;
595     int i, j;
596 
597     access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
598     access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
599 
600     /* Handle rightmost byte */
601     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
602 
603     set_helper_retaddr(ra);
604     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
605     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
606     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
607 
608     /* Process remaining bytes from right to left */
609     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
610         byte_dest = byte_src >> 4;
611         if (j >= 0) {
612             byte_src = access_get_byte(env, &srca, j, ra);
613         } else {
614             byte_src = 0;
615         }
616         byte_dest |= byte_src << 4;
617         access_set_byte(env, &desta, i, byte_dest, ra);
618     }
619     clear_helper_retaddr();
620 }
621 
622 /* move zones  */
623 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
624 {
625     const int mmu_idx = s390x_env_mmu_index(env, false);
626     S390Access srca1, srca2, desta;
627     uintptr_t ra = GETPC();
628     int i;
629 
630     /* MVZ always copies one more byte than specified - maximum is 256 */
631     l++;
632 
633     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
634     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
635     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
636 
637     set_helper_retaddr(ra);
638     for (i = 0; i < l; i++) {
639         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
640                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
641 
642         access_set_byte(env, &desta, i, x, ra);
643     }
644     clear_helper_retaddr();
645 }
646 
647 /* compare unsigned byte arrays */
648 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
649                               uint64_t s2, uintptr_t ra)
650 {
651     uint32_t i;
652     uint32_t cc = 0;
653 
654     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
655                __func__, l, s1, s2);
656 
657     for (i = 0; i <= l; i++) {
658         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
659         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
660         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
661         if (x < y) {
662             cc = 1;
663             break;
664         } else if (x > y) {
665             cc = 2;
666             break;
667         }
668     }
669 
670     HELPER_LOG("\n");
671     return cc;
672 }
673 
674 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
675 {
676     return do_helper_clc(env, l, s1, s2, GETPC());
677 }
678 
679 /* compare logical under mask */
680 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
681                      uint64_t addr)
682 {
683     uintptr_t ra = GETPC();
684     uint32_t cc = 0;
685 
686     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
687                mask, addr);
688 
689     if (!mask) {
690         /* Recognize access exceptions for the first byte */
691         probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
692     }
693 
694     while (mask) {
695         if (mask & 8) {
696             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
697             uint8_t r = extract32(r1, 24, 8);
698             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
699                        addr);
700             if (r < d) {
701                 cc = 1;
702                 break;
703             } else if (r > d) {
704                 cc = 2;
705                 break;
706             }
707             addr++;
708         }
709         mask = (mask << 1) & 0xf;
710         r1 <<= 8;
711     }
712 
713     HELPER_LOG("\n");
714     return cc;
715 }
716 
717 static inline uint64_t get_address(CPUS390XState *env, int reg)
718 {
719     return wrap_address(env, env->regs[reg]);
720 }
721 
722 /*
723  * Store the address to the given register, zeroing out unused leftmost
724  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
725  */
726 static inline void set_address_zero(CPUS390XState *env, int reg,
727                                     uint64_t address)
728 {
729     if (env->psw.mask & PSW_MASK_64) {
730         env->regs[reg] = address;
731     } else {
732         if (!(env->psw.mask & PSW_MASK_32)) {
733             address &= 0x00ffffff;
734         } else {
735             address &= 0x7fffffff;
736         }
737         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
738     }
739 }
740 
741 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
742 {
743     if (env->psw.mask & PSW_MASK_64) {
744         /* 64-Bit mode */
745         env->regs[reg] = address;
746     } else {
747         if (!(env->psw.mask & PSW_MASK_32)) {
748             /* 24-Bit mode. According to the PoO it is implementation
749             dependent if bits 32-39 remain unchanged or are set to
750             zeros.  Choose the former so that the function can also be
751             used for TRT.  */
752             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
753         } else {
754             /* 31-Bit mode. According to the PoO it is implementation
755             dependent if bit 32 remains unchanged or is set to zero.
756             Choose the latter so that the function can also be used for
757             TRT.  */
758             address &= 0x7fffffff;
759             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
760         }
761     }
762 }
763 
764 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
765 {
766     if (!(env->psw.mask & PSW_MASK_64)) {
767         return (uint32_t)length;
768     }
769     return length;
770 }
771 
772 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
773 {
774     if (!(env->psw.mask & PSW_MASK_64)) {
775         /* 24-Bit and 31-Bit mode */
776         length &= 0x7fffffff;
777     }
778     return length;
779 }
780 
781 static inline uint64_t get_length(CPUS390XState *env, int reg)
782 {
783     return wrap_length31(env, env->regs[reg]);
784 }
785 
786 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
787 {
788     if (env->psw.mask & PSW_MASK_64) {
789         /* 64-Bit mode */
790         env->regs[reg] = length;
791     } else {
792         /* 24-Bit and 31-Bit mode */
793         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
794     }
795 }
796 
797 /* search string (c is byte to search, r2 is string, r1 end of string) */
798 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
799 {
800     uintptr_t ra = GETPC();
801     uint64_t end, str;
802     uint32_t len;
803     uint8_t v, c = env->regs[0];
804 
805     /* Bits 32-55 must contain all 0.  */
806     if (env->regs[0] & 0xffffff00u) {
807         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
808     }
809 
810     str = get_address(env, r2);
811     end = get_address(env, r1);
812 
813     /* Lest we fail to service interrupts in a timely manner, limit the
814        amount of work we're willing to do.  For now, let's cap at 8k.  */
815     for (len = 0; len < 0x2000; ++len) {
816         if (str + len == end) {
817             /* Character not found.  R1 & R2 are unmodified.  */
818             env->cc_op = 2;
819             return;
820         }
821         v = cpu_ldub_data_ra(env, str + len, ra);
822         if (v == c) {
823             /* Character found.  Set R1 to the location; R2 is unmodified.  */
824             env->cc_op = 1;
825             set_address(env, r1, str + len);
826             return;
827         }
828     }
829 
830     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
831     env->cc_op = 3;
832     set_address(env, r2, str + len);
833 }
834 
835 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
836 {
837     uintptr_t ra = GETPC();
838     uint32_t len;
839     uint16_t v, c = env->regs[0];
840     uint64_t end, str, adj_end;
841 
842     /* Bits 32-47 of R0 must be zero.  */
843     if (env->regs[0] & 0xffff0000u) {
844         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
845     }
846 
847     str = get_address(env, r2);
848     end = get_address(env, r1);
849 
850     /* If the LSB of the two addresses differ, use one extra byte.  */
851     adj_end = end + ((str ^ end) & 1);
852 
853     /* Lest we fail to service interrupts in a timely manner, limit the
854        amount of work we're willing to do.  For now, let's cap at 8k.  */
855     for (len = 0; len < 0x2000; len += 2) {
856         if (str + len == adj_end) {
857             /* End of input found.  */
858             env->cc_op = 2;
859             return;
860         }
861         v = cpu_lduw_data_ra(env, str + len, ra);
862         if (v == c) {
863             /* Character found.  Set R1 to the location; R2 is unmodified.  */
864             env->cc_op = 1;
865             set_address(env, r1, str + len);
866             return;
867         }
868     }
869 
870     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
871     env->cc_op = 3;
872     set_address(env, r2, str + len);
873 }
874 
875 /* unsigned string compare (c is string terminator) */
876 Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
877 {
878     uintptr_t ra = GETPC();
879     uint32_t len;
880 
881     c = c & 0xff;
882     s1 = wrap_address(env, s1);
883     s2 = wrap_address(env, s2);
884 
885     /* Lest we fail to service interrupts in a timely manner, limit the
886        amount of work we're willing to do.  For now, let's cap at 8k.  */
887     for (len = 0; len < 0x2000; ++len) {
888         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
889         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
890         if (v1 == v2) {
891             if (v1 == c) {
892                 /* Equal.  CC=0, and don't advance the registers.  */
893                 env->cc_op = 0;
894                 return int128_make128(s2, s1);
895             }
896         } else {
897             /* Unequal.  CC={1,2}, and advance the registers.  Note that
898                the terminator need not be zero, but the string that contains
899                the terminator is by definition "low".  */
900             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
901             return int128_make128(s2 + len, s1 + len);
902         }
903     }
904 
905     /* CPU-determined bytes equal; advance the registers.  */
906     env->cc_op = 3;
907     return int128_make128(s2 + len, s1 + len);
908 }
909 
910 /* move page */
911 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
912 {
913     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
914     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
915     const int mmu_idx = s390x_env_mmu_index(env, false);
916     const bool f = extract64(r0, 11, 1);
917     const bool s = extract64(r0, 10, 1);
918     const bool cco = extract64(r0, 8, 1);
919     uintptr_t ra = GETPC();
920     S390Access srca, desta;
921     int exc;
922 
923     if ((f && s) || extract64(r0, 12, 4)) {
924         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
925     }
926 
927     /*
928      * We always manually handle exceptions such that we can properly store
929      * r1/r2 to the lowcore on page-translation exceptions.
930      *
931      * TODO: Access key handling
932      */
933     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
934                             MMU_DATA_LOAD, mmu_idx, ra);
935     if (exc) {
936         if (cco) {
937             return 2;
938         }
939         goto inject_exc;
940     }
941     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
942                             MMU_DATA_STORE, mmu_idx, ra);
943     if (exc) {
944         if (cco && exc != PGM_PROTECTION) {
945             return 1;
946         }
947         goto inject_exc;
948     }
949     access_memmove(env, &desta, &srca, ra);
950     return 0; /* data moved */
951 inject_exc:
952 #if !defined(CONFIG_USER_ONLY)
953     if (exc != PGM_ADDRESSING) {
954         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
955                  env->tlb_fill_tec);
956     }
957     if (exc == PGM_PAGE_TRANS) {
958         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
959                  r1 << 4 | r2);
960     }
961 #endif
962     tcg_s390_program_interrupt(env, exc, ra);
963 }
964 
965 /* string copy */
966 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
967 {
968     const int mmu_idx = s390x_env_mmu_index(env, false);
969     const uint64_t d = get_address(env, r1);
970     const uint64_t s = get_address(env, r2);
971     const uint8_t c = env->regs[0];
972     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
973     S390Access srca, desta;
974     uintptr_t ra = GETPC();
975     int i;
976 
977     if (env->regs[0] & 0xffffff00ull) {
978         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
979     }
980 
981     /*
982      * Our access should not exceed single pages, as we must not report access
983      * exceptions exceeding the actually copied range (which we don't know at
984      * this point). We might over-indicate watchpoints within the pages
985      * (if we ever care, we have to limit processing to a single byte).
986      */
987     access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
988     access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
989 
990     set_helper_retaddr(ra);
991     for (i = 0; i < len; i++) {
992         const uint8_t v = access_get_byte(env, &srca, i, ra);
993 
994         access_set_byte(env, &desta, i, v, ra);
995         if (v == c) {
996             clear_helper_retaddr();
997             set_address_zero(env, r1, d + i);
998             return 1;
999         }
1000     }
1001     clear_helper_retaddr();
1002     set_address_zero(env, r1, d + len);
1003     set_address_zero(env, r2, s + len);
1004     return 3;
1005 }
1006 
1007 /* load access registers r1 to r3 from memory at a2 */
1008 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1009 {
1010     uintptr_t ra = GETPC();
1011     int i;
1012 
1013     if (a2 & 0x3) {
1014         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1015     }
1016 
1017     for (i = r1;; i = (i + 1) % 16) {
1018         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1019         a2 += 4;
1020 
1021         if (i == r3) {
1022             break;
1023         }
1024     }
1025 }
1026 
1027 /* store access registers r1 to r3 in memory at a2 */
1028 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1029 {
1030     uintptr_t ra = GETPC();
1031     int i;
1032 
1033     if (a2 & 0x3) {
1034         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1035     }
1036 
1037     for (i = r1;; i = (i + 1) % 16) {
1038         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1039         a2 += 4;
1040 
1041         if (i == r3) {
1042             break;
1043         }
1044     }
1045 }
1046 
1047 /* move long helper */
1048 static inline uint32_t do_mvcl(CPUS390XState *env,
1049                                uint64_t *dest, uint64_t *destlen,
1050                                uint64_t *src, uint64_t *srclen,
1051                                uint16_t pad, int wordsize, uintptr_t ra)
1052 {
1053     const int mmu_idx = s390x_env_mmu_index(env, false);
1054     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1055     S390Access srca, desta;
1056     int i, cc;
1057 
1058     if (*destlen == *srclen) {
1059         cc = 0;
1060     } else if (*destlen < *srclen) {
1061         cc = 1;
1062     } else {
1063         cc = 2;
1064     }
1065 
1066     if (!*destlen) {
1067         return cc;
1068     }
1069 
1070     /*
1071      * Only perform one type of type of operation (move/pad) at a time.
1072      * Stay within single pages.
1073      */
1074     if (*srclen) {
1075         /* Copy the src array */
1076         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1077         *destlen -= len;
1078         *srclen -= len;
1079         access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1080         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1081         access_memmove(env, &desta, &srca, ra);
1082         *src = wrap_address(env, *src + len);
1083         *dest = wrap_address(env, *dest + len);
1084     } else if (wordsize == 1) {
1085         /* Pad the remaining area */
1086         *destlen -= len;
1087         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1088         access_memset(env, &desta, pad, ra);
1089         *dest = wrap_address(env, *dest + len);
1090     } else {
1091         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1092         set_helper_retaddr(ra);
1093 
1094         /* The remaining length selects the padding byte. */
1095         for (i = 0; i < len; (*destlen)--, i++) {
1096             if (*destlen & 1) {
1097                 access_set_byte(env, &desta, i, pad, ra);
1098             } else {
1099                 access_set_byte(env, &desta, i, pad >> 8, ra);
1100             }
1101         }
1102         clear_helper_retaddr();
1103         *dest = wrap_address(env, *dest + len);
1104     }
1105 
1106     return *destlen ? 3 : cc;
1107 }
1108 
1109 /* move long */
1110 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1111 {
1112     const int mmu_idx = s390x_env_mmu_index(env, false);
1113     uintptr_t ra = GETPC();
1114     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1115     uint64_t dest = get_address(env, r1);
1116     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1117     uint64_t src = get_address(env, r2);
1118     uint8_t pad = env->regs[r2 + 1] >> 24;
1119     CPUState *cs = env_cpu(env);
1120     S390Access srca, desta;
1121     uint32_t cc, cur_len;
1122 
1123     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1124         cc = 3;
1125     } else if (srclen == destlen) {
1126         cc = 0;
1127     } else if (destlen < srclen) {
1128         cc = 1;
1129     } else {
1130         cc = 2;
1131     }
1132 
1133     /* We might have to zero-out some bits even if there was no action. */
1134     if (unlikely(!destlen || cc == 3)) {
1135         set_address_zero(env, r2, src);
1136         set_address_zero(env, r1, dest);
1137         return cc;
1138     } else if (!srclen) {
1139         set_address_zero(env, r2, src);
1140     }
1141 
1142     /*
1143      * Only perform one type of type of operation (move/pad) in one step.
1144      * Stay within single pages.
1145      */
1146     while (destlen) {
1147         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1148         if (!srclen) {
1149             access_prepare(&desta, env, dest, cur_len,
1150                            MMU_DATA_STORE, mmu_idx, ra);
1151             access_memset(env, &desta, pad, ra);
1152         } else {
1153             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1154 
1155             access_prepare(&srca, env, src, cur_len,
1156                            MMU_DATA_LOAD, mmu_idx, ra);
1157             access_prepare(&desta, env, dest, cur_len,
1158                            MMU_DATA_STORE, mmu_idx, ra);
1159             access_memmove(env, &desta, &srca, ra);
1160             src = wrap_address(env, src + cur_len);
1161             srclen -= cur_len;
1162             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1163             set_address_zero(env, r2, src);
1164         }
1165         dest = wrap_address(env, dest + cur_len);
1166         destlen -= cur_len;
1167         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1168         set_address_zero(env, r1, dest);
1169 
1170         /*
1171          * MVCL is interruptible. Return to the main loop if requested after
1172          * writing back all state to registers. If no interrupt will get
1173          * injected, we'll end up back in this handler and continue processing
1174          * the remaining parts.
1175          */
1176         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1177             cpu_loop_exit_restore(cs, ra);
1178         }
1179     }
1180     return cc;
1181 }
1182 
1183 /* move long extended */
1184 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1185                        uint32_t r3)
1186 {
1187     uintptr_t ra = GETPC();
1188     uint64_t destlen = get_length(env, r1 + 1);
1189     uint64_t dest = get_address(env, r1);
1190     uint64_t srclen = get_length(env, r3 + 1);
1191     uint64_t src = get_address(env, r3);
1192     uint8_t pad = a2;
1193     uint32_t cc;
1194 
1195     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1196 
1197     set_length(env, r1 + 1, destlen);
1198     set_length(env, r3 + 1, srclen);
1199     set_address(env, r1, dest);
1200     set_address(env, r3, src);
1201 
1202     return cc;
1203 }
1204 
1205 /* move long unicode */
1206 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1207                        uint32_t r3)
1208 {
1209     uintptr_t ra = GETPC();
1210     uint64_t destlen = get_length(env, r1 + 1);
1211     uint64_t dest = get_address(env, r1);
1212     uint64_t srclen = get_length(env, r3 + 1);
1213     uint64_t src = get_address(env, r3);
1214     uint16_t pad = a2;
1215     uint32_t cc;
1216 
1217     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1218 
1219     set_length(env, r1 + 1, destlen);
1220     set_length(env, r3 + 1, srclen);
1221     set_address(env, r1, dest);
1222     set_address(env, r3, src);
1223 
1224     return cc;
1225 }
1226 
1227 /* compare logical long helper */
1228 static inline uint32_t do_clcl(CPUS390XState *env,
1229                                uint64_t *src1, uint64_t *src1len,
1230                                uint64_t *src3, uint64_t *src3len,
1231                                uint16_t pad, uint64_t limit,
1232                                int wordsize, uintptr_t ra)
1233 {
1234     uint64_t len = MAX(*src1len, *src3len);
1235     uint32_t cc = 0;
1236 
1237     check_alignment(env, *src1len | *src3len, wordsize, ra);
1238 
1239     if (!len) {
1240         return cc;
1241     }
1242 
1243     /* Lest we fail to service interrupts in a timely manner, limit the
1244        amount of work we're willing to do.  */
1245     if (len > limit) {
1246         len = limit;
1247         cc = 3;
1248     }
1249 
1250     for (; len; len -= wordsize) {
1251         uint16_t v1 = pad;
1252         uint16_t v3 = pad;
1253 
1254         if (*src1len) {
1255             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1256         }
1257         if (*src3len) {
1258             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1259         }
1260 
1261         if (v1 != v3) {
1262             cc = (v1 < v3) ? 1 : 2;
1263             break;
1264         }
1265 
1266         if (*src1len) {
1267             *src1 += wordsize;
1268             *src1len -= wordsize;
1269         }
1270         if (*src3len) {
1271             *src3 += wordsize;
1272             *src3len -= wordsize;
1273         }
1274     }
1275 
1276     return cc;
1277 }
1278 
1279 
1280 /* compare logical long */
1281 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1282 {
1283     uintptr_t ra = GETPC();
1284     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1285     uint64_t src1 = get_address(env, r1);
1286     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1287     uint64_t src3 = get_address(env, r2);
1288     uint8_t pad = env->regs[r2 + 1] >> 24;
1289     uint32_t cc;
1290 
1291     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1292 
1293     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1294     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1295     set_address(env, r1, src1);
1296     set_address(env, r2, src3);
1297 
1298     return cc;
1299 }
1300 
1301 /* compare logical long extended memcompare insn with padding */
1302 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1303                        uint32_t r3)
1304 {
1305     uintptr_t ra = GETPC();
1306     uint64_t src1len = get_length(env, r1 + 1);
1307     uint64_t src1 = get_address(env, r1);
1308     uint64_t src3len = get_length(env, r3 + 1);
1309     uint64_t src3 = get_address(env, r3);
1310     uint8_t pad = a2;
1311     uint32_t cc;
1312 
1313     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1314 
1315     set_length(env, r1 + 1, src1len);
1316     set_length(env, r3 + 1, src3len);
1317     set_address(env, r1, src1);
1318     set_address(env, r3, src3);
1319 
1320     return cc;
1321 }
1322 
1323 /* compare logical long unicode memcompare insn with padding */
1324 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1325                        uint32_t r3)
1326 {
1327     uintptr_t ra = GETPC();
1328     uint64_t src1len = get_length(env, r1 + 1);
1329     uint64_t src1 = get_address(env, r1);
1330     uint64_t src3len = get_length(env, r3 + 1);
1331     uint64_t src3 = get_address(env, r3);
1332     uint16_t pad = a2;
1333     uint32_t cc = 0;
1334 
1335     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1336 
1337     set_length(env, r1 + 1, src1len);
1338     set_length(env, r3 + 1, src3len);
1339     set_address(env, r1, src1);
1340     set_address(env, r3, src3);
1341 
1342     return cc;
1343 }
1344 
1345 /* checksum */
1346 Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1347                     uint64_t src, uint64_t src_len)
1348 {
1349     uintptr_t ra = GETPC();
1350     uint64_t max_len, len;
1351     uint64_t cksm = (uint32_t)r1;
1352 
1353     /* Lest we fail to service interrupts in a timely manner, limit the
1354        amount of work we're willing to do.  For now, let's cap at 8k.  */
1355     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1356 
1357     /* Process full words as available.  */
1358     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1359         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1360     }
1361 
1362     switch (max_len - len) {
1363     case 1:
1364         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1365         len += 1;
1366         break;
1367     case 2:
1368         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1369         len += 2;
1370         break;
1371     case 3:
1372         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1373         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1374         len += 3;
1375         break;
1376     }
1377 
1378     /* Fold the carry from the checksum.  Note that we can see carry-out
1379        during folding more than once (but probably not more than twice).  */
1380     while (cksm > 0xffffffffull) {
1381         cksm = (uint32_t)cksm + (cksm >> 32);
1382     }
1383 
1384     /* Indicate whether or not we've processed everything.  */
1385     env->cc_op = (len == src_len ? 0 : 3);
1386 
1387     /* Return both cksm and processed length.  */
1388     return int128_make128(cksm, len);
1389 }
1390 
1391 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1392 {
1393     uintptr_t ra = GETPC();
1394     int len_dest = len >> 4;
1395     int len_src = len & 0xf;
1396     uint8_t b;
1397 
1398     dest += len_dest;
1399     src += len_src;
1400 
1401     /* last byte is special, it only flips the nibbles */
1402     b = cpu_ldub_data_ra(env, src, ra);
1403     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1404     src--;
1405     len_src--;
1406 
1407     /* now pack every value */
1408     while (len_dest > 0) {
1409         b = 0;
1410 
1411         if (len_src >= 0) {
1412             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1413             src--;
1414             len_src--;
1415         }
1416         if (len_src >= 0) {
1417             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1418             src--;
1419             len_src--;
1420         }
1421 
1422         len_dest--;
1423         dest--;
1424         cpu_stb_data_ra(env, dest, b, ra);
1425     }
1426 }
1427 
1428 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1429                            uint32_t srclen, int ssize, uintptr_t ra)
1430 {
1431     int i;
1432     /* The destination operand is always 16 bytes long.  */
1433     const int destlen = 16;
1434 
1435     /* The operands are processed from right to left.  */
1436     src += srclen - 1;
1437     dest += destlen - 1;
1438 
1439     for (i = 0; i < destlen; i++) {
1440         uint8_t b = 0;
1441 
1442         /* Start with a positive sign */
1443         if (i == 0) {
1444             b = 0xc;
1445         } else if (srclen > ssize) {
1446             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1447             src -= ssize;
1448             srclen -= ssize;
1449         }
1450 
1451         if (srclen > ssize) {
1452             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1453             src -= ssize;
1454             srclen -= ssize;
1455         }
1456 
1457         cpu_stb_data_ra(env, dest, b, ra);
1458         dest--;
1459     }
1460 }
1461 
1462 
1463 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1464                  uint32_t srclen)
1465 {
1466     do_pkau(env, dest, src, srclen, 1, GETPC());
1467 }
1468 
1469 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1470                  uint32_t srclen)
1471 {
1472     do_pkau(env, dest, src, srclen, 2, GETPC());
1473 }
1474 
1475 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1476                   uint64_t src)
1477 {
1478     uintptr_t ra = GETPC();
1479     int len_dest = len >> 4;
1480     int len_src = len & 0xf;
1481     uint8_t b;
1482     int second_nibble = 0;
1483 
1484     dest += len_dest;
1485     src += len_src;
1486 
1487     /* last byte is special, it only flips the nibbles */
1488     b = cpu_ldub_data_ra(env, src, ra);
1489     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1490     src--;
1491     len_src--;
1492 
1493     /* now pad every nibble with 0xf0 */
1494 
1495     while (len_dest > 0) {
1496         uint8_t cur_byte = 0;
1497 
1498         if (len_src > 0) {
1499             cur_byte = cpu_ldub_data_ra(env, src, ra);
1500         }
1501 
1502         len_dest--;
1503         dest--;
1504 
1505         /* only advance one nibble at a time */
1506         if (second_nibble) {
1507             cur_byte >>= 4;
1508             len_src--;
1509             src--;
1510         }
1511         second_nibble = !second_nibble;
1512 
1513         /* digit */
1514         cur_byte = (cur_byte & 0xf);
1515         /* zone bits */
1516         cur_byte |= 0xf0;
1517 
1518         cpu_stb_data_ra(env, dest, cur_byte, ra);
1519     }
1520 }
1521 
1522 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1523                                  uint32_t destlen, int dsize, uint64_t src,
1524                                  uintptr_t ra)
1525 {
1526     int i;
1527     uint32_t cc;
1528     uint8_t b;
1529     /* The source operand is always 16 bytes long.  */
1530     const int srclen = 16;
1531 
1532     /* The operands are processed from right to left.  */
1533     src += srclen - 1;
1534     dest += destlen - dsize;
1535 
1536     /* Check for the sign.  */
1537     b = cpu_ldub_data_ra(env, src, ra);
1538     src--;
1539     switch (b & 0xf) {
1540     case 0xa:
1541     case 0xc:
1542     case 0xe ... 0xf:
1543         cc = 0;  /* plus */
1544         break;
1545     case 0xb:
1546     case 0xd:
1547         cc = 1;  /* minus */
1548         break;
1549     default:
1550     case 0x0 ... 0x9:
1551         cc = 3;  /* invalid */
1552         break;
1553     }
1554 
1555     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1556     for (i = 0; i < destlen; i += dsize) {
1557         if (i == (31 * dsize)) {
1558             /* If length is 32/64 bytes, the leftmost byte is 0. */
1559             b = 0;
1560         } else if (i % (2 * dsize)) {
1561             b = cpu_ldub_data_ra(env, src, ra);
1562             src--;
1563         } else {
1564             b >>= 4;
1565         }
1566         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1567         dest -= dsize;
1568     }
1569 
1570     return cc;
1571 }
1572 
1573 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1574                        uint64_t src)
1575 {
1576     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1577 }
1578 
1579 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1580                        uint64_t src)
1581 {
1582     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1583 }
1584 
1585 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1586 {
1587     uintptr_t ra = GETPC();
1588     uint32_t cc = 0;
1589     int i;
1590 
1591     for (i = 0; i < destlen; i++) {
1592         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1593         /* digit */
1594         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1595 
1596         if (i == (destlen - 1)) {
1597             /* sign */
1598             cc |= (b & 0xf) < 0xa ? 1 : 0;
1599         } else {
1600             /* digit */
1601             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1602         }
1603     }
1604 
1605     return cc;
1606 }
1607 
1608 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1609                              uint64_t trans, uintptr_t ra)
1610 {
1611     uint32_t i;
1612 
1613     for (i = 0; i <= len; i++) {
1614         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1615         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1616         cpu_stb_data_ra(env, array + i, new_byte, ra);
1617     }
1618 
1619     return env->cc_op;
1620 }
1621 
1622 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1623                 uint64_t trans)
1624 {
1625     do_helper_tr(env, len, array, trans, GETPC());
1626 }
1627 
1628 Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
1629                    uint64_t len, uint64_t trans)
1630 {
1631     uintptr_t ra = GETPC();
1632     uint8_t end = env->regs[0] & 0xff;
1633     uint64_t l = len;
1634     uint64_t i;
1635     uint32_t cc = 0;
1636 
1637     if (!(env->psw.mask & PSW_MASK_64)) {
1638         array &= 0x7fffffff;
1639         l = (uint32_t)l;
1640     }
1641 
1642     /* Lest we fail to service interrupts in a timely manner, limit the
1643        amount of work we're willing to do.  For now, let's cap at 8k.  */
1644     if (l > 0x2000) {
1645         l = 0x2000;
1646         cc = 3;
1647     }
1648 
1649     for (i = 0; i < l; i++) {
1650         uint8_t byte, new_byte;
1651 
1652         byte = cpu_ldub_data_ra(env, array + i, ra);
1653 
1654         if (byte == end) {
1655             cc = 1;
1656             break;
1657         }
1658 
1659         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1660         cpu_stb_data_ra(env, array + i, new_byte, ra);
1661     }
1662 
1663     env->cc_op = cc;
1664     return int128_make128(len - i, array + i);
1665 }
1666 
1667 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1668                                      uint64_t array, uint64_t trans,
1669                                      int inc, uintptr_t ra)
1670 {
1671     int i;
1672 
1673     for (i = 0; i <= len; i++) {
1674         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1675         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1676 
1677         if (sbyte != 0) {
1678             set_address(env, 1, array + i * inc);
1679             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1680             return (i == len) ? 2 : 1;
1681         }
1682     }
1683 
1684     return 0;
1685 }
1686 
1687 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1688                                   uint64_t array, uint64_t trans,
1689                                   uintptr_t ra)
1690 {
1691     return do_helper_trt(env, len, array, trans, 1, ra);
1692 }
1693 
1694 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1695                      uint64_t trans)
1696 {
1697     return do_helper_trt(env, len, array, trans, 1, GETPC());
1698 }
1699 
1700 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1701                                    uint64_t array, uint64_t trans,
1702                                    uintptr_t ra)
1703 {
1704     return do_helper_trt(env, len, array, trans, -1, ra);
1705 }
1706 
1707 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1708                       uint64_t trans)
1709 {
1710     return do_helper_trt(env, len, array, trans, -1, GETPC());
1711 }
1712 
1713 /* Translate one/two to one/two */
1714 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1715                       uint32_t tst, uint32_t sizes)
1716 {
1717     uintptr_t ra = GETPC();
1718     int dsize = (sizes & 1) ? 1 : 2;
1719     int ssize = (sizes & 2) ? 1 : 2;
1720     uint64_t tbl = get_address(env, 1);
1721     uint64_t dst = get_address(env, r1);
1722     uint64_t len = get_length(env, r1 + 1);
1723     uint64_t src = get_address(env, r2);
1724     uint32_t cc = 3;
1725     int i;
1726 
1727     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1728        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1729        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1730     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1731         tbl &= -4096;
1732     } else {
1733         tbl &= -8;
1734     }
1735 
1736     check_alignment(env, len, ssize, ra);
1737 
1738     /* Lest we fail to service interrupts in a timely manner, */
1739     /* limit the amount of work we're willing to do.   */
1740     for (i = 0; i < 0x2000; i++) {
1741         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1742         uint64_t tble = tbl + (sval * dsize);
1743         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1744         if (dval == tst) {
1745             cc = 1;
1746             break;
1747         }
1748         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1749 
1750         len -= ssize;
1751         src += ssize;
1752         dst += dsize;
1753 
1754         if (len == 0) {
1755             cc = 0;
1756             break;
1757         }
1758     }
1759 
1760     set_address(env, r1, dst);
1761     set_length(env, r1 + 1, len);
1762     set_address(env, r2, src);
1763 
1764     return cc;
1765 }
1766 
1767 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1768                         uint64_t a2, bool parallel)
1769 {
1770     uint32_t mem_idx = s390x_env_mmu_index(env, false);
1771     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1772     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1773     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1774     MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1775     MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1776     uintptr_t ra = GETPC();
1777     uint32_t fc = extract32(env->regs[0], 0, 8);
1778     uint32_t sc = extract32(env->regs[0], 8, 8);
1779     uint64_t pl = get_address(env, 1) & -16;
1780     uint64_t svh, svl;
1781     uint32_t cc;
1782 
1783     /* Sanity check the function code and storage characteristic.  */
1784     if (fc > 1 || sc > 3) {
1785         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1786             goto spec_exception;
1787         }
1788         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1789             goto spec_exception;
1790         }
1791     }
1792 
1793     /* Sanity check the alignments.  */
1794     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1795         goto spec_exception;
1796     }
1797 
1798     /* Sanity check writability of the store address.  */
1799     probe_write(env, a2, 1 << sc, mem_idx, ra);
1800 
1801     /*
1802      * Note that the compare-and-swap is atomic, and the store is atomic,
1803      * but the complete operation is not.  Therefore we do not need to
1804      * assert serial context in order to implement this.  That said,
1805      * restart early if we can't support either operation that is supposed
1806      * to be atomic.
1807      */
1808     if (parallel) {
1809         uint32_t max = 2;
1810 #ifdef CONFIG_ATOMIC64
1811         max = 3;
1812 #endif
1813         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1814             (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1815             cpu_loop_exit_atomic(env_cpu(env), ra);
1816         }
1817     }
1818 
1819     /*
1820      * All loads happen before all stores.  For simplicity, load the entire
1821      * store value area from the parameter list.
1822      */
1823     svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1824     svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1825 
1826     switch (fc) {
1827     case 0:
1828         {
1829             uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1830             uint32_t cv = env->regs[r3];
1831             uint32_t ov;
1832 
1833             if (parallel) {
1834                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1835             } else {
1836                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
1837                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1838             }
1839             cc = (ov != cv);
1840             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1841         }
1842         break;
1843 
1844     case 1:
1845         {
1846             uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1847             uint64_t cv = env->regs[r3];
1848             uint64_t ov;
1849 
1850             if (parallel) {
1851 #ifdef CONFIG_ATOMIC64
1852                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1853 #else
1854                 /* Note that we asserted !parallel above.  */
1855                 g_assert_not_reached();
1856 #endif
1857             } else {
1858                 ov = cpu_ldq_mmu(env, a1, oi8, ra);
1859                 cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1860             }
1861             cc = (ov != cv);
1862             env->regs[r3] = ov;
1863         }
1864         break;
1865 
1866     case 2:
1867         {
1868             Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1869             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1870             Int128 ov;
1871 
1872             if (!parallel) {
1873                 ov = cpu_ld16_mmu(env, a1, oi16, ra);
1874                 cc = !int128_eq(ov, cv);
1875                 if (cc) {
1876                     nv = ov;
1877                 }
1878                 cpu_st16_mmu(env, a1, nv, oi16, ra);
1879             } else if (HAVE_CMPXCHG128) {
1880                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1881                 cc = !int128_eq(ov, cv);
1882             } else {
1883                 /* Note that we asserted !parallel above.  */
1884                 g_assert_not_reached();
1885             }
1886 
1887             env->regs[r3 + 0] = int128_gethi(ov);
1888             env->regs[r3 + 1] = int128_getlo(ov);
1889         }
1890         break;
1891 
1892     default:
1893         g_assert_not_reached();
1894     }
1895 
1896     /* Store only if the comparison succeeded.  Note that above we use a pair
1897        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1898        from the most-significant bits of svh.  */
1899     if (cc == 0) {
1900         switch (sc) {
1901         case 0:
1902             cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1903             break;
1904         case 1:
1905             cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1906             break;
1907         case 2:
1908             cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1909             break;
1910         case 3:
1911             cpu_stq_mmu(env, a2, svh, oi8, ra);
1912             break;
1913         case 4:
1914             cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1915             break;
1916         default:
1917             g_assert_not_reached();
1918         }
1919     }
1920 
1921     return cc;
1922 
1923  spec_exception:
1924     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1925 }
1926 
1927 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1928 {
1929     return do_csst(env, r3, a1, a2, false);
1930 }
1931 
1932 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1933                                uint64_t a2)
1934 {
1935     return do_csst(env, r3, a1, a2, true);
1936 }
1937 
1938 #if !defined(CONFIG_USER_ONLY)
1939 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1940 {
1941     uintptr_t ra = GETPC();
1942     bool PERchanged = false;
1943     uint64_t src = a2;
1944     uint32_t i;
1945 
1946     if (src & 0x7) {
1947         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1948     }
1949 
1950     for (i = r1;; i = (i + 1) % 16) {
1951         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1952         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1953             PERchanged = true;
1954         }
1955         env->cregs[i] = val;
1956         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1957                    i, src, val);
1958         src += sizeof(uint64_t);
1959 
1960         if (i == r3) {
1961             break;
1962         }
1963     }
1964 
1965     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1966         s390_cpu_recompute_watchpoints(env_cpu(env));
1967     }
1968 
1969     tlb_flush(env_cpu(env));
1970 }
1971 
1972 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1973 {
1974     uintptr_t ra = GETPC();
1975     bool PERchanged = false;
1976     uint64_t src = a2;
1977     uint32_t i;
1978 
1979     if (src & 0x3) {
1980         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1981     }
1982 
1983     for (i = r1;; i = (i + 1) % 16) {
1984         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1985         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1986             PERchanged = true;
1987         }
1988         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1989         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1990         src += sizeof(uint32_t);
1991 
1992         if (i == r3) {
1993             break;
1994         }
1995     }
1996 
1997     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1998         s390_cpu_recompute_watchpoints(env_cpu(env));
1999     }
2000 
2001     tlb_flush(env_cpu(env));
2002 }
2003 
2004 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2005 {
2006     uintptr_t ra = GETPC();
2007     uint64_t dest = a2;
2008     uint32_t i;
2009 
2010     if (dest & 0x7) {
2011         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2012     }
2013 
2014     for (i = r1;; i = (i + 1) % 16) {
2015         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2016         dest += sizeof(uint64_t);
2017 
2018         if (i == r3) {
2019             break;
2020         }
2021     }
2022 }
2023 
2024 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2025 {
2026     uintptr_t ra = GETPC();
2027     uint64_t dest = a2;
2028     uint32_t i;
2029 
2030     if (dest & 0x3) {
2031         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2032     }
2033 
2034     for (i = r1;; i = (i + 1) % 16) {
2035         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2036         dest += sizeof(uint32_t);
2037 
2038         if (i == r3) {
2039             break;
2040         }
2041     }
2042 }
2043 
2044 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2045 {
2046     uintptr_t ra = GETPC();
2047     int i;
2048 
2049     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2050 
2051     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2052         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2053     }
2054 
2055     return 0;
2056 }
2057 
2058 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2059 {
2060     S390CPU *cpu = env_archcpu(env);
2061     CPUState *cs = env_cpu(env);
2062 
2063     /*
2064      * TODO: we currently don't handle all access protection types
2065      * (including access-list and key-controlled) as well as AR mode.
2066      */
2067     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2068         /* Fetching permitted; storing permitted */
2069         return 0;
2070     }
2071 
2072     if (env->int_pgm_code == PGM_PROTECTION) {
2073         /* retry if reading is possible */
2074         cs->exception_index = -1;
2075         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2076             /* Fetching permitted; storing not permitted */
2077             return 1;
2078         }
2079     }
2080 
2081     switch (env->int_pgm_code) {
2082     case PGM_PROTECTION:
2083         /* Fetching not permitted; storing not permitted */
2084         cs->exception_index = -1;
2085         return 2;
2086     case PGM_ADDRESSING:
2087     case PGM_TRANS_SPEC:
2088         /* exceptions forwarded to the guest */
2089         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2090         return 0;
2091     }
2092 
2093     /* Translation not available */
2094     cs->exception_index = -1;
2095     return 3;
2096 }
2097 
2098 /* insert storage key extended */
2099 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2100 {
2101     static S390SKeysState *ss;
2102     static S390SKeysClass *skeyclass;
2103     uint64_t addr = wrap_address(env, r2);
2104     uint8_t key;
2105     int rc;
2106 
2107     addr = mmu_real2abs(env, addr);
2108     if (!mmu_absolute_addr_valid(addr, false)) {
2109         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2110     }
2111 
2112     if (unlikely(!ss)) {
2113         ss = s390_get_skeys_device();
2114         skeyclass = S390_SKEYS_GET_CLASS(ss);
2115         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2116             tlb_flush_all_cpus_synced(env_cpu(env));
2117         }
2118     }
2119 
2120     rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2121     if (rc) {
2122         return 0;
2123     }
2124     return key;
2125 }
2126 
2127 /* set storage key extended */
2128 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2129 {
2130     static S390SKeysState *ss;
2131     static S390SKeysClass *skeyclass;
2132     uint64_t addr = wrap_address(env, r2);
2133     uint8_t key;
2134 
2135     addr = mmu_real2abs(env, addr);
2136     if (!mmu_absolute_addr_valid(addr, false)) {
2137         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2138     }
2139 
2140     if (unlikely(!ss)) {
2141         ss = s390_get_skeys_device();
2142         skeyclass = S390_SKEYS_GET_CLASS(ss);
2143         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2144             tlb_flush_all_cpus_synced(env_cpu(env));
2145         }
2146     }
2147 
2148     key = r1 & 0xfe;
2149     s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2150    /*
2151     * As we can only flush by virtual address and not all the entries
2152     * that point to a physical address we have to flush the whole TLB.
2153     */
2154     tlb_flush_all_cpus_synced(env_cpu(env));
2155 }
2156 
2157 /* reset reference bit extended */
2158 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2159 {
2160     uint64_t addr = wrap_address(env, r2);
2161     static S390SKeysState *ss;
2162     static S390SKeysClass *skeyclass;
2163     uint8_t re, key;
2164     int rc;
2165 
2166     addr = mmu_real2abs(env, addr);
2167     if (!mmu_absolute_addr_valid(addr, false)) {
2168         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2169     }
2170 
2171     if (unlikely(!ss)) {
2172         ss = s390_get_skeys_device();
2173         skeyclass = S390_SKEYS_GET_CLASS(ss);
2174         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2175             tlb_flush_all_cpus_synced(env_cpu(env));
2176         }
2177     }
2178 
2179     rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2180     if (rc) {
2181         return 0;
2182     }
2183 
2184     re = key & (SK_R | SK_C);
2185     key &= ~SK_R;
2186 
2187     rc = s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2188     if (rc) {
2189         return 0;
2190     }
2191    /*
2192     * As we can only flush by virtual address and not all the entries
2193     * that point to a physical address we have to flush the whole TLB.
2194     */
2195     tlb_flush_all_cpus_synced(env_cpu(env));
2196 
2197     /*
2198      * cc
2199      *
2200      * 0  Reference bit zero; change bit zero
2201      * 1  Reference bit zero; change bit one
2202      * 2  Reference bit one; change bit zero
2203      * 3  Reference bit one; change bit one
2204      */
2205 
2206     return re >> 1;
2207 }
2208 
2209 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2210                       uint64_t key)
2211 {
2212     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2213     S390Access srca, desta;
2214     uintptr_t ra = GETPC();
2215     int cc = 0;
2216 
2217     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2218                __func__, l, a1, a2);
2219 
2220     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2221         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2222         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2223     }
2224 
2225     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2226         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2227     }
2228 
2229     l = wrap_length32(env, l);
2230     if (l > 256) {
2231         /* max 256 */
2232         l = 256;
2233         cc = 3;
2234     } else if (!l) {
2235         return cc;
2236     }
2237 
2238     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2239     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2240     access_memmove(env, &desta, &srca, ra);
2241     return cc;
2242 }
2243 
2244 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2245                       uint64_t key)
2246 {
2247     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2248     S390Access srca, desta;
2249     uintptr_t ra = GETPC();
2250     int cc = 0;
2251 
2252     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2253                __func__, l, a1, a2);
2254 
2255     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2256         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2257         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2258     }
2259 
2260     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2261         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2262     }
2263 
2264     l = wrap_length32(env, l);
2265     if (l > 256) {
2266         /* max 256 */
2267         l = 256;
2268         cc = 3;
2269     } else if (!l) {
2270         return cc;
2271     }
2272     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2273     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2274     access_memmove(env, &desta, &srca, ra);
2275     return cc;
2276 }
2277 
2278 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2279 {
2280     CPUState *cs = env_cpu(env);
2281     const uintptr_t ra = GETPC();
2282     uint64_t table, entry, raddr;
2283     uint16_t entries, i, index = 0;
2284 
2285     if (r2 & 0xff000) {
2286         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2287     }
2288 
2289     if (!(r2 & 0x800)) {
2290         /* invalidation-and-clearing operation */
2291         table = r1 & ASCE_ORIGIN;
2292         entries = (r2 & 0x7ff) + 1;
2293 
2294         switch (r1 & ASCE_TYPE_MASK) {
2295         case ASCE_TYPE_REGION1:
2296             index = (r2 >> 53) & 0x7ff;
2297             break;
2298         case ASCE_TYPE_REGION2:
2299             index = (r2 >> 42) & 0x7ff;
2300             break;
2301         case ASCE_TYPE_REGION3:
2302             index = (r2 >> 31) & 0x7ff;
2303             break;
2304         case ASCE_TYPE_SEGMENT:
2305             index = (r2 >> 20) & 0x7ff;
2306             break;
2307         }
2308         for (i = 0; i < entries; i++) {
2309             /* addresses are not wrapped in 24/31bit mode but table index is */
2310             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2311             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2312             if (!(entry & REGION_ENTRY_I)) {
2313                 /* we are allowed to not store if already invalid */
2314                 entry |= REGION_ENTRY_I;
2315                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2316             }
2317         }
2318     }
2319 
2320     /* We simply flush the complete tlb, therefore we can ignore r3. */
2321     if (m4 & 1) {
2322         tlb_flush(cs);
2323     } else {
2324         tlb_flush_all_cpus_synced(cs);
2325     }
2326 }
2327 
2328 /* invalidate pte */
2329 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2330                   uint32_t m4)
2331 {
2332     CPUState *cs = env_cpu(env);
2333     const uintptr_t ra = GETPC();
2334     uint64_t page = vaddr & TARGET_PAGE_MASK;
2335     uint64_t pte_addr, pte;
2336 
2337     /* Compute the page table entry address */
2338     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2339     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2340 
2341     /* Mark the page table entry as invalid */
2342     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2343     pte |= PAGE_ENTRY_I;
2344     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2345 
2346     /* XXX we exploit the fact that Linux passes the exact virtual
2347        address here - it's not obliged to! */
2348     if (m4 & 1) {
2349         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2350             tlb_flush_page(cs, page);
2351             /* XXX 31-bit hack */
2352             tlb_flush_page(cs, page ^ 0x80000000);
2353         } else {
2354             /* looks like we don't have a valid virtual address */
2355             tlb_flush(cs);
2356         }
2357     } else {
2358         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2359             tlb_flush_page_all_cpus_synced(cs, page);
2360             /* XXX 31-bit hack */
2361             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2362         } else {
2363             /* looks like we don't have a valid virtual address */
2364             tlb_flush_all_cpus_synced(cs);
2365         }
2366     }
2367 }
2368 
2369 /* flush local tlb */
2370 void HELPER(ptlb)(CPUS390XState *env)
2371 {
2372     tlb_flush(env_cpu(env));
2373 }
2374 
2375 /* flush global tlb */
2376 void HELPER(purge)(CPUS390XState *env)
2377 {
2378     tlb_flush_all_cpus_synced(env_cpu(env));
2379 }
2380 
2381 /* load real address */
2382 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
2383 {
2384     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2385     uint64_t ret, tec;
2386     int flags, exc, cc;
2387 
2388     /* XXX incomplete - has more corner cases */
2389     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2390         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2391     }
2392 
2393     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2394     if (exc) {
2395         cc = 3;
2396         ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
2397     } else {
2398         cc = 0;
2399         ret |= addr & ~TARGET_PAGE_MASK;
2400     }
2401 
2402     env->cc_op = cc;
2403     return ret;
2404 }
2405 #endif
2406 
2407 /* Execute instruction.  This instruction executes an insn modified with
2408    the contents of r1.  It does not change the executed instruction in memory;
2409    it does not change the program counter.
2410 
2411    Perform this by recording the modified instruction in env->ex_value.
2412    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2413 */
2414 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2415 {
2416     uint64_t insn;
2417     uint8_t opc;
2418 
2419     /* EXECUTE targets must be at even addresses.  */
2420     if (addr & 1) {
2421         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
2422     }
2423 
2424     insn = cpu_lduw_code(env, addr);
2425     opc = insn >> 8;
2426 
2427     /* Or in the contents of R1[56:63].  */
2428     insn |= r1 & 0xff;
2429 
2430     /* Load the rest of the instruction.  */
2431     insn <<= 48;
2432     switch (get_ilen(opc)) {
2433     case 2:
2434         break;
2435     case 4:
2436         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2437         break;
2438     case 6:
2439         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2440         break;
2441     default:
2442         g_assert_not_reached();
2443     }
2444 
2445     /* The very most common cases can be sped up by avoiding a new TB.  */
2446     if ((opc & 0xf0) == 0xd0) {
2447         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2448                                       uint64_t, uintptr_t);
2449         static const dx_helper dx[16] = {
2450             [0x0] = do_helper_trt_bkwd,
2451             [0x2] = do_helper_mvc,
2452             [0x4] = do_helper_nc,
2453             [0x5] = do_helper_clc,
2454             [0x6] = do_helper_oc,
2455             [0x7] = do_helper_xc,
2456             [0xc] = do_helper_tr,
2457             [0xd] = do_helper_trt_fwd,
2458         };
2459         dx_helper helper = dx[opc & 0xf];
2460 
2461         if (helper) {
2462             uint32_t l = extract64(insn, 48, 8);
2463             uint32_t b1 = extract64(insn, 44, 4);
2464             uint32_t d1 = extract64(insn, 32, 12);
2465             uint32_t b2 = extract64(insn, 28, 4);
2466             uint32_t d2 = extract64(insn, 16, 12);
2467             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2468             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2469 
2470             env->cc_op = helper(env, l, a1, a2, 0);
2471             env->psw.addr += ilen;
2472             return;
2473         }
2474     } else if (opc == 0x0a) {
2475         env->int_svc_code = extract64(insn, 48, 8);
2476         env->int_svc_ilen = ilen;
2477         helper_exception(env, EXCP_SVC);
2478         g_assert_not_reached();
2479     }
2480 
2481     /* Record the insn we want to execute as well as the ilen to use
2482        during the execution of the target insn.  This will also ensure
2483        that ex_value is non-zero, which flags that we are in a state
2484        that requires such execution.  */
2485     env->ex_value = insn | ilen;
2486     env->ex_target = addr;
2487 }
2488 
2489 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2490                        uint64_t len)
2491 {
2492     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2493     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2494     const uint64_t r0 = env->regs[0];
2495     const uintptr_t ra = GETPC();
2496     uint8_t dest_key, dest_as, dest_k, dest_a;
2497     uint8_t src_key, src_as, src_k, src_a;
2498     uint64_t val;
2499     int cc = 0;
2500 
2501     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2502                __func__, dest, src, len);
2503 
2504     if (!(env->psw.mask & PSW_MASK_DAT)) {
2505         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2506     }
2507 
2508     /* OAC (operand access control) for the first operand -> dest */
2509     val = (r0 & 0xffff0000ULL) >> 16;
2510     dest_key = (val >> 12) & 0xf;
2511     dest_as = (val >> 6) & 0x3;
2512     dest_k = (val >> 1) & 0x1;
2513     dest_a = val & 0x1;
2514 
2515     /* OAC (operand access control) for the second operand -> src */
2516     val = (r0 & 0x0000ffffULL);
2517     src_key = (val >> 12) & 0xf;
2518     src_as = (val >> 6) & 0x3;
2519     src_k = (val >> 1) & 0x1;
2520     src_a = val & 0x1;
2521 
2522     if (!dest_k) {
2523         dest_key = psw_key;
2524     }
2525     if (!src_k) {
2526         src_key = psw_key;
2527     }
2528     if (!dest_a) {
2529         dest_as = psw_as;
2530     }
2531     if (!src_a) {
2532         src_as = psw_as;
2533     }
2534 
2535     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2536         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2537     }
2538     if (!(env->cregs[0] & CR0_SECONDARY) &&
2539         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2540         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2541     }
2542     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2543         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2544     }
2545 
2546     len = wrap_length32(env, len);
2547     if (len > 4096) {
2548         cc = 3;
2549         len = 4096;
2550     }
2551 
2552     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2553     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2554         (env->psw.mask & PSW_MASK_PSTATE)) {
2555         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2556                       __func__);
2557         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2558     }
2559 
2560     /* FIXME: Access using correct keys and AR-mode */
2561     if (len) {
2562         S390Access srca, desta;
2563 
2564         access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
2565                        mmu_idx_from_as(src_as), ra);
2566         access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
2567                        mmu_idx_from_as(dest_as), ra);
2568 
2569         access_memmove(env, &desta, &srca, ra);
2570     }
2571 
2572     return cc;
2573 }
2574 
2575 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2576    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2577    value >= 0 indicates failure, and the CC value to be returned.  */
2578 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2579                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2580                                  uint32_t *ochar, uint32_t *olen);
2581 
2582 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2583    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2584    indicates failure, and the CC value to be returned.  */
2585 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2586                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2587                                  uint32_t *olen);
2588 
2589 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2590                        bool enh_check, uintptr_t ra,
2591                        uint32_t *ochar, uint32_t *olen)
2592 {
2593     uint8_t s0, s1, s2, s3;
2594     uint32_t c, l;
2595 
2596     if (ilen < 1) {
2597         return 0;
2598     }
2599     s0 = cpu_ldub_data_ra(env, addr, ra);
2600     if (s0 <= 0x7f) {
2601         /* one byte character */
2602         l = 1;
2603         c = s0;
2604     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2605         /* invalid character */
2606         return 2;
2607     } else if (s0 <= 0xdf) {
2608         /* two byte character */
2609         l = 2;
2610         if (ilen < 2) {
2611             return 0;
2612         }
2613         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2614         c = s0 & 0x1f;
2615         c = (c << 6) | (s1 & 0x3f);
2616         if (enh_check && (s1 & 0xc0) != 0x80) {
2617             return 2;
2618         }
2619     } else if (s0 <= 0xef) {
2620         /* three byte character */
2621         l = 3;
2622         if (ilen < 3) {
2623             return 0;
2624         }
2625         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2626         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2627         c = s0 & 0x0f;
2628         c = (c << 6) | (s1 & 0x3f);
2629         c = (c << 6) | (s2 & 0x3f);
2630         /* Fold the byte-by-byte range descriptions in the PoO into
2631            tests against the complete value.  It disallows encodings
2632            that could be smaller, and the UTF-16 surrogates.  */
2633         if (enh_check
2634             && ((s1 & 0xc0) != 0x80
2635                 || (s2 & 0xc0) != 0x80
2636                 || c < 0x1000
2637                 || (c >= 0xd800 && c <= 0xdfff))) {
2638             return 2;
2639         }
2640     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2641         /* four byte character */
2642         l = 4;
2643         if (ilen < 4) {
2644             return 0;
2645         }
2646         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2647         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2648         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2649         c = s0 & 0x07;
2650         c = (c << 6) | (s1 & 0x3f);
2651         c = (c << 6) | (s2 & 0x3f);
2652         c = (c << 6) | (s3 & 0x3f);
2653         /* See above.  */
2654         if (enh_check
2655             && ((s1 & 0xc0) != 0x80
2656                 || (s2 & 0xc0) != 0x80
2657                 || (s3 & 0xc0) != 0x80
2658                 || c < 0x010000
2659                 || c > 0x10ffff)) {
2660             return 2;
2661         }
2662     } else {
2663         /* invalid character */
2664         return 2;
2665     }
2666 
2667     *ochar = c;
2668     *olen = l;
2669     return -1;
2670 }
2671 
2672 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2673                         bool enh_check, uintptr_t ra,
2674                         uint32_t *ochar, uint32_t *olen)
2675 {
2676     uint16_t s0, s1;
2677     uint32_t c, l;
2678 
2679     if (ilen < 2) {
2680         return 0;
2681     }
2682     s0 = cpu_lduw_data_ra(env, addr, ra);
2683     if ((s0 & 0xfc00) != 0xd800) {
2684         /* one word character */
2685         l = 2;
2686         c = s0;
2687     } else {
2688         /* two word character */
2689         l = 4;
2690         if (ilen < 4) {
2691             return 0;
2692         }
2693         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2694         c = extract32(s0, 6, 4) + 1;
2695         c = (c << 6) | (s0 & 0x3f);
2696         c = (c << 10) | (s1 & 0x3ff);
2697         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2698             /* invalid surrogate character */
2699             return 2;
2700         }
2701     }
2702 
2703     *ochar = c;
2704     *olen = l;
2705     return -1;
2706 }
2707 
2708 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2709                         bool enh_check, uintptr_t ra,
2710                         uint32_t *ochar, uint32_t *olen)
2711 {
2712     uint32_t c;
2713 
2714     if (ilen < 4) {
2715         return 0;
2716     }
2717     c = cpu_ldl_data_ra(env, addr, ra);
2718     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2719         /* invalid unicode character */
2720         return 2;
2721     }
2722 
2723     *ochar = c;
2724     *olen = 4;
2725     return -1;
2726 }
2727 
2728 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2729                        uintptr_t ra, uint32_t c, uint32_t *olen)
2730 {
2731     uint8_t d[4];
2732     uint32_t l, i;
2733 
2734     if (c <= 0x7f) {
2735         /* one byte character */
2736         l = 1;
2737         d[0] = c;
2738     } else if (c <= 0x7ff) {
2739         /* two byte character */
2740         l = 2;
2741         d[1] = 0x80 | extract32(c, 0, 6);
2742         d[0] = 0xc0 | extract32(c, 6, 5);
2743     } else if (c <= 0xffff) {
2744         /* three byte character */
2745         l = 3;
2746         d[2] = 0x80 | extract32(c, 0, 6);
2747         d[1] = 0x80 | extract32(c, 6, 6);
2748         d[0] = 0xe0 | extract32(c, 12, 4);
2749     } else {
2750         /* four byte character */
2751         l = 4;
2752         d[3] = 0x80 | extract32(c, 0, 6);
2753         d[2] = 0x80 | extract32(c, 6, 6);
2754         d[1] = 0x80 | extract32(c, 12, 6);
2755         d[0] = 0xf0 | extract32(c, 18, 3);
2756     }
2757 
2758     if (ilen < l) {
2759         return 1;
2760     }
2761     for (i = 0; i < l; ++i) {
2762         cpu_stb_data_ra(env, addr + i, d[i], ra);
2763     }
2764 
2765     *olen = l;
2766     return -1;
2767 }
2768 
2769 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2770                         uintptr_t ra, uint32_t c, uint32_t *olen)
2771 {
2772     uint16_t d0, d1;
2773 
2774     if (c <= 0xffff) {
2775         /* one word character */
2776         if (ilen < 2) {
2777             return 1;
2778         }
2779         cpu_stw_data_ra(env, addr, c, ra);
2780         *olen = 2;
2781     } else {
2782         /* two word character */
2783         if (ilen < 4) {
2784             return 1;
2785         }
2786         d1 = 0xdc00 | extract32(c, 0, 10);
2787         d0 = 0xd800 | extract32(c, 10, 6);
2788         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2789         cpu_stw_data_ra(env, addr + 0, d0, ra);
2790         cpu_stw_data_ra(env, addr + 2, d1, ra);
2791         *olen = 4;
2792     }
2793 
2794     return -1;
2795 }
2796 
2797 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2798                         uintptr_t ra, uint32_t c, uint32_t *olen)
2799 {
2800     if (ilen < 4) {
2801         return 1;
2802     }
2803     cpu_stl_data_ra(env, addr, c, ra);
2804     *olen = 4;
2805     return -1;
2806 }
2807 
2808 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2809                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2810                                        decode_unicode_fn decode,
2811                                        encode_unicode_fn encode)
2812 {
2813     uint64_t dst = get_address(env, r1);
2814     uint64_t dlen = get_length(env, r1 + 1);
2815     uint64_t src = get_address(env, r2);
2816     uint64_t slen = get_length(env, r2 + 1);
2817     bool enh_check = m3 & 1;
2818     int cc, i;
2819 
2820     /* Lest we fail to service interrupts in a timely manner, limit the
2821        amount of work we're willing to do.  For now, let's cap at 256.  */
2822     for (i = 0; i < 256; ++i) {
2823         uint32_t c, ilen, olen;
2824 
2825         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2826         if (unlikely(cc >= 0)) {
2827             break;
2828         }
2829         cc = encode(env, dst, dlen, ra, c, &olen);
2830         if (unlikely(cc >= 0)) {
2831             break;
2832         }
2833 
2834         src += ilen;
2835         slen -= ilen;
2836         dst += olen;
2837         dlen -= olen;
2838         cc = 3;
2839     }
2840 
2841     set_address(env, r1, dst);
2842     set_length(env, r1 + 1, dlen);
2843     set_address(env, r2, src);
2844     set_length(env, r2 + 1, slen);
2845 
2846     return cc;
2847 }
2848 
2849 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2850 {
2851     return convert_unicode(env, r1, r2, m3, GETPC(),
2852                            decode_utf8, encode_utf16);
2853 }
2854 
2855 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2856 {
2857     return convert_unicode(env, r1, r2, m3, GETPC(),
2858                            decode_utf8, encode_utf32);
2859 }
2860 
2861 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2862 {
2863     return convert_unicode(env, r1, r2, m3, GETPC(),
2864                            decode_utf16, encode_utf8);
2865 }
2866 
2867 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2868 {
2869     return convert_unicode(env, r1, r2, m3, GETPC(),
2870                            decode_utf16, encode_utf32);
2871 }
2872 
2873 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2874 {
2875     return convert_unicode(env, r1, r2, m3, GETPC(),
2876                            decode_utf32, encode_utf8);
2877 }
2878 
2879 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2880 {
2881     return convert_unicode(env, r1, r2, m3, GETPC(),
2882                            decode_utf32, encode_utf16);
2883 }
2884 
2885 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2886                         uintptr_t ra)
2887 {
2888     const int mmu_idx = s390x_env_mmu_index(env, false);
2889 
2890     /* test the actual access, not just any access to the page due to LAP */
2891     while (len) {
2892         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2893         const uint64_t curlen = MIN(pagelen, len);
2894 
2895         probe_write(env, addr, curlen, mmu_idx, ra);
2896         addr = wrap_address(env, addr + curlen);
2897         len -= curlen;
2898     }
2899 }
2900 
2901 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2902 {
2903     probe_write_access(env, addr, len, GETPC());
2904 }
2905