xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 96b1416fda52cb37eaa6d2316d9946b1078c6210)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/cpu_ldst.h"
29 #include "qemu/int128.h"
30 #include "qemu/atomic128.h"
31 #include "trace.h"
32 
33 #if !defined(CONFIG_USER_ONLY)
34 #include "hw/s390x/storage-keys.h"
35 #include "hw/boards.h"
36 #endif
37 
38 /*****************************************************************************/
39 /* Softmmu support */
40 
41 /* #define DEBUG_HELPER */
42 #ifdef DEBUG_HELPER
43 #define HELPER_LOG(x...) qemu_log(x)
44 #else
45 #define HELPER_LOG(x...)
46 #endif
47 
48 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
49 {
50     uint16_t pkm = env->cregs[3] >> 16;
51 
52     if (env->psw.mask & PSW_MASK_PSTATE) {
53         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
54         return pkm & (0x8000 >> psw_key);
55     }
56     return true;
57 }
58 
/*
 * Return true if storing @len bytes at @dest would clobber not-yet-copied
 * bytes of the source range [@src, @src + @len): i.e. @dest falls strictly
 * inside the source range, after its first byte.  A forward memmove-style
 * copy is only safe when this returns false.
 */
static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        /* Source wraps: overlap if dest is past src or before the wrapped end. */
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}
71 
72 /* Trigger a SPECIFICATION exception if an address or a length is not
73    naturally aligned.  */
74 static inline void check_alignment(CPUS390XState *env, uint64_t v,
75                                    int wordsize, uintptr_t ra)
76 {
77     if (v % wordsize) {
78         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
79     }
80 }
81 
82 /* Load a value from memory according to its size.  */
83 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
84                                            int wordsize, uintptr_t ra)
85 {
86     switch (wordsize) {
87     case 1:
88         return cpu_ldub_data_ra(env, addr, ra);
89     case 2:
90         return cpu_lduw_data_ra(env, addr, ra);
91     default:
92         abort();
93     }
94 }
95 
96 /* Store a to memory according to its size.  */
97 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
98                                       uint64_t value, int wordsize,
99                                       uintptr_t ra)
100 {
101     switch (wordsize) {
102     case 1:
103         cpu_stb_data_ra(env, addr, value, ra);
104         break;
105     case 2:
106         cpu_stw_data_ra(env, addr, value, ra);
107         break;
108     default:
109         abort();
110     }
111 }
112 
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest virtual address of the first part */
    target_ulong vaddr2;   /* start of the second page, if any */
    void *haddr1;          /* direct host pointer for part 1, or NULL */
    void *haddr2;          /* direct host pointer for part 2, or NULL */
    uint16_t size1;        /* bytes on the first page */
    uint16_t size2;        /* bytes on the second page (0 if single page) */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
129 
/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    int flags = probe_access_flags(env, addr, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
        /* With nonfault=0, probe_access_flags() would already have faulted. */
        assert(!nonfault);
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
169 
170 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
171                              bool nonfault, vaddr vaddr1, int size,
172                              MMUAccessType access_type,
173                              int mmu_idx, uintptr_t ra)
174 {
175     int size1, size2, exc;
176 
177     assert(size > 0 && size <= 4096);
178 
179     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
180     size2 = size - size1;
181 
182     memset(access, 0, sizeof(*access));
183     access->vaddr1 = vaddr1;
184     access->size1 = size1;
185     access->size2 = size2;
186     access->mmu_idx = mmu_idx;
187 
188     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
189                             &access->haddr1, ra);
190     if (unlikely(exc)) {
191         return exc;
192     }
193     if (unlikely(size2)) {
194         /* The access crosses page boundaries. */
195         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
196 
197         access->vaddr2 = vaddr2;
198         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
199                                 nonfault, &access->haddr2, ra);
200         if (unlikely(exc)) {
201             return exc;
202         }
203     }
204     return 0;
205 }
206 
207 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
208                                   vaddr vaddr, int size,
209                                   MMUAccessType access_type, int mmu_idx,
210                                   uintptr_t ra)
211 {
212     int exc = access_prepare_nf(ret, env, false, vaddr, size,
213                                 access_type, mmu_idx, ra);
214     assert(!exc);
215 }
216 
217 /* Helper to handle memset on a single page. */
218 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
219                              uint8_t byte, uint16_t size, int mmu_idx,
220                              uintptr_t ra)
221 {
222 #ifdef CONFIG_USER_ONLY
223     memset(haddr, byte, size);
224 #else
225     if (likely(haddr)) {
226         memset(haddr, byte, size);
227     } else {
228         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
229         for (int i = 0; i < size; i++) {
230             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
231         }
232     }
233 #endif
234 }
235 
236 static void access_memset(CPUS390XState *env, S390Access *desta,
237                           uint8_t byte, uintptr_t ra)
238 {
239 
240     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
241                      desta->mmu_idx, ra);
242     if (likely(!desta->size2)) {
243         return;
244     }
245     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
246                      desta->mmu_idx, ra);
247 }
248 
249 static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr,
250                                   void *haddr, int offset,
251                                   int mmu_idx, uintptr_t ra)
252 {
253 #ifdef CONFIG_USER_ONLY
254     return ldub_p(haddr + offset);
255 #else
256     if (likely(haddr)) {
257         return ldub_p(haddr + offset);
258     } else {
259         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
260         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
261     }
262 #endif
263 }
264 
265 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
266                                int offset, uintptr_t ra)
267 {
268     if (offset < access->size1) {
269         return do_access_get_byte(env, access->vaddr1, access->haddr1,
270                                   offset, access->mmu_idx, ra);
271     }
272     return do_access_get_byte(env, access->vaddr2, access->haddr2,
273                               offset - access->size1, access->mmu_idx, ra);
274 }
275 
276 static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, void *haddr,
277                                int offset, uint8_t byte, int mmu_idx,
278                                uintptr_t ra)
279 {
280 #ifdef CONFIG_USER_ONLY
281     stb_p(haddr + offset, byte);
282 #else
283 
284     if (likely(haddr)) {
285         stb_p(haddr + offset, byte);
286     } else {
287         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
288         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
289     }
290 #endif
291 }
292 
293 static void access_set_byte(CPUS390XState *env, S390Access *access,
294                             int offset, uint8_t byte, uintptr_t ra)
295 {
296     if (offset < access->size1) {
297         do_access_set_byte(env, access->vaddr1, access->haddr1, offset, byte,
298                            access->mmu_idx, ra);
299     } else {
300         do_access_set_byte(env, access->vaddr2, access->haddr2,
301                            offset - access->size1, byte, access->mmu_idx, ra);
302     }
303 }
304 
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    /*
     * Source and destination may cross their page boundaries at different
     * offsets within the total length; splice the resulting fragments.
     */
    if (srca->size1 == desta->size1) {
        /* Page splits coincide: copy part for part. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        /* Source crosses its page boundary before the destination does. */
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /* Destination crosses its page boundary before the source does. */
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
350 
351 static int mmu_idx_from_as(uint8_t as)
352 {
353     switch (as) {
354     case AS_PRIMARY:
355         return MMU_PRIMARY_IDX;
356     case AS_SECONDARY:
357         return MMU_SECONDARY_IDX;
358     case AS_HOME:
359         return MMU_HOME_IDX;
360     default:
361         /* FIXME AS_ACCREG */
362         g_assert_not_reached();
363     }
364 }
365 
366 /* and on array */
367 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
368                              uint64_t src, uintptr_t ra)
369 {
370     const int mmu_idx = cpu_mmu_index(env, false);
371     S390Access srca1, srca2, desta;
372     uint32_t i;
373     uint8_t c = 0;
374 
375     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
376                __func__, l, dest, src);
377 
378     /* NC always processes one more byte than specified - maximum is 256 */
379     l++;
380 
381     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
382     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
383     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
384     for (i = 0; i < l; i++) {
385         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
386                           access_get_byte(env, &srca2, i, ra);
387 
388         c |= x;
389         access_set_byte(env, &desta, i, x, ra);
390     }
391     return c != 0;
392 }
393 
/* NC entry point; GETPC() must be taken here, in the outermost helper. */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
399 
400 /* xor on array */
401 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
402                              uint64_t src, uintptr_t ra)
403 {
404     const int mmu_idx = cpu_mmu_index(env, false);
405     S390Access srca1, srca2, desta;
406     uint32_t i;
407     uint8_t c = 0;
408 
409     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
410                __func__, l, dest, src);
411 
412     /* XC always processes one more byte than specified - maximum is 256 */
413     l++;
414 
415     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
416     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
417     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
418 
419     /* xor with itself is the same as memset(0) */
420     if (src == dest) {
421         access_memset(env, &desta, 0, ra);
422         return 0;
423     }
424 
425     for (i = 0; i < l; i++) {
426         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
427                           access_get_byte(env, &srca2, i, ra);
428 
429         c |= x;
430         access_set_byte(env, &desta, i, x, ra);
431     }
432     return c != 0;
433 }
434 
/* XC entry point; GETPC() must be taken here, in the outermost helper. */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
440 
441 /* or on array */
442 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
443                              uint64_t src, uintptr_t ra)
444 {
445     const int mmu_idx = cpu_mmu_index(env, false);
446     S390Access srca1, srca2, desta;
447     uint32_t i;
448     uint8_t c = 0;
449 
450     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
451                __func__, l, dest, src);
452 
453     /* OC always processes one more byte than specified - maximum is 256 */
454     l++;
455 
456     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
457     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
458     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
459     for (i = 0; i < l; i++) {
460         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
461                           access_get_byte(env, &srca2, i, ra);
462 
463         c |= x;
464         access_set_byte(env, &desta, i, x, ra);
465     }
466     return c != 0;
467 }
468 
/* OC entry point; GETPC() must be taken here, in the outermost helper. */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
474 
/* memmove: copy l+1 bytes from src to dest with MVC overlap semantics. */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* One-byte shift propagates byte 0 through the whole destination. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: copy strictly one byte at a time. */
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    /* The condition code is left unchanged; hand the current one back. */
    return env->cc_op;
}
511 
/* MVC entry point; GETPC() must be taken here, in the outermost helper. */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
516 
517 /* move right to left */
518 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
519 {
520     const int mmu_idx = cpu_mmu_index(env, false);
521     const uint64_t ra = GETPC();
522     S390Access srca, desta;
523     int32_t i;
524 
525     /* MVCRL always copies one more byte than specified - maximum is 256 */
526     l++;
527 
528     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
529     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
530 
531     for (i = l - 1; i >= 0; i--) {
532         uint8_t byte = access_get_byte(env, &srca, i, ra);
533         access_set_byte(env, &desta, i, byte, ra);
534     }
535 }
536 
537 /* move inverse  */
538 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
539 {
540     const int mmu_idx = cpu_mmu_index(env, false);
541     S390Access srca, desta;
542     uintptr_t ra = GETPC();
543     int i;
544 
545     /* MVCIN always copies one more byte than specified - maximum is 256 */
546     l++;
547 
548     src = wrap_address(env, src - l + 1);
549     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
550     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
551     for (i = 0; i < l; i++) {
552         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
553 
554         access_set_byte(env, &desta, i, x, ra);
555     }
556 }
557 
558 /* move numerics  */
559 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
560 {
561     const int mmu_idx = cpu_mmu_index(env, false);
562     S390Access srca1, srca2, desta;
563     uintptr_t ra = GETPC();
564     int i;
565 
566     /* MVN always copies one more byte than specified - maximum is 256 */
567     l++;
568 
569     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
570     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
571     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
572     for (i = 0; i < l; i++) {
573         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
574                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
575 
576         access_set_byte(env, &desta, i, x, ra);
577     }
578 }
579 
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;   /* high nibble of l: dest length - 1 */
    const int len_src = (l & 0xf) + 1;   /* low nibble of l: src length - 1 */
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte: keep dest's low nibble, shift src's in above. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry the previous source byte's high nibble into the low half. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
613 
614 /* move zones  */
615 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
616 {
617     const int mmu_idx = cpu_mmu_index(env, false);
618     S390Access srca1, srca2, desta;
619     uintptr_t ra = GETPC();
620     int i;
621 
622     /* MVZ always copies one more byte than specified - maximum is 256 */
623     l++;
624 
625     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
626     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
627     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
628     for (i = 0; i < l; i++) {
629         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
630                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
631 
632         access_set_byte(env, &desta, i, x, ra);
633     }
634 }
635 
636 /* compare unsigned byte arrays */
637 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
638                               uint64_t s2, uintptr_t ra)
639 {
640     uint32_t i;
641     uint32_t cc = 0;
642 
643     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
644                __func__, l, s1, s2);
645 
646     for (i = 0; i <= l; i++) {
647         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
648         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
649         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
650         if (x < y) {
651             cc = 1;
652             break;
653         } else if (x > y) {
654             cc = 2;
655             break;
656         }
657     }
658 
659     HELPER_LOG("\n");
660     return cc;
661 }
662 
/* CLC entry point; GETPC() must be taken here, in the outermost helper. */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
667 
/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    /* Walk the 4-bit mask MSB-first; each set bit selects one r1 byte. */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* A storage byte is consumed only for selected register bytes. */
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
700 
/* Read general register @reg as an address, wrapped per addressing mode. */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
705 
706 /*
707  * Store the address to the given register, zeroing out unused leftmost
708  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
709  */
710 static inline void set_address_zero(CPUS390XState *env, int reg,
711                                     uint64_t address)
712 {
713     if (env->psw.mask & PSW_MASK_64) {
714         env->regs[reg] = address;
715     } else {
716         if (!(env->psw.mask & PSW_MASK_32)) {
717             address &= 0x00ffffff;
718         } else {
719             address &= 0x7fffffff;
720         }
721         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
722     }
723 }
724 
/*
 * Store @address into register @reg according to the addressing mode,
 * leaving bits where the PoO allows an implementation choice as noted
 * in the inline comments below.
 */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
747 
748 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
749 {
750     if (!(env->psw.mask & PSW_MASK_64)) {
751         return (uint32_t)length;
752     }
753     return length;
754 }
755 
756 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
757 {
758     if (!(env->psw.mask & PSW_MASK_64)) {
759         /* 24-Bit and 31-Bit mode */
760         length &= 0x7fffffff;
761     }
762     return length;
763 }
764 
/* Read general register @reg as a length (31-bit outside 64-bit mode). */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
769 
770 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
771 {
772     if (env->psw.mask & PSW_MASK_64) {
773         /* 64-Bit mode */
774         env->regs[reg] = length;
775     } else {
776         /* 24-Bit and 31-Bit mode */
777         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
778     }
779 }
780 
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];    /* search byte: low 8 bits of r0 */

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
818 
/* search string unicode: like SRST, but over 2-byte (UTF-16) units. */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];   /* search unit: low 16 bits of r0 */
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
858 
/* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                /* Both updated addresses are returned packed in one Int128. */
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}
893 
/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    /* With cco set, some faults are reported via the cc instead of raised. */
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    /* f and s are mutually exclusive; bits 48-51 of r0 must be zero. */
    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            /* Source page inaccessible: report cc 2 instead of faulting. */
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            /* Destination page inaccessible: report cc 1 instead. */
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    if (exc != PGM_ADDRESSING) {
        /* Expose the translation-exception code in the lowcore. */
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        /* Record the involved register pair as the operand-access id. */
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}
948 
/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    /* The terminating character is the low byte of GR0. */
    const uint8_t c = env->regs[0];
    /* Bytes remaining until the first of the two page boundaries. */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 8..31 (from the LSB) of GR0 must be zero. */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator copied: R1 points at it; CC=1. */
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    /* Page boundary reached without a terminator: advance both, CC=3. */
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
986 
987 /* load access registers r1 to r3 from memory at a2 */
988 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
989 {
990     uintptr_t ra = GETPC();
991     int i;
992 
993     if (a2 & 0x3) {
994         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
995     }
996 
997     for (i = r1;; i = (i + 1) % 16) {
998         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
999         a2 += 4;
1000 
1001         if (i == r3) {
1002             break;
1003         }
1004     }
1005 }
1006 
1007 /* store access registers r1 to r3 in memory at a2 */
1008 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1009 {
1010     uintptr_t ra = GETPC();
1011     int i;
1012 
1013     if (a2 & 0x3) {
1014         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1015     }
1016 
1017     for (i = r1;; i = (i + 1) % 16) {
1018         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1019         a2 += 4;
1020 
1021         if (i == r3) {
1022             break;
1023         }
1024     }
1025 }
1026 
/* move long helper */
/*
 * One step of MVCLE/MVCLU: either move at most one page from *src to
 * *dest, or pad the destination once the source is exhausted.  All four
 * address/length operands are updated in place.  Returns CC=3 while
 * destination bytes remain, otherwise the CC from the original length
 * comparison.
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* Bytes until the destination length or page boundary is hit. */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* CC from comparing the original operand lengths. */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* Nothing to move or pad. */
    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* Pad with a two-byte pad character. */
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    /* CC=3 while there is still destination left to process. */
    return *destlen ? 3 : cc;
}
1086 
/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    /* Operand lengths live in the low 24 bits of the odd registers. */
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    /* The pad byte is bits 24..31 of the source odd register. */
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* CC=3 on destructive overlap, otherwise the length comparison. */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the remaining destination. */
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            /* Move, additionally staying within the source page. */
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1160 
1161 /* move long extended */
1162 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1163                        uint32_t r3)
1164 {
1165     uintptr_t ra = GETPC();
1166     uint64_t destlen = get_length(env, r1 + 1);
1167     uint64_t dest = get_address(env, r1);
1168     uint64_t srclen = get_length(env, r3 + 1);
1169     uint64_t src = get_address(env, r3);
1170     uint8_t pad = a2;
1171     uint32_t cc;
1172 
1173     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1174 
1175     set_length(env, r1 + 1, destlen);
1176     set_length(env, r3 + 1, srclen);
1177     set_address(env, r1, dest);
1178     set_address(env, r3, src);
1179 
1180     return cc;
1181 }
1182 
1183 /* move long unicode */
1184 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1185                        uint32_t r3)
1186 {
1187     uintptr_t ra = GETPC();
1188     uint64_t destlen = get_length(env, r1 + 1);
1189     uint64_t dest = get_address(env, r1);
1190     uint64_t srclen = get_length(env, r3 + 1);
1191     uint64_t src = get_address(env, r3);
1192     uint16_t pad = a2;
1193     uint32_t cc;
1194 
1195     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1196 
1197     set_length(env, r1 + 1, destlen);
1198     set_length(env, r3 + 1, srclen);
1199     set_address(env, r1, dest);
1200     set_address(env, r3, src);
1201 
1202     return cc;
1203 }
1204 
1205 /* compare logical long helper */
1206 static inline uint32_t do_clcl(CPUS390XState *env,
1207                                uint64_t *src1, uint64_t *src1len,
1208                                uint64_t *src3, uint64_t *src3len,
1209                                uint16_t pad, uint64_t limit,
1210                                int wordsize, uintptr_t ra)
1211 {
1212     uint64_t len = MAX(*src1len, *src3len);
1213     uint32_t cc = 0;
1214 
1215     check_alignment(env, *src1len | *src3len, wordsize, ra);
1216 
1217     if (!len) {
1218         return cc;
1219     }
1220 
1221     /* Lest we fail to service interrupts in a timely manner, limit the
1222        amount of work we're willing to do.  */
1223     if (len > limit) {
1224         len = limit;
1225         cc = 3;
1226     }
1227 
1228     for (; len; len -= wordsize) {
1229         uint16_t v1 = pad;
1230         uint16_t v3 = pad;
1231 
1232         if (*src1len) {
1233             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1234         }
1235         if (*src3len) {
1236             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1237         }
1238 
1239         if (v1 != v3) {
1240             cc = (v1 < v3) ? 1 : 2;
1241             break;
1242         }
1243 
1244         if (*src1len) {
1245             *src1 += wordsize;
1246             *src1len -= wordsize;
1247         }
1248         if (*src3len) {
1249             *src3 += wordsize;
1250             *src3len -= wordsize;
1251         }
1252     }
1253 
1254     return cc;
1255 }
1256 
1257 
1258 /* compare logical long */
1259 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1260 {
1261     uintptr_t ra = GETPC();
1262     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1263     uint64_t src1 = get_address(env, r1);
1264     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1265     uint64_t src3 = get_address(env, r2);
1266     uint8_t pad = env->regs[r2 + 1] >> 24;
1267     uint32_t cc;
1268 
1269     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1270 
1271     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1272     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1273     set_address(env, r1, src1);
1274     set_address(env, r2, src3);
1275 
1276     return cc;
1277 }
1278 
1279 /* compare logical long extended memcompare insn with padding */
1280 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1281                        uint32_t r3)
1282 {
1283     uintptr_t ra = GETPC();
1284     uint64_t src1len = get_length(env, r1 + 1);
1285     uint64_t src1 = get_address(env, r1);
1286     uint64_t src3len = get_length(env, r3 + 1);
1287     uint64_t src3 = get_address(env, r3);
1288     uint8_t pad = a2;
1289     uint32_t cc;
1290 
1291     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1292 
1293     set_length(env, r1 + 1, src1len);
1294     set_length(env, r3 + 1, src3len);
1295     set_address(env, r1, src1);
1296     set_address(env, r3, src3);
1297 
1298     return cc;
1299 }
1300 
1301 /* compare logical long unicode memcompare insn with padding */
1302 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1303                        uint32_t r3)
1304 {
1305     uintptr_t ra = GETPC();
1306     uint64_t src1len = get_length(env, r1 + 1);
1307     uint64_t src1 = get_address(env, r1);
1308     uint64_t src3len = get_length(env, r3 + 1);
1309     uint64_t src3 = get_address(env, r3);
1310     uint16_t pad = a2;
1311     uint32_t cc = 0;
1312 
1313     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1314 
1315     set_length(env, r1 + 1, src1len);
1316     set_length(env, r3 + 1, src3len);
1317     set_address(env, r1, src1);
1318     set_address(env, r3, src3);
1319 
1320     return cc;
1321 }
1322 
/* checksum */
/*
 * CKSM: accumulate a carry-folded 32-bit checksum of up to 8k bytes at
 * src into the low 32 bits of r1.  Sets CC=0 when the whole operand was
 * consumed, CC=3 otherwise.  Returns the new checksum and the number of
 * bytes actually processed.
 */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* Handle the final 1-3 bytes, left-aligned within a word.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
1368 
/*
 * PACK: combine pairs of low source nibbles into single destination
 * bytes, processing both operands right to left.  The rightmost byte
 * only has its nibbles swapped.  Operand lengths are encoded in the
 * two nibbles of LEN.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low nibble from the next source byte, if any remain... */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* ...high nibble from the byte after that; else it stays zero. */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1405 
/*
 * Common implementation of PKA/PKU: pack the low nibbles of the source
 * elements (ssize = 1 for PKA, 2 for PKU) into a 16-byte field at dest,
 * processed right to left, placing a positive sign code (0xc) in the
 * rightmost nibble.  Missing source digits are filled with zero.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            /* Low nibble from the next source element. */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble from the element after that, if any remains. */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1439 
1440 
1441 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1442                  uint32_t srclen)
1443 {
1444     do_pkau(env, dest, src, srclen, 1, GETPC());
1445 }
1446 
1447 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1448                  uint32_t srclen)
1449 {
1450     do_pkau(env, dest, src, srclen, 2, GETPC());
1451 }
1452 
/*
 * UNPK: expand packed digit nibbles into one byte per digit with a 0xf0
 * zone, processing both operands right to left; the rightmost byte only
 * has its nibbles swapped.  Operand lengths are the two nibbles of LEN.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Alternates: 0 = use the low nibble, 1 = use the high nibble. */
    int second_nibble = 0;

    /* Point at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1499 
/*
 * Common implementation of UNPKA/UNPKU: expand the 16-byte packed field
 * at src into destlen bytes of dsize-byte elements at dest (dsize = 1
 * for UNPKA, 2 for UNPKU), turning each digit nibble into 0x30 + digit.
 * Returns the CC implied by the sign nibble: 0 plus, 1 minus, 3 invalid.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd digit: fetch the next source byte (low nibble used). */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even digit: use the high nibble of the current byte. */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1550 
1551 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1552                        uint64_t src)
1553 {
1554     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1555 }
1556 
1557 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1558                        uint64_t src)
1559 {
1560     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1561 }
1562 
1563 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1564 {
1565     uintptr_t ra = GETPC();
1566     uint32_t cc = 0;
1567     int i;
1568 
1569     for (i = 0; i < destlen; i++) {
1570         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1571         /* digit */
1572         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1573 
1574         if (i == (destlen - 1)) {
1575             /* sign */
1576             cc |= (b & 0xf) < 0xa ? 1 : 0;
1577         } else {
1578             /* digit */
1579             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1580         }
1581     }
1582 
1583     return cc;
1584 }
1585 
1586 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1587                              uint64_t trans, uintptr_t ra)
1588 {
1589     uint32_t i;
1590 
1591     for (i = 0; i <= len; i++) {
1592         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1593         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1594         cpu_stb_data_ra(env, array + i, new_byte, ra);
1595     }
1596 
1597     return env->cc_op;
1598 }
1599 
1600 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1601                 uint64_t trans)
1602 {
1603     do_helper_tr(env, len, array, trans, GETPC());
1604 }
1605 
/*
 * TRE: translate the operand in place via the table at trans, stopping
 * at the test byte taken from GR0 (CC=1), or after at most 8k bytes
 * (CC=3 if more remains, CC=0 if done).  Returns the remaining length
 * and the address of the first unprocessed byte.
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Outside 64-bit addressing, address and length are truncated. */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte ends processing before it is translated. */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1644 
1645 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1646                                      uint64_t array, uint64_t trans,
1647                                      int inc, uintptr_t ra)
1648 {
1649     int i;
1650 
1651     for (i = 0; i <= len; i++) {
1652         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1653         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1654 
1655         if (sbyte != 0) {
1656             set_address(env, 1, array + i * inc);
1657             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1658             return (i == len) ? 2 : 1;
1659         }
1660     }
1661 
1662     return 0;
1663 }
1664 
1665 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1666                                   uint64_t array, uint64_t trans,
1667                                   uintptr_t ra)
1668 {
1669     return do_helper_trt(env, len, array, trans, 1, ra);
1670 }
1671 
1672 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1673                      uint64_t trans)
1674 {
1675     return do_helper_trt(env, len, array, trans, 1, GETPC());
1676 }
1677 
1678 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1679                                    uint64_t array, uint64_t trans,
1680                                    uintptr_t ra)
1681 {
1682     return do_helper_trt(env, len, array, trans, -1, ra);
1683 }
1684 
1685 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1686                       uint64_t trans)
1687 {
1688     return do_helper_trt(env, len, array, trans, -1, GETPC());
1689 }
1690 
/* Translate one/two to one/two */
/*
 * Common implementation of TROO/TROT/TRTO/TRTT: translate ssize-byte
 * elements at R2 into dsize-byte elements at R1 via the table addressed
 * by GR1.  Stops at the test character (CC=1), when the length in R1+1
 * is exhausted (CC=0), or after 8k elements (CC=3).
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    /* Bit 0 of SIZES selects the destination size, bit 1 the source. */
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The operand length must be a multiple of the source element size. */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* The test character stops processing before it is stored. */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write back the partially processed operands. */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1744 
1745 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1746                         uint64_t a2, bool parallel)
1747 {
1748     uint32_t mem_idx = cpu_mmu_index(env, false);
1749     uintptr_t ra = GETPC();
1750     uint32_t fc = extract32(env->regs[0], 0, 8);
1751     uint32_t sc = extract32(env->regs[0], 8, 8);
1752     uint64_t pl = get_address(env, 1) & -16;
1753     uint64_t svh, svl;
1754     uint32_t cc;
1755 
1756     /* Sanity check the function code and storage characteristic.  */
1757     if (fc > 1 || sc > 3) {
1758         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1759             goto spec_exception;
1760         }
1761         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1762             goto spec_exception;
1763         }
1764     }
1765 
1766     /* Sanity check the alignments.  */
1767     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1768         goto spec_exception;
1769     }
1770 
1771     /* Sanity check writability of the store address.  */
1772     probe_write(env, a2, 1 << sc, mem_idx, ra);
1773 
1774     /*
1775      * Note that the compare-and-swap is atomic, and the store is atomic,
1776      * but the complete operation is not.  Therefore we do not need to
1777      * assert serial context in order to implement this.  That said,
1778      * restart early if we can't support either operation that is supposed
1779      * to be atomic.
1780      */
1781     if (parallel) {
1782         uint32_t max = 2;
1783 #ifdef CONFIG_ATOMIC64
1784         max = 3;
1785 #endif
1786         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1787             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1788             cpu_loop_exit_atomic(env_cpu(env), ra);
1789         }
1790     }
1791 
1792     /* All loads happen before all stores.  For simplicity, load the entire
1793        store value area from the parameter list.  */
1794     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1795     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1796 
1797     switch (fc) {
1798     case 0:
1799         {
1800             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1801             uint32_t cv = env->regs[r3];
1802             uint32_t ov;
1803 
1804             if (parallel) {
1805 #ifdef CONFIG_USER_ONLY
1806                 uint32_t *haddr = g2h(env_cpu(env), a1);
1807                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1808 #else
1809                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1810                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1811 #endif
1812             } else {
1813                 ov = cpu_ldl_data_ra(env, a1, ra);
1814                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1815             }
1816             cc = (ov != cv);
1817             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1818         }
1819         break;
1820 
1821     case 1:
1822         {
1823             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1824             uint64_t cv = env->regs[r3];
1825             uint64_t ov;
1826 
1827             if (parallel) {
1828 #ifdef CONFIG_ATOMIC64
1829                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1830                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1831 #else
1832                 /* Note that we asserted !parallel above.  */
1833                 g_assert_not_reached();
1834 #endif
1835             } else {
1836                 ov = cpu_ldq_data_ra(env, a1, ra);
1837                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1838             }
1839             cc = (ov != cv);
1840             env->regs[r3] = ov;
1841         }
1842         break;
1843 
1844     case 2:
1845         {
1846             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1847             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1848             Int128 nv = int128_make128(nvl, nvh);
1849             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1850             Int128 ov;
1851 
1852             if (!parallel) {
1853                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1854                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1855 
1856                 ov = int128_make128(ol, oh);
1857                 cc = !int128_eq(ov, cv);
1858                 if (cc) {
1859                     nv = ov;
1860                 }
1861 
1862                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1863                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1864             } else if (HAVE_CMPXCHG128) {
1865                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1866                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1867                 cc = !int128_eq(ov, cv);
1868             } else {
1869                 /* Note that we asserted !parallel above.  */
1870                 g_assert_not_reached();
1871             }
1872 
1873             env->regs[r3 + 0] = int128_gethi(ov);
1874             env->regs[r3 + 1] = int128_getlo(ov);
1875         }
1876         break;
1877 
1878     default:
1879         g_assert_not_reached();
1880     }
1881 
1882     /* Store only if the comparison succeeded.  Note that above we use a pair
1883        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1884        from the most-significant bits of svh.  */
1885     if (cc == 0) {
1886         switch (sc) {
1887         case 0:
1888             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1889             break;
1890         case 1:
1891             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1892             break;
1893         case 2:
1894             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1895             break;
1896         case 3:
1897             cpu_stq_data_ra(env, a2, svh, ra);
1898             break;
1899         case 4:
1900             if (!parallel) {
1901                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1902                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1903             } else if (HAVE_ATOMIC128) {
1904                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1905                 Int128 sv = int128_make128(svl, svh);
1906                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1907             } else {
1908                 /* Note that we asserted !parallel above.  */
1909                 g_assert_not_reached();
1910             }
1911             break;
1912         default:
1913             g_assert_not_reached();
1914         }
1915     }
1916 
1917     return cc;
1918 
1919  spec_exception:
1920     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1921 }
1922 
/* COMPARE AND SWAP AND STORE, serial context (no other CPUs running). */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
1927 
/* COMPARE AND SWAP AND STORE, parallel context (atomics required). */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
1933 
1934 #if !defined(CONFIG_USER_ONLY)
/*
 * LOAD CONTROL (64-bit): load control registers r1 through r3, wrapping
 * from 15 back to 0, from the doubleword-aligned address a2.
 */
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand address must be doubleword aligned.  */
    if (src & 0x7) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        /* Track changes to cregs 9..11, which control PER.  */
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    /* New PER controls may add or remove active watchpoints.  */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control registers affect address translation; drop cached TLB state. */
    tlb_flush(env_cpu(env));
}
1967 
/*
 * LOAD CONTROL (32-bit): load the low halves of control registers r1
 * through r3, wrapping from 15 back to 0, from the word-aligned address a2.
 * The high halves of the control registers are preserved.
 */
void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand address must be word aligned.  */
    if (src & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        /* Track changes to cregs 9..11, which control PER.  */
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        /* Only the low 32 bits are replaced.  */
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    /* New PER controls may add or remove active watchpoints.  */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control registers affect address translation; drop cached TLB state. */
    tlb_flush(env_cpu(env));
}
1999 
2000 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2001 {
2002     uintptr_t ra = GETPC();
2003     uint64_t dest = a2;
2004     uint32_t i;
2005 
2006     if (dest & 0x7) {
2007         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2008     }
2009 
2010     for (i = r1;; i = (i + 1) % 16) {
2011         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2012         dest += sizeof(uint64_t);
2013 
2014         if (i == r3) {
2015             break;
2016         }
2017     }
2018 }
2019 
2020 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2021 {
2022     uintptr_t ra = GETPC();
2023     uint64_t dest = a2;
2024     uint32_t i;
2025 
2026     if (dest & 0x3) {
2027         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2028     }
2029 
2030     for (i = r1;; i = (i + 1) % 16) {
2031         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2032         dest += sizeof(uint32_t);
2033 
2034         if (i == r3) {
2035             break;
2036         }
2037     }
2038 }
2039 
2040 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2041 {
2042     uintptr_t ra = GETPC();
2043     int i;
2044 
2045     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2046 
2047     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2048         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2049     }
2050 
2051     return 0;
2052 }
2053 
/*
 * TEST PROTECTION: probe whether address a1 may be stored to and/or
 * fetched from, and return the condition code without accessing memory.
 * Probe failures latch a program interrupt code in env->int_pgm_code,
 * which must be cleared (exception_index = -1) when consumed here.
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2093 
/* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* Cache the storage-key device lookup across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* Storage keys are associated with absolute storage frames.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* NOTE(review): enable_skeys appears to return whether key handling
           was already active; when it was not, cached translations made
           without key checking must be discarded on all CPUs — confirm.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        /* The key could not be read; report a zero key.  */
        return 0;
    }
    return key;
}
2123 
/* set storage key extended */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* Cache the storage-key device lookup across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* Storage keys are associated with absolute storage frames.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* NOTE(review): enable_skeys appears to return whether key handling
           was already active; when it was not, cached translations made
           without key checking must be discarded on all CPUs — confirm.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* Take the key from r1, masking off the low (unused) bit.  */
    key = r1 & 0xfe;
    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2157 
/* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* Cache the storage-key device lookup across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    /* Storage keys are associated with absolute storage frames.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* NOTE(review): enable_skeys appears to return whether key handling
           was already active; when it was not, cached translations made
           without key checking must be discarded on all CPUs — confirm.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }

    /* Remember R and C, then clear only the reference bit.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
        return 0;
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
2211 
/*
 * MOVE TO SECONDARY: copy up to 256 bytes from the primary address
 * space (a2) to the secondary address space (a1).  Returns cc 3 when
 * the requested length was truncated to 256, else cc 0.
 */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary-space control set, and neither
       home-space nor access-register mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* The access key in bits 4..7 of 'key' must be allowed by the PKM.  */
    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        /* Zero length: nothing to move.  */
        return cc;
    }

    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2246 
/*
 * MOVE TO PRIMARY: copy up to 256 bytes from the secondary address
 * space (a2) to the primary address space (a1).  Returns cc 3 when
 * the requested length was truncated to 256, else cc 0.
 */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary-space control set, and neither
       home-space nor access-register mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* The access key in bits 4..7 of 'key' must be allowed by the PKM.  */
    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        /* Zero length: nothing to move.  */
        return cc;
    }
    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2280 
/*
 * INVALIDATE DAT TABLE ENTRY: r1 carries the table origin and type,
 * r2 the starting index and entry count (and the operation selector
 * bit 0x800), m4 the local-clearing control.
 */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* These r2 bits must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* Pick the index field of r2 matching the table level.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2330 
/* invalidate pte */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    /* m4 bit 0 set: local-clearing form, flush only this CPU's TLB.  */
    if (m4 & 1) {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2371 
/* flush local tlb */
void HELPER(ptlb)(CPUS390XState *env)
{
    /* PURGE TLB affects only the issuing CPU.  */
    tlb_flush(env_cpu(env));
}
2377 
/* flush global tlb */
void HELPER(purge)(CPUS390XState *env)
{
    /* Synchronously flush the TLBs of all CPUs.  */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2383 
/* load real address */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        /* Translation failed: cc 3, return the exception code with
           bit 32 set instead of a real address.  */
        cc = 3;
        ret = exc | 0x80000000;
    } else {
        /* Merge the page offset back into the translated frame address.  */
        cc = 0;
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    /* The condition code is delivered through env->cc_op.  */
    env->cc_op = cc;
    return ret;
}
2408 #endif
2409 
2410 /* load pair from quadword */
2411 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2412 {
2413     uintptr_t ra = GETPC();
2414     uint64_t hi, lo;
2415 
2416     check_alignment(env, addr, 16, ra);
2417     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2418     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2419 
2420     env->retxl = lo;
2421     return hi;
2422 }
2423 
/*
 * LOAD PAIR FROM QUADWORD, parallel context: perform the 16-byte read
 * as a single atomic access.  The translator must only call this when
 * HAVE_ATOMIC128 holds; otherwise the serial helper is used.
 */
uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint64_t hi, lo;
    int mem_idx;
    MemOpIdx oi;
    Int128 v;

    assert(HAVE_ATOMIC128);

    mem_idx = cpu_mmu_index(env, false);
    /* MO_ALIGN_16 raises the alignment exception for us.  */
    oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
    v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
    hi = int128_gethi(v);
    lo = int128_getlo(v);

    /* Low half via env->retxl, high half as the return value.  */
    env->retxl = lo;
    return hi;
}
2443 
2444 /* store pair to quadword */
2445 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2446                   uint64_t low, uint64_t high)
2447 {
2448     uintptr_t ra = GETPC();
2449 
2450     check_alignment(env, addr, 16, ra);
2451     cpu_stq_data_ra(env, addr + 0, high, ra);
2452     cpu_stq_data_ra(env, addr + 8, low, ra);
2453 }
2454 
/*
 * STORE PAIR TO QUADWORD, parallel context: perform the 16-byte write
 * as a single atomic access.  The translator must only call this when
 * HAVE_ATOMIC128 holds; otherwise the serial helper is used.
 */
void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
                           uint64_t low, uint64_t high)
{
    uintptr_t ra = GETPC();
    int mem_idx;
    MemOpIdx oi;
    Int128 v;

    assert(HAVE_ATOMIC128);

    mem_idx = cpu_mmu_index(env, false);
    /* MO_ALIGN_16 raises the alignment exception for us.  */
    oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
    v = int128_make128(low, high);
    cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
}
2470 
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    /* Fetch the first halfword; the opcode determines the full length.  */
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* SS-format storage-to-storage insns handled by direct dispatch.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS-format operands: length, base+displacement.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: raise the exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
}
2543 
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes between
 * address spaces/keys selected by the operand-access controls in r0.
 * Returns cc 3 when the requested length was truncated, else cc 0.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;  /* key-validity bit */
    dest_a = val & 0x1;         /* address-space-validity bit */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Fall back to the PSW key/AS where the OAC fields are not valid.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* A problem-state program may not store into the home space.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        /* cc 3: the operation was truncated to the 4K maximum.  */
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2629 
/* Decode a Unicode character.  A return value < 0 indicates success, storing
   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   value >= 0 indicates failure, and the CC value to be returned
   (0: source exhausted, 2: invalid character).  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);

/* Encode a Unicode character.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure, and the CC value to be returned
   (1: destination exhausted).  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2643 
2644 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2645                        bool enh_check, uintptr_t ra,
2646                        uint32_t *ochar, uint32_t *olen)
2647 {
2648     uint8_t s0, s1, s2, s3;
2649     uint32_t c, l;
2650 
2651     if (ilen < 1) {
2652         return 0;
2653     }
2654     s0 = cpu_ldub_data_ra(env, addr, ra);
2655     if (s0 <= 0x7f) {
2656         /* one byte character */
2657         l = 1;
2658         c = s0;
2659     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2660         /* invalid character */
2661         return 2;
2662     } else if (s0 <= 0xdf) {
2663         /* two byte character */
2664         l = 2;
2665         if (ilen < 2) {
2666             return 0;
2667         }
2668         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2669         c = s0 & 0x1f;
2670         c = (c << 6) | (s1 & 0x3f);
2671         if (enh_check && (s1 & 0xc0) != 0x80) {
2672             return 2;
2673         }
2674     } else if (s0 <= 0xef) {
2675         /* three byte character */
2676         l = 3;
2677         if (ilen < 3) {
2678             return 0;
2679         }
2680         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2681         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2682         c = s0 & 0x0f;
2683         c = (c << 6) | (s1 & 0x3f);
2684         c = (c << 6) | (s2 & 0x3f);
2685         /* Fold the byte-by-byte range descriptions in the PoO into
2686            tests against the complete value.  It disallows encodings
2687            that could be smaller, and the UTF-16 surrogates.  */
2688         if (enh_check
2689             && ((s1 & 0xc0) != 0x80
2690                 || (s2 & 0xc0) != 0x80
2691                 || c < 0x1000
2692                 || (c >= 0xd800 && c <= 0xdfff))) {
2693             return 2;
2694         }
2695     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2696         /* four byte character */
2697         l = 4;
2698         if (ilen < 4) {
2699             return 0;
2700         }
2701         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2702         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2703         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2704         c = s0 & 0x07;
2705         c = (c << 6) | (s1 & 0x3f);
2706         c = (c << 6) | (s2 & 0x3f);
2707         c = (c << 6) | (s3 & 0x3f);
2708         /* See above.  */
2709         if (enh_check
2710             && ((s1 & 0xc0) != 0x80
2711                 || (s2 & 0xc0) != 0x80
2712                 || (s3 & 0xc0) != 0x80
2713                 || c < 0x010000
2714                 || c > 0x10ffff)) {
2715             return 2;
2716         }
2717     } else {
2718         /* invalid character */
2719         return 2;
2720     }
2721 
2722     *ochar = c;
2723     *olen = l;
2724     return -1;
2725 }
2726 
/*
 * Decode one UTF-16 (big-endian) character starting at ADDR with at most
 * ILEN bytes available.  On success, return < 0 with the code point in
 * *OCHAR and the consumed length in *OLEN.  Return 0 when the source is
 * exhausted, or 2 when ENH_CHECK is set and the surrogate pair is invalid.
 */
static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint16_t s0, s1;
    uint32_t c, l;

    if (ilen < 2) {
        return 0;
    }
    s0 = cpu_lduw_data_ra(env, addr, ra);
    if ((s0 & 0xfc00) != 0xd800) {
        /* one word character */
        l = 2;
        c = s0;
    } else {
        /* two word character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
        /* Reassemble the code point: uuuuu = wwww + 1 from the high
           surrogate, 6 more bits from s0, 10 bits from the low surrogate. */
        c = extract32(s0, 6, 4) + 1;
        c = (c << 6) | (s0 & 0x3f);
        c = (c << 10) | (s1 & 0x3ff);
        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
            /* invalid surrogate character */
            return 2;
        }
    }

    *ochar = c;
    *olen = l;
    return -1;
}
2762 
/*
 * Decode one UTF-32 (big-endian) character starting at ADDR with at most
 * ILEN bytes available.  On success, return < 0 with the code point in
 * *OCHAR and 4 in *OLEN.  Return 0 when the source is exhausted, or 2
 * for an invalid value.
 */
static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint32_t c;

    if (ilen < 4) {
        return 0;
    }
    c = cpu_ldl_data_ra(env, addr, ra);
    /* NOTE(review): only high surrogates (0xd800..0xdbff) and values above
       0x10ffff are rejected, and unconditionally (enh_check is unused here);
       low surrogates 0xdc00..0xdfff pass through — verify against the PoO
       CONVERT UTF-32 well-formedness rules.  */
    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
        /* invalid unicode character */
        return 2;
    }

    *ochar = c;
    *olen = 4;
    return -1;
}
2782 
2783 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2784                        uintptr_t ra, uint32_t c, uint32_t *olen)
2785 {
2786     uint8_t d[4];
2787     uint32_t l, i;
2788 
2789     if (c <= 0x7f) {
2790         /* one byte character */
2791         l = 1;
2792         d[0] = c;
2793     } else if (c <= 0x7ff) {
2794         /* two byte character */
2795         l = 2;
2796         d[1] = 0x80 | extract32(c, 0, 6);
2797         d[0] = 0xc0 | extract32(c, 6, 5);
2798     } else if (c <= 0xffff) {
2799         /* three byte character */
2800         l = 3;
2801         d[2] = 0x80 | extract32(c, 0, 6);
2802         d[1] = 0x80 | extract32(c, 6, 6);
2803         d[0] = 0xe0 | extract32(c, 12, 4);
2804     } else {
2805         /* four byte character */
2806         l = 4;
2807         d[3] = 0x80 | extract32(c, 0, 6);
2808         d[2] = 0x80 | extract32(c, 6, 6);
2809         d[1] = 0x80 | extract32(c, 12, 6);
2810         d[0] = 0xf0 | extract32(c, 18, 3);
2811     }
2812 
2813     if (ilen < l) {
2814         return 1;
2815     }
2816     for (i = 0; i < l; ++i) {
2817         cpu_stb_data_ra(env, addr + i, d[i], ra);
2818     }
2819 
2820     *olen = l;
2821     return -1;
2822 }
2823 
2824 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2825                         uintptr_t ra, uint32_t c, uint32_t *olen)
2826 {
2827     uint16_t d0, d1;
2828 
2829     if (c <= 0xffff) {
2830         /* one word character */
2831         if (ilen < 2) {
2832             return 1;
2833         }
2834         cpu_stw_data_ra(env, addr, c, ra);
2835         *olen = 2;
2836     } else {
2837         /* two word character */
2838         if (ilen < 4) {
2839             return 1;
2840         }
2841         d1 = 0xdc00 | extract32(c, 0, 10);
2842         d0 = 0xd800 | extract32(c, 10, 6);
2843         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2844         cpu_stw_data_ra(env, addr + 0, d0, ra);
2845         cpu_stw_data_ra(env, addr + 2, d1, ra);
2846         *olen = 4;
2847     }
2848 
2849     return -1;
2850 }
2851 
2852 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2853                         uintptr_t ra, uint32_t c, uint32_t *olen)
2854 {
2855     if (ilen < 4) {
2856         return 1;
2857     }
2858     cpu_stl_data_ra(env, addr, c, ra);
2859     *olen = 4;
2860     return -1;
2861 }
2862 
/*
 * Common implementation of the CONVERT UNICODE instructions (CUxy).
 *
 * R1/R1+1 hold the destination address and length, R2/R2+1 the source
 * address and length.  DECODE reads one character from the source and
 * ENCODE writes it to the destination; both return -1 on success or a
 * condition code on failure: 0 = source operand exhausted, 1 = destination
 * operand exhausted, 2 = invalid character.  Bit 0 of M3 enables the
 * enhanced validity check in the decoder.
 *
 * Returns the resulting condition code; the operand registers are updated
 * on every exit path, so a cc-3 (partial completion) return lets the
 * instruction be re-executed to continue where it left off.
 */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* Advance both operands past the character just converted.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        /* If the iteration cap stops us, report cc 3 (CPU-determined).  */
        cc = 3;
    }

    /* Write back the (possibly partial) progress for restart.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2903 
/* CU12: CONVERT UTF-8 TO UTF-16.  */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2909 
/* CU14: CONVERT UTF-8 TO UTF-32.  */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2915 
/* CU21: CONVERT UTF-16 TO UTF-8.  */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
2921 
/* CU24: CONVERT UTF-16 TO UTF-32.  */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
2927 
/* CU41: CONVERT UTF-32 TO UTF-8.  */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
2933 
/* CU42: CONVERT UTF-32 TO UTF-16.  */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
2939 
2940 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2941                         uintptr_t ra)
2942 {
2943     /* test the actual access, not just any access to the page due to LAP */
2944     while (len) {
2945         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2946         const uint64_t curlen = MIN(pagelen, len);
2947 
2948         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2949         addr = wrap_address(env, addr + curlen);
2950         len -= curlen;
2951     }
2952 }
2953 
/* TCG helper entry point: capture the return address here via GETPC().  */
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}
2958