xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision bebc8ade7014ca1f8afbc9d1bd297460f2e88461)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/cpu_ldst.h"
29 #include "qemu/int128.h"
30 #include "qemu/atomic128.h"
31 #include "trace.h"
32 
33 #if !defined(CONFIG_USER_ONLY)
34 #include "hw/s390x/storage-keys.h"
35 #include "hw/boards.h"
36 #endif
37 
38 /*****************************************************************************/
39 /* Softmmu support */
40 
41 /* #define DEBUG_HELPER */
42 #ifdef DEBUG_HELPER
43 #define HELPER_LOG(x...) qemu_log(x)
44 #else
45 #define HELPER_LOG(x...)
46 #endif
47 
48 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
49 {
50     uint16_t pkm = env->cregs[3] >> 16;
51 
52     if (env->psw.mask & PSW_MASK_PSTATE) {
53         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
54         return pkm & (0x8000 >> psw_key);
55     }
56     return true;
57 }
58 
59 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
60                                    uint64_t src, uint32_t len)
61 {
62     if (!len || src == dest) {
63         return false;
64     }
65     /* Take care of wrapping at the end of address space. */
66     if (unlikely(wrap_address(env, src + len - 1) < src)) {
67         return dest > src || dest <= wrap_address(env, src + len - 1);
68     }
69     return dest > src && dest <= src + len - 1;
70 }
71 
72 /* Trigger a SPECIFICATION exception if an address or a length is not
73    naturally aligned.  */
74 static inline void check_alignment(CPUS390XState *env, uint64_t v,
75                                    int wordsize, uintptr_t ra)
76 {
77     if (v % wordsize) {
78         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
79     }
80 }
81 
82 /* Load a value from memory according to its size.  */
83 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
84                                            int wordsize, uintptr_t ra)
85 {
86     switch (wordsize) {
87     case 1:
88         return cpu_ldub_data_ra(env, addr, ra);
89     case 2:
90         return cpu_lduw_data_ra(env, addr, ra);
91     default:
92         abort();
93     }
94 }
95 
96 /* Store a to memory according to its size.  */
97 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
98                                       uint64_t value, int wordsize,
99                                       uintptr_t ra)
100 {
101     switch (wordsize) {
102     case 1:
103         cpu_stb_data_ra(env, addr, value, ra);
104         break;
105     case 2:
106         cpu_stw_data_ra(env, addr, value, ra);
107         break;
108     default:
109         abort();
110     }
111 }
112 
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest address of the first (or only) part */
    target_ulong vaddr2;   /* guest address of the second part, if size2 != 0 */
    void *haddr1;          /* host pointer for part 1; NULL -> use ld/st helpers */
    void *haddr2;          /* host pointer for part 2; NULL -> use ld/st helpers */
    uint16_t size1;        /* bytes in the first part */
    uint16_t size2;        /* bytes in the second part (0 if single-page) */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe, one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation - very unlikely that
     * the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
133 
/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    int flags = probe_access_flags(env, addr, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
        /*
         * With nonfault=false, probe_access_flags is expected to raise the
         * fault itself rather than report TLB_INVALID_MASK - TODO confirm.
         */
        assert(!nonfault);
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
173 
174 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
175                              bool nonfault, vaddr vaddr1, int size,
176                              MMUAccessType access_type,
177                              int mmu_idx, uintptr_t ra)
178 {
179     void *haddr1, *haddr2 = NULL;
180     int size1, size2, exc;
181     vaddr vaddr2 = 0;
182 
183     assert(size > 0 && size <= 4096);
184 
185     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
186     size2 = size - size1;
187 
188     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
189                             &haddr1, ra);
190     if (exc) {
191         return exc;
192     }
193     if (unlikely(size2)) {
194         /* The access crosses page boundaries. */
195         vaddr2 = wrap_address(env, vaddr1 + size1);
196         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
197                                 nonfault, &haddr2, ra);
198         if (exc) {
199             return exc;
200         }
201     }
202 
203     *access = (S390Access) {
204         .vaddr1 = vaddr1,
205         .vaddr2 = vaddr2,
206         .haddr1 = haddr1,
207         .haddr2 = haddr2,
208         .size1 = size1,
209         .size2 = size2,
210         .mmu_idx = mmu_idx
211     };
212     return 0;
213 }
214 
215 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
216                                   vaddr vaddr, int size,
217                                   MMUAccessType access_type, int mmu_idx,
218                                   uintptr_t ra)
219 {
220     int exc = access_prepare_nf(ret, env, false, vaddr, size,
221                                 access_type, mmu_idx, ra);
222     assert(!exc);
223 }
224 
/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only: a direct host mapping is always available. */
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        /* Fast path: write directly to host memory. */
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        cpu_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            /* Page became directly accessible; memset the remainder. */
            memset(haddr + 1, byte, size - 1);
        } else {
            /* Still no direct access: store byte-wise via the helpers. */
            for (i = 1; i < size; i++) {
                cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}
257 
258 static void access_memset(CPUS390XState *env, S390Access *desta,
259                           uint8_t byte, uintptr_t ra)
260 {
261 
262     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
263                      desta->mmu_idx, ra);
264     if (likely(!desta->size2)) {
265         return;
266     }
267     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
268                      desta->mmu_idx, ra);
269 }
270 
/* Read one byte at *haddr + offset, falling back to the ld helpers. */
static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr,
                                  void **haddr, int offset,
                                  int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only: a direct host mapping is always available. */
    return ldub_p(*haddr + offset);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
    /* Cache the (possibly now valid) host pointer for subsequent bytes. */
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}
293 
294 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
295                                int offset, uintptr_t ra)
296 {
297     if (offset < access->size1) {
298         return do_access_get_byte(env, access->vaddr1, &access->haddr1,
299                                   offset, access->mmu_idx, ra);
300     }
301     return do_access_get_byte(env, access->vaddr2, &access->haddr2,
302                               offset - access->size1, access->mmu_idx, ra);
303 }
304 
/* Write one byte at *haddr + offset, falling back to the st helpers. */
static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, void **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only: a direct host mapping is always available. */
    stb_p(*haddr + offset, byte);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
    /* Cache the (possibly now valid) host pointer for subsequent bytes. */
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}
326 
327 static void access_set_byte(CPUS390XState *env, S390Access *access,
328                             int offset, uint8_t byte, uintptr_t ra)
329 {
330     if (offset < access->size1) {
331         do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
332                            access->mmu_idx, ra);
333     } else {
334         do_access_set_byte(env, access->vaddr2, &access->haddr2,
335                            offset - access->size1, byte, access->mmu_idx, ra);
336     }
337 }
338 
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    /* Source and destination must cover the same number of bytes. */
    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        /* Byte-wise copy via the ld/st helper fallback. */
        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    /*
     * The two ranges may split at different offsets, so the copy happens
     * in up to three pieces depending on how the page boundaries align.
     */
    if (srca->size1 == desta->size1) {
        /* Splits coincide: copy part for part. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        /* The source crosses its page boundary first. */
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /* The destination crosses its page boundary first. */
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
384 
385 static int mmu_idx_from_as(uint8_t as)
386 {
387     switch (as) {
388     case AS_PRIMARY:
389         return MMU_PRIMARY_IDX;
390     case AS_SECONDARY:
391         return MMU_SECONDARY_IDX;
392     case AS_HOME:
393         return MMU_HOME_IDX;
394     default:
395         /* FIXME AS_ACCREG */
396         g_assert_not_reached();
397     }
398 }
399 
400 /* and on array */
401 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
402                              uint64_t src, uintptr_t ra)
403 {
404     const int mmu_idx = cpu_mmu_index(env, false);
405     S390Access srca1, srca2, desta;
406     uint32_t i;
407     uint8_t c = 0;
408 
409     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
410                __func__, l, dest, src);
411 
412     /* NC always processes one more byte than specified - maximum is 256 */
413     l++;
414 
415     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
416     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
417     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
418     for (i = 0; i < l; i++) {
419         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
420                           access_get_byte(env, &srca2, i, ra);
421 
422         c |= x;
423         access_set_byte(env, &desta, i, x, ra);
424     }
425     return c != 0;
426 }
427 
428 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
429                     uint64_t src)
430 {
431     return do_helper_nc(env, l, dest, src, GETPC());
432 }
433 
434 /* xor on array */
435 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
436                              uint64_t src, uintptr_t ra)
437 {
438     const int mmu_idx = cpu_mmu_index(env, false);
439     S390Access srca1, srca2, desta;
440     uint32_t i;
441     uint8_t c = 0;
442 
443     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
444                __func__, l, dest, src);
445 
446     /* XC always processes one more byte than specified - maximum is 256 */
447     l++;
448 
449     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
450     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
451     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
452 
453     /* xor with itself is the same as memset(0) */
454     if (src == dest) {
455         access_memset(env, &desta, 0, ra);
456         return 0;
457     }
458 
459     for (i = 0; i < l; i++) {
460         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
461                           access_get_byte(env, &srca2, i, ra);
462 
463         c |= x;
464         access_set_byte(env, &desta, i, x, ra);
465     }
466     return c != 0;
467 }
468 
469 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
470                     uint64_t src)
471 {
472     return do_helper_xc(env, l, dest, src, GETPC());
473 }
474 
475 /* or on array */
476 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
477                              uint64_t src, uintptr_t ra)
478 {
479     const int mmu_idx = cpu_mmu_index(env, false);
480     S390Access srca1, srca2, desta;
481     uint32_t i;
482     uint8_t c = 0;
483 
484     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
485                __func__, l, dest, src);
486 
487     /* OC always processes one more byte than specified - maximum is 256 */
488     l++;
489 
490     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
491     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
492     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
493     for (i = 0; i < l; i++) {
494         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
495                           access_get_byte(env, &srca2, i, ra);
496 
497         c |= x;
498         access_set_byte(env, &desta, i, x, ra);
499     }
500     return c != 0;
501 }
502 
503 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
504                     uint64_t src)
505 {
506     return do_helper_oc(env, l, dest, src, GETPC());
507 }
508 
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* Overlap by one: replicates the first source byte, i.e. memset. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: must process strictly one byte at a time. */
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    /* MVC does not alter the condition code; return cc_op unchanged. */
    return env->cc_op;
}
545 
546 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
547 {
548     do_helper_mvc(env, l, dest, src, GETPC());
549 }
550 
551 /* move right to left */
552 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
553 {
554     const int mmu_idx = cpu_mmu_index(env, false);
555     const uint64_t ra = GETPC();
556     S390Access srca, desta;
557     int32_t i;
558 
559     /* MVCRL always copies one more byte than specified - maximum is 256 */
560     l++;
561 
562     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
563     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
564 
565     for (i = l - 1; i >= 0; i--) {
566         uint8_t byte = access_get_byte(env, &srca, i, ra);
567         access_set_byte(env, &desta, i, byte, ra);
568     }
569 }
570 
571 /* move inverse  */
572 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
573 {
574     const int mmu_idx = cpu_mmu_index(env, false);
575     S390Access srca, desta;
576     uintptr_t ra = GETPC();
577     int i;
578 
579     /* MVCIN always copies one more byte than specified - maximum is 256 */
580     l++;
581 
582     src = wrap_address(env, src - l + 1);
583     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
584     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
585     for (i = 0; i < l; i++) {
586         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
587 
588         access_set_byte(env, &desta, i, x, ra);
589     }
590 }
591 
592 /* move numerics  */
593 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
594 {
595     const int mmu_idx = cpu_mmu_index(env, false);
596     S390Access srca1, srca2, desta;
597     uintptr_t ra = GETPC();
598     int i;
599 
600     /* MVN always copies one more byte than specified - maximum is 256 */
601     l++;
602 
603     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
604     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
605     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
606     for (i = 0; i < l; i++) {
607         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
608                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
609 
610         access_set_byte(env, &desta, i, x, ra);
611     }
612 }
613 
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;    /* high nibble of l: dest length - 1 */
    const int len_src = (l & 0xf) + 1;    /* low nibble of l: src length - 1 */
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    /* Keep its low nibble; the high nibble comes from the rightmost
       source byte's low nibble. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry the high nibble of the previous source byte downward. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
647 
648 /* move zones  */
649 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
650 {
651     const int mmu_idx = cpu_mmu_index(env, false);
652     S390Access srca1, srca2, desta;
653     uintptr_t ra = GETPC();
654     int i;
655 
656     /* MVZ always copies one more byte than specified - maximum is 256 */
657     l++;
658 
659     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
660     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
661     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
662     for (i = 0; i < l; i++) {
663         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
664                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
665 
666         access_set_byte(env, &desta, i, x, ra);
667     }
668 }
669 
670 /* compare unsigned byte arrays */
671 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
672                               uint64_t s2, uintptr_t ra)
673 {
674     uint32_t i;
675     uint32_t cc = 0;
676 
677     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
678                __func__, l, s1, s2);
679 
680     for (i = 0; i <= l; i++) {
681         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
682         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
683         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
684         if (x < y) {
685             cc = 1;
686             break;
687         } else if (x > y) {
688             cc = 2;
689             break;
690         }
691     }
692 
693     HELPER_LOG("\n");
694     return cc;
695 }
696 
697 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
698 {
699     return do_helper_clc(env, l, s1, s2, GETPC());
700 }
701 
702 /* compare logical under mask */
703 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
704                      uint64_t addr)
705 {
706     uintptr_t ra = GETPC();
707     uint32_t cc = 0;
708 
709     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
710                mask, addr);
711 
712     while (mask) {
713         if (mask & 8) {
714             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
715             uint8_t r = extract32(r1, 24, 8);
716             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
717                        addr);
718             if (r < d) {
719                 cc = 1;
720                 break;
721             } else if (r > d) {
722                 cc = 2;
723                 break;
724             }
725             addr++;
726         }
727         mask = (mask << 1) & 0xf;
728         r1 <<= 8;
729     }
730 
731     HELPER_LOG("\n");
732     return cc;
733 }
734 
735 static inline uint64_t get_address(CPUS390XState *env, int reg)
736 {
737     return wrap_address(env, env->regs[reg]);
738 }
739 
740 /*
741  * Store the address to the given register, zeroing out unused leftmost
742  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
743  */
744 static inline void set_address_zero(CPUS390XState *env, int reg,
745                                     uint64_t address)
746 {
747     if (env->psw.mask & PSW_MASK_64) {
748         env->regs[reg] = address;
749     } else {
750         if (!(env->psw.mask & PSW_MASK_32)) {
751             address &= 0x00ffffff;
752         } else {
753             address &= 0x7fffffff;
754         }
755         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
756     }
757 }
758 
759 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
760 {
761     if (env->psw.mask & PSW_MASK_64) {
762         /* 64-Bit mode */
763         env->regs[reg] = address;
764     } else {
765         if (!(env->psw.mask & PSW_MASK_32)) {
766             /* 24-Bit mode. According to the PoO it is implementation
767             dependent if bits 32-39 remain unchanged or are set to
768             zeros.  Choose the former so that the function can also be
769             used for TRT.  */
770             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
771         } else {
772             /* 31-Bit mode. According to the PoO it is implementation
773             dependent if bit 32 remains unchanged or is set to zero.
774             Choose the latter so that the function can also be used for
775             TRT.  */
776             address &= 0x7fffffff;
777             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
778         }
779     }
780 }
781 
782 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
783 {
784     if (!(env->psw.mask & PSW_MASK_64)) {
785         return (uint32_t)length;
786     }
787     return length;
788 }
789 
790 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
791 {
792     if (!(env->psw.mask & PSW_MASK_64)) {
793         /* 24-Bit and 31-Bit mode */
794         length &= 0x7fffffff;
795     }
796     return length;
797 }
798 
799 static inline uint64_t get_length(CPUS390XState *env, int reg)
800 {
801     return wrap_length31(env, env->regs[reg]);
802 }
803 
804 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
805 {
806     if (env->psw.mask & PSW_MASK_64) {
807         /* 64-Bit mode */
808         env->regs[reg] = length;
809     } else {
810         /* 24-Bit and 31-Bit mode */
811         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
812     }
813 }
814 
815 /* search string (c is byte to search, r2 is string, r1 end of string) */
816 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
817 {
818     uintptr_t ra = GETPC();
819     uint64_t end, str;
820     uint32_t len;
821     uint8_t v, c = env->regs[0];
822 
823     /* Bits 32-55 must contain all 0.  */
824     if (env->regs[0] & 0xffffff00u) {
825         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
826     }
827 
828     str = get_address(env, r2);
829     end = get_address(env, r1);
830 
831     /* Lest we fail to service interrupts in a timely manner, limit the
832        amount of work we're willing to do.  For now, let's cap at 8k.  */
833     for (len = 0; len < 0x2000; ++len) {
834         if (str + len == end) {
835             /* Character not found.  R1 & R2 are unmodified.  */
836             env->cc_op = 2;
837             return;
838         }
839         v = cpu_ldub_data_ra(env, str + len, ra);
840         if (v == c) {
841             /* Character found.  Set R1 to the location; R2 is unmodified.  */
842             env->cc_op = 1;
843             set_address(env, r1, str + len);
844             return;
845         }
846     }
847 
848     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
849     env->cc_op = 3;
850     set_address(env, r2, str + len);
851 }
852 
853 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
854 {
855     uintptr_t ra = GETPC();
856     uint32_t len;
857     uint16_t v, c = env->regs[0];
858     uint64_t end, str, adj_end;
859 
860     /* Bits 32-47 of R0 must be zero.  */
861     if (env->regs[0] & 0xffff0000u) {
862         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
863     }
864 
865     str = get_address(env, r2);
866     end = get_address(env, r1);
867 
868     /* If the LSB of the two addresses differ, use one extra byte.  */
869     adj_end = end + ((str ^ end) & 1);
870 
871     /* Lest we fail to service interrupts in a timely manner, limit the
872        amount of work we're willing to do.  For now, let's cap at 8k.  */
873     for (len = 0; len < 0x2000; len += 2) {
874         if (str + len == adj_end) {
875             /* End of input found.  */
876             env->cc_op = 2;
877             return;
878         }
879         v = cpu_lduw_data_ra(env, str + len, ra);
880         if (v == c) {
881             /* Character found.  Set R1 to the location; R2 is unmodified.  */
882             env->cc_op = 1;
883             set_address(env, r1, str + len);
884             return;
885         }
886     }
887 
888     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
889     env->cc_op = 3;
890     set_address(env, r2, str + len);
891 }
892 
/*
 * COMPARE LOGICAL STRING (CLST): unsigned string compare, where `c` holds
 * the terminator character in its low byte.  Sets cc and returns the
 * (possibly advanced) pair of operand addresses packed into an Int128.
 */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of R0 is the terminator.  */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}
927 
928 /* move page */
929 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
930 {
931     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
932     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
933     const int mmu_idx = cpu_mmu_index(env, false);
934     const bool f = extract64(r0, 11, 1);
935     const bool s = extract64(r0, 10, 1);
936     const bool cco = extract64(r0, 8, 1);
937     uintptr_t ra = GETPC();
938     S390Access srca, desta;
939     int exc;
940 
941     if ((f && s) || extract64(r0, 12, 4)) {
942         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
943     }
944 
945     /*
946      * We always manually handle exceptions such that we can properly store
947      * r1/r2 to the lowcore on page-translation exceptions.
948      *
949      * TODO: Access key handling
950      */
951     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
952                             MMU_DATA_LOAD, mmu_idx, ra);
953     if (exc) {
954         if (cco) {
955             return 2;
956         }
957         goto inject_exc;
958     }
959     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
960                             MMU_DATA_STORE, mmu_idx, ra);
961     if (exc) {
962         if (cco && exc != PGM_PROTECTION) {
963             return 1;
964         }
965         goto inject_exc;
966     }
967     access_memmove(env, &desta, &srca, ra);
968     return 0; /* data moved */
969 inject_exc:
970 #if !defined(CONFIG_USER_ONLY)
971     if (exc != PGM_ADDRESSING) {
972         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
973                  env->tlb_fill_tec);
974     }
975     if (exc == PGM_PAGE_TRANS) {
976         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
977                  r1 << 4 | r2);
978     }
979 #endif
980     tcg_s390_program_interrupt(env, exc, ra);
981 }
982 
/*
 * MOVE STRING (MVST): copy bytes from the R2 address to the R1 address up
 * to and including the ending character in R0.  Returns cc 1 when the
 * ending character was moved, cc 3 after a CPU-determined amount of work.
 */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    /* Bytes left until the end of the current source/destination page.  */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 32-55 of R0 must be zero.  */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Ending character moved: R1 points at it; R2 is unchanged.  */
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    /* Page boundary reached; advance both addresses and report cc 3.  */
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
1020 
1021 /* load access registers r1 to r3 from memory at a2 */
1022 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1023 {
1024     uintptr_t ra = GETPC();
1025     int i;
1026 
1027     if (a2 & 0x3) {
1028         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1029     }
1030 
1031     for (i = r1;; i = (i + 1) % 16) {
1032         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1033         a2 += 4;
1034 
1035         if (i == r3) {
1036             break;
1037         }
1038     }
1039 }
1040 
1041 /* store access registers r1 to r3 in memory at a2 */
1042 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1043 {
1044     uintptr_t ra = GETPC();
1045     int i;
1046 
1047     if (a2 & 0x3) {
1048         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1049     }
1050 
1051     for (i = r1;; i = (i + 1) % 16) {
1052         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1053         a2 += 4;
1054 
1055         if (i == r3) {
1056             break;
1057         }
1058     }
1059 }
1060 
/*
 * Move long helper: perform one CPU-determined step of MVCLE/MVCLU.
 * Either moves from the source or, once the source is exhausted, pads the
 * destination (wordsize 1 pads with the low byte of `pad`; wordsize 2
 * alternates the two pad bytes based on the remaining length's parity).
 * Lengths and addresses are updated through the pointers.  Returns the cc
 * derived from the initial lengths, or 3 while destination bytes remain.
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* Cap at the bytes left on the current destination page.  */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* cc is fixed up front from the original lengths.  */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
1120 
/*
 * MOVE LONG (MVCL): interruptible move between the register pairs R1/R1+1
 * and R2/R2+1.  Unlike MVCLE this loops until the destination is
 * exhausted, writing intermediate progress back to the registers so the
 * instruction can be restarted after an interrupt.
 */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    /* Lengths are the low 24 bits of the odd register of each pair.  */
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    /* The padding byte sits directly above the 24-bit length in R2+1.  */
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* cc is determined once, up front; 3 flags destructive overlap.  */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the rest of the destination.  */
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1194 
/*
 * MOVE LONG EXTENDED (MVCLE): one CPU-determined unit of work via
 * do_mvcl with byte-sized elements; the padding byte is the low byte of
 * the second-operand address.  cc 3 means the caller must re-execute.
 */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    /* Write the updated lengths and addresses back to the register pairs. */
    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}
1216 
/*
 * MOVE LONG UNICODE (MVCLU): like MVCLE but with 2-byte elements and a
 * 16-bit padding character taken from the second-operand address.
 */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    /* Write the updated lengths and addresses back to the register pairs. */
    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}
1238 
/*
 * Compare logical long helper: compare the two operands element-wise
 * (wordsize 1 or 2), substituting `pad` for elements past the shorter
 * operand.  At most `limit` bytes are compared per invocation; if the
 * limit is hit first, cc is 3.  Addresses/lengths advance through the
 * pointers only for elements that compared equal.
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be multiples of the element size.  */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand reads as the padding element.  */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1290 
1291 
/*
 * COMPARE LOGICAL LONG (CLCL): 24-bit lengths in the odd registers; the
 * padding byte sits above the length in R2+1.  No per-invocation work
 * limit (limit -1), so the whole comparison completes in one call.
 */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    /* Write the remaining lengths and addresses back.  */
    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}
1312 
/*
 * COMPARE LOGICAL LONG EXTENDED (CLCLE): byte-wise compare with padding,
 * capped at 0x2000 bytes per invocation (cc 3 = re-execute).
 */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    /* Write the remaining lengths and addresses back.  */
    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}
1334 
1335 /* compare logical long unicode memcompare insn with padding */
1336 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1337                        uint32_t r3)
1338 {
1339     uintptr_t ra = GETPC();
1340     uint64_t src1len = get_length(env, r1 + 1);
1341     uint64_t src1 = get_address(env, r1);
1342     uint64_t src3len = get_length(env, r3 + 1);
1343     uint64_t src3 = get_address(env, r3);
1344     uint16_t pad = a2;
1345     uint32_t cc = 0;
1346 
1347     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1348 
1349     set_length(env, r1 + 1, src1len);
1350     set_length(env, r3 + 1, src3len);
1351     set_address(env, r1, src1);
1352     set_address(env, r3, src3);
1353 
1354     return cc;
1355 }
1356 
/*
 * CHECKSUM (CKSM): accumulate 32-bit words (with a 1/2/3-byte left-padded
 * tail) into the low 32 bits of R1, folding carries end-around.  Returns
 * the new checksum and the number of bytes processed; cc 3 means more
 * input remains.
 */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    /* Initial checksum is the low 32 bits of R1.  */
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* A partial trailing word is padded on the right with zeros.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
1402 
/*
 * PACK: convert zoned decimal at `src` into packed decimal at `dest`,
 * processing right to left.  `len` carries both length codes (field
 * length minus 1): destination in bits 4-7, source in the low 4 bits.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each field.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low nibble: the digit of the next source byte (or zero).  */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* High nibble: the digit of the byte after that (or zero).  */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1439 
/*
 * Common body of PKA/PKU: pack the digits of an ASCII (ssize 1) or
 * Unicode (ssize 2) source into a 16-byte packed-decimal destination,
 * right to left, forcing a positive sign (0xc) and zero-filling digits
 * beyond the source.
 * NOTE(review): elements are consumed only while srclen > ssize, so the
 * leftmost source element is never read; presumably the caller passes
 * srclen adjusted for this -- not verifiable from this file.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1473 
1474 
/* PACK ASCII (PKA): 1-byte source elements; see do_pkau.  */
void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}
1480 
/* PACK UNICODE (PKU): 2-byte source elements; see do_pkau.  */
void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}
1486 
/*
 * UNPACK (UNPK): convert packed decimal at `src` into zoned decimal at
 * `dest`, right to left.  `len` carries both length codes (field length
 * minus 1): destination in bits 4-7, source in the low 4 bits.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Tracks which nibble of the current source byte to emit next.  */
    int second_nibble = 0;

    /* Point at the rightmost byte of each field.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1533 
/*
 * Common body of UNPKA/UNPKU: unpack a 16-byte packed-decimal source into
 * ASCII (dsize 1) or Unicode (dsize 2) digits at `dest`, right to left.
 * Returns the cc derived from the sign nibble (0 plus, 1 minus,
 * 3 invalid); the digits themselves are not validated here.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Every second element consumes a fresh source byte...  */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* ...the other uses the high nibble of the previous one.  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1584 
/* UNPACK ASCII (UNPKA): 1-byte destination elements; see do_unpkau.  */
uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}
1590 
/* UNPACK UNICODE (UNPKU): 2-byte destination elements; see do_unpkau.  */
uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}
1596 
1597 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1598 {
1599     uintptr_t ra = GETPC();
1600     uint32_t cc = 0;
1601     int i;
1602 
1603     for (i = 0; i < destlen; i++) {
1604         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1605         /* digit */
1606         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1607 
1608         if (i == (destlen - 1)) {
1609             /* sign */
1610             cc |= (b & 0xf) < 0xa ? 1 : 0;
1611         } else {
1612             /* digit */
1613             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1614         }
1615     }
1616 
1617     return cc;
1618 }
1619 
/*
 * Translate `len + 1` bytes at `array` in place through the table at
 * `trans`.  Returns the current cc_op unchanged (the condition code is
 * not modified here).
 */
static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i;

    /* Note the inclusive bound: len is the operand length minus 1.  */
    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    return env->cc_op;
}
1633 
/* TRANSLATE (TR): in-place table translation; see do_helper_tr.  */
void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}
1639 
/*
 * TRANSLATE EXTENDED (TRE): translate bytes of `array` through `trans`
 * until the test byte in R0 is found (cc 1), the length is exhausted
 * (cc 0), or the work cap is hit (cc 3).  Returns the remaining length
 * and the updated array address.
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Outside 64-bit addressing mode, truncate the address and length.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte stops translation before being translated.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1678 
/*
 * TRANSLATE AND TEST core: scan `len + 1` bytes of `array` (inclusive
 * bound -- len is the operand length minus 1), stepping by `inc` (+1
 * forward, -1 backward), looking each byte up in the function table at
 * `trans`.  On the first nonzero function byte: GR1 gets the argument
 * address, the low byte of GR2 gets the function byte, and the result is
 * 1 (or 2 if it was the last byte).  Returns 0 when no nonzero byte is
 * found.
 */
static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
                                     uint64_t array, uint64_t trans,
                                     int inc, uintptr_t ra)
{
    int i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);

        if (sbyte != 0) {
            set_address(env, 1, array + i * inc);
            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
            return (i == len) ? 2 : 1;
        }
    }

    return 0;
}
1698 
/* Forward (inc +1) variant of do_helper_trt, for use as a callback.  */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}
1705 
/* TRANSLATE AND TEST (TRT): forward scan; see do_helper_trt.  */
uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
                     uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, 1, GETPC());
}
1711 
/* Backward (inc -1) variant of do_helper_trt, for use as a callback.  */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}
1718 
/* TRANSLATE AND TEST REVERSE (TRTR): backward scan; see do_helper_trt.  */
uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
                      uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, -1, GETPC());
}
1724 
/*
 * Translate one/two to one/two (TROO/TROT/TRTO/TRTT): translate the
 * source string through the table at GR1, stopping on the test character
 * `tst` (cc 1), end of string (cc 0), or the work cap (cc 3).  `sizes`
 * selects the element widths: bit 0 set = 1-byte destination, bit 1 set =
 * 1-byte source (otherwise 2 bytes each).
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source element size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* The test character stops translation before being stored.  */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the updated addresses and remaining length back.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1778 
1779 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1780                         uint64_t a2, bool parallel)
1781 {
1782     uint32_t mem_idx = cpu_mmu_index(env, false);
1783     uintptr_t ra = GETPC();
1784     uint32_t fc = extract32(env->regs[0], 0, 8);
1785     uint32_t sc = extract32(env->regs[0], 8, 8);
1786     uint64_t pl = get_address(env, 1) & -16;
1787     uint64_t svh, svl;
1788     uint32_t cc;
1789 
1790     /* Sanity check the function code and storage characteristic.  */
1791     if (fc > 1 || sc > 3) {
1792         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1793             goto spec_exception;
1794         }
1795         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1796             goto spec_exception;
1797         }
1798     }
1799 
1800     /* Sanity check the alignments.  */
1801     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1802         goto spec_exception;
1803     }
1804 
1805     /* Sanity check writability of the store address.  */
1806     probe_write(env, a2, 1 << sc, mem_idx, ra);
1807 
1808     /*
1809      * Note that the compare-and-swap is atomic, and the store is atomic,
1810      * but the complete operation is not.  Therefore we do not need to
1811      * assert serial context in order to implement this.  That said,
1812      * restart early if we can't support either operation that is supposed
1813      * to be atomic.
1814      */
1815     if (parallel) {
1816         uint32_t max = 2;
1817 #ifdef CONFIG_ATOMIC64
1818         max = 3;
1819 #endif
1820         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1821             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1822             cpu_loop_exit_atomic(env_cpu(env), ra);
1823         }
1824     }
1825 
1826     /* All loads happen before all stores.  For simplicity, load the entire
1827        store value area from the parameter list.  */
1828     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1829     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1830 
1831     switch (fc) {
1832     case 0:
1833         {
1834             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1835             uint32_t cv = env->regs[r3];
1836             uint32_t ov;
1837 
1838             if (parallel) {
1839 #ifdef CONFIG_USER_ONLY
1840                 uint32_t *haddr = g2h(env_cpu(env), a1);
1841                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1842 #else
1843                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1844                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1845 #endif
1846             } else {
1847                 ov = cpu_ldl_data_ra(env, a1, ra);
1848                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1849             }
1850             cc = (ov != cv);
1851             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1852         }
1853         break;
1854 
1855     case 1:
1856         {
1857             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1858             uint64_t cv = env->regs[r3];
1859             uint64_t ov;
1860 
1861             if (parallel) {
1862 #ifdef CONFIG_ATOMIC64
1863                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1864                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1865 #else
1866                 /* Note that we asserted !parallel above.  */
1867                 g_assert_not_reached();
1868 #endif
1869             } else {
1870                 ov = cpu_ldq_data_ra(env, a1, ra);
1871                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1872             }
1873             cc = (ov != cv);
1874             env->regs[r3] = ov;
1875         }
1876         break;
1877 
1878     case 2:
1879         {
1880             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1881             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1882             Int128 nv = int128_make128(nvl, nvh);
1883             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1884             Int128 ov;
1885 
1886             if (!parallel) {
1887                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1888                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1889 
1890                 ov = int128_make128(ol, oh);
1891                 cc = !int128_eq(ov, cv);
1892                 if (cc) {
1893                     nv = ov;
1894                 }
1895 
1896                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1897                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1898             } else if (HAVE_CMPXCHG128) {
1899                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1900                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1901                 cc = !int128_eq(ov, cv);
1902             } else {
1903                 /* Note that we asserted !parallel above.  */
1904                 g_assert_not_reached();
1905             }
1906 
1907             env->regs[r3 + 0] = int128_gethi(ov);
1908             env->regs[r3 + 1] = int128_getlo(ov);
1909         }
1910         break;
1911 
1912     default:
1913         g_assert_not_reached();
1914     }
1915 
1916     /* Store only if the comparison succeeded.  Note that above we use a pair
1917        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1918        from the most-significant bits of svh.  */
1919     if (cc == 0) {
1920         switch (sc) {
1921         case 0:
1922             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1923             break;
1924         case 1:
1925             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1926             break;
1927         case 2:
1928             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1929             break;
1930         case 3:
1931             cpu_stq_data_ra(env, a2, svh, ra);
1932             break;
1933         case 4:
1934             if (!parallel) {
1935                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1936                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1937             } else if (HAVE_ATOMIC128) {
1938                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1939                 Int128 sv = int128_make128(svl, svh);
1940                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1941             } else {
1942                 /* Note that we asserted !parallel above.  */
1943                 g_assert_not_reached();
1944             }
1945             break;
1946         default:
1947             g_assert_not_reached();
1948         }
1949     }
1950 
1951     return cc;
1952 
1953  spec_exception:
1954     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1955 }
1956 
1957 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1958 {
1959     return do_csst(env, r3, a1, a2, false);
1960 }
1961 
1962 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1963                                uint64_t a2)
1964 {
1965     return do_csst(env, r3, a1, a2, true);
1966 }
1967 
1968 #if !defined(CONFIG_USER_ONLY)
1969 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1970 {
1971     uintptr_t ra = GETPC();
1972     bool PERchanged = false;
1973     uint64_t src = a2;
1974     uint32_t i;
1975 
1976     if (src & 0x7) {
1977         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1978     }
1979 
1980     for (i = r1;; i = (i + 1) % 16) {
1981         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1982         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1983             PERchanged = true;
1984         }
1985         env->cregs[i] = val;
1986         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1987                    i, src, val);
1988         src += sizeof(uint64_t);
1989 
1990         if (i == r3) {
1991             break;
1992         }
1993     }
1994 
1995     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1996         s390_cpu_recompute_watchpoints(env_cpu(env));
1997     }
1998 
1999     tlb_flush(env_cpu(env));
2000 }
2001 
2002 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2003 {
2004     uintptr_t ra = GETPC();
2005     bool PERchanged = false;
2006     uint64_t src = a2;
2007     uint32_t i;
2008 
2009     if (src & 0x3) {
2010         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2011     }
2012 
2013     for (i = r1;; i = (i + 1) % 16) {
2014         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2015         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2016             PERchanged = true;
2017         }
2018         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2019         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2020         src += sizeof(uint32_t);
2021 
2022         if (i == r3) {
2023             break;
2024         }
2025     }
2026 
2027     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2028         s390_cpu_recompute_watchpoints(env_cpu(env));
2029     }
2030 
2031     tlb_flush(env_cpu(env));
2032 }
2033 
2034 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2035 {
2036     uintptr_t ra = GETPC();
2037     uint64_t dest = a2;
2038     uint32_t i;
2039 
2040     if (dest & 0x7) {
2041         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2042     }
2043 
2044     for (i = r1;; i = (i + 1) % 16) {
2045         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2046         dest += sizeof(uint64_t);
2047 
2048         if (i == r3) {
2049             break;
2050         }
2051     }
2052 }
2053 
2054 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2055 {
2056     uintptr_t ra = GETPC();
2057     uint64_t dest = a2;
2058     uint32_t i;
2059 
2060     if (dest & 0x3) {
2061         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2062     }
2063 
2064     for (i = r1;; i = (i + 1) % 16) {
2065         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2066         dest += sizeof(uint32_t);
2067 
2068         if (i == r3) {
2069             break;
2070         }
2071     }
2072 }
2073 
2074 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2075 {
2076     uintptr_t ra = GETPC();
2077     int i;
2078 
2079     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2080 
2081     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2082         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2083     }
2084 
2085     return 0;
2086 }
2087 
/*
 * TEST PROTECTION: probe whether the page at a1 may be stored into and/or
 * fetched from, returning the TPROT condition code (0..3) without actually
 * taking a protection exception.
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /*
     * The write probe failed; classify the failure using the pending
     * program-interruption code it left behind.
     */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;   /* discard the pending exception */
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2127 
/* insert storage key extended: return the storage key of the page at r2 */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* Cache the skeys device and its class across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* r2 is a real address; convert to absolute and validate.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /*
         * NOTE(review): presumably enable_skeys() returns whether keys were
         * already enabled; flushing here makes cached translations honor
         * the keys from now on -- confirm against hw/s390x/s390-skeys.
         */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* On backend failure, trace the error and report key 0.  */
    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }
    return key;
}
2157 
/* set storage key extended: set the storage key of the page at r2 from r1 */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* Cache the skeys device and its class across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* r2 is a real address; convert to absolute and validate.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /*
         * NOTE(review): presumably enable_skeys() returns whether keys were
         * already enabled; flushing here makes cached translations honor
         * the keys from now on -- confirm against hw/s390x/s390-skeys.
         */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* Use the low byte of r1 with the least-significant bit cleared.  */
    key = r1 & 0xfe;
    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
    }
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2191 
/* reset reference bit extended: clear the R bit of the page's storage key
   and report the old R and C bits via the condition code */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* Cache the skeys device and its class across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    /* r2 is a real address; convert to absolute and validate.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /*
         * NOTE(review): presumably enable_skeys() returns whether keys were
         * already enabled; flushing here makes cached translations honor
         * the keys from now on -- confirm against hw/s390x/s390-skeys.
         */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* On backend failure, trace the error and report CC 0.  */
    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }

    /* Remember the old reference and change bits, then clear R.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
        return 0;
    }
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
2245 
2246 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2247                       uint64_t key)
2248 {
2249     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2250     S390Access srca, desta;
2251     uintptr_t ra = GETPC();
2252     int cc = 0;
2253 
2254     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2255                __func__, l, a1, a2);
2256 
2257     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2258         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2259         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2260     }
2261 
2262     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2263         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2264     }
2265 
2266     l = wrap_length32(env, l);
2267     if (l > 256) {
2268         /* max 256 */
2269         l = 256;
2270         cc = 3;
2271     } else if (!l) {
2272         return cc;
2273     }
2274 
2275     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2276     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2277     access_memmove(env, &desta, &srca, ra);
2278     return cc;
2279 }
2280 
2281 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2282                       uint64_t key)
2283 {
2284     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2285     S390Access srca, desta;
2286     uintptr_t ra = GETPC();
2287     int cc = 0;
2288 
2289     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2290                __func__, l, a1, a2);
2291 
2292     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2293         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2294         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2295     }
2296 
2297     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2298         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2299     }
2300 
2301     l = wrap_length32(env, l);
2302     if (l > 256) {
2303         /* max 256 */
2304         l = 256;
2305         cc = 3;
2306     } else if (!l) {
2307         return cc;
2308     }
2309     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2310     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2311     access_memmove(env, &desta, &srca, ra);
2312     return cc;
2313 }
2314 
/*
 * INVALIDATE DAT TABLE ENTRY: mark a run of region/segment table entries
 * invalid and flush the TLB (locally if m4 bit 0 is set, otherwise on all
 * CPUs synchronously).
 */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Reserved bits in r2 must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        /* Number of additional entries, biased by one.  */
        entries = (r2 & 0x7ff) + 1;

        /* Select the index field of r2 matching the table level.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2364 
/* invalidate pte: INVALIDATE PAGE TABLE ENTRY.  Marks the PTE designated
   by pto/vaddr invalid and flushes the affected page from the TLB
   (locally if m4 bit 0 is set, otherwise on all CPUs synchronously).  */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* Local-clearing control: flush only this CPU's TLB.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        /* Flush on all CPUs, synchronized.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2405 
/* flush local tlb: PURGE TLB flushes only this CPU's TLB.  */
void HELPER(ptlb)(CPUS390XState *env)
{
    tlb_flush(env_cpu(env));
}
2411 
/* flush global tlb: flush the TLB on all CPUs and wait for completion.  */
void HELPER(purge)(CPUS390XState *env)
{
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2417 
2418 /* load real address */
2419 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2420 {
2421     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2422     uint64_t ret, tec;
2423     int flags, exc, cc;
2424 
2425     /* XXX incomplete - has more corner cases */
2426     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2427         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2428     }
2429 
2430     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2431     if (exc) {
2432         cc = 3;
2433         ret = exc | 0x80000000;
2434     } else {
2435         cc = 0;
2436         ret |= addr & ~TARGET_PAGE_MASK;
2437     }
2438 
2439     env->cc_op = cc;
2440     return ret;
2441 }
2442 #endif
2443 
2444 /* load pair from quadword */
2445 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2446 {
2447     uintptr_t ra = GETPC();
2448     uint64_t hi, lo;
2449 
2450     check_alignment(env, addr, 16, ra);
2451     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2452     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2453 
2454     env->retxl = lo;
2455     return hi;
2456 }
2457 
2458 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2459 {
2460     uintptr_t ra = GETPC();
2461     uint64_t hi, lo;
2462     int mem_idx;
2463     MemOpIdx oi;
2464     Int128 v;
2465 
2466     assert(HAVE_ATOMIC128);
2467 
2468     mem_idx = cpu_mmu_index(env, false);
2469     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2470     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2471     hi = int128_gethi(v);
2472     lo = int128_getlo(v);
2473 
2474     env->retxl = lo;
2475     return hi;
2476 }
2477 
2478 /* store pair to quadword */
2479 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2480                   uint64_t low, uint64_t high)
2481 {
2482     uintptr_t ra = GETPC();
2483 
2484     check_alignment(env, addr, 16, ra);
2485     cpu_stq_data_ra(env, addr + 0, high, ra);
2486     cpu_stq_data_ra(env, addr + 8, low, ra);
2487 }
2488 
2489 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2490                            uint64_t low, uint64_t high)
2491 {
2492     uintptr_t ra = GETPC();
2493     int mem_idx;
2494     MemOpIdx oi;
2495     Int128 v;
2496 
2497     assert(HAVE_ATOMIC128);
2498 
2499     mem_idx = cpu_mmu_index(env, false);
2500     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2501     v = int128_make128(low, high);
2502     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2503 }
2504 
2505 /* Execute instruction.  This instruction executes an insn modified with
2506    the contents of r1.  It does not change the executed instruction in memory;
2507    it does not change the program counter.
2508 
2509    Perform this by recording the modified instruction in env->ex_value.
2510    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2511 */
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    /* First halfword of the target instruction; the opcode is its top byte. */
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* 0xd0..0xdf storage-to-storage insns with direct helpers.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Crack the operands: length, then base+displacement pairs.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            /* Base register 0 means no base contribution.  */
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            /* Advance past the EXECUTE insn itself, not the target.  */
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: raise the SVC exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
}
2577 
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes between address
 * spaces/keys selected by the operand-access controls in r0.  Returns CC 0,
 * or CC 3 if the length was truncated to 4096.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* The instruction requires DAT to be on.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;   /* access key */
    dest_as = (val >> 6) & 0x3;     /* address-space control */
    dest_k = (val >> 1) & 0x1;      /* key-validity bit */
    dest_a = val & 0x1;             /* AS-validity bit */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Fall back to the PSW key/AS for fields not marked valid.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Home-space destination is not allowed in problem state.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    /* Secondary space requires the secondary-space control.  */
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    /* Both access keys must be allowed by the PSW key mask.  */
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        /* At most 4096 bytes are moved; CC 3 reports the truncation.  */
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2663 
2664 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2665    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2666    value >= 0 indicates failure, and the CC value to be returned.  */
2667 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2668                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2669                                  uint32_t *ochar, uint32_t *olen);
2670 
2671 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2672    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2673    indicates failure, and the CC value to be returned.  */
2674 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2675                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2676                                  uint32_t *olen);
2677 
2678 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2679                        bool enh_check, uintptr_t ra,
2680                        uint32_t *ochar, uint32_t *olen)
2681 {
2682     uint8_t s0, s1, s2, s3;
2683     uint32_t c, l;
2684 
2685     if (ilen < 1) {
2686         return 0;
2687     }
2688     s0 = cpu_ldub_data_ra(env, addr, ra);
2689     if (s0 <= 0x7f) {
2690         /* one byte character */
2691         l = 1;
2692         c = s0;
2693     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2694         /* invalid character */
2695         return 2;
2696     } else if (s0 <= 0xdf) {
2697         /* two byte character */
2698         l = 2;
2699         if (ilen < 2) {
2700             return 0;
2701         }
2702         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2703         c = s0 & 0x1f;
2704         c = (c << 6) | (s1 & 0x3f);
2705         if (enh_check && (s1 & 0xc0) != 0x80) {
2706             return 2;
2707         }
2708     } else if (s0 <= 0xef) {
2709         /* three byte character */
2710         l = 3;
2711         if (ilen < 3) {
2712             return 0;
2713         }
2714         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2715         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2716         c = s0 & 0x0f;
2717         c = (c << 6) | (s1 & 0x3f);
2718         c = (c << 6) | (s2 & 0x3f);
2719         /* Fold the byte-by-byte range descriptions in the PoO into
2720            tests against the complete value.  It disallows encodings
2721            that could be smaller, and the UTF-16 surrogates.  */
2722         if (enh_check
2723             && ((s1 & 0xc0) != 0x80
2724                 || (s2 & 0xc0) != 0x80
2725                 || c < 0x1000
2726                 || (c >= 0xd800 && c <= 0xdfff))) {
2727             return 2;
2728         }
2729     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2730         /* four byte character */
2731         l = 4;
2732         if (ilen < 4) {
2733             return 0;
2734         }
2735         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2736         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2737         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2738         c = s0 & 0x07;
2739         c = (c << 6) | (s1 & 0x3f);
2740         c = (c << 6) | (s2 & 0x3f);
2741         c = (c << 6) | (s3 & 0x3f);
2742         /* See above.  */
2743         if (enh_check
2744             && ((s1 & 0xc0) != 0x80
2745                 || (s2 & 0xc0) != 0x80
2746                 || (s3 & 0xc0) != 0x80
2747                 || c < 0x010000
2748                 || c > 0x10ffff)) {
2749             return 2;
2750         }
2751     } else {
2752         /* invalid character */
2753         return 2;
2754     }
2755 
2756     *ochar = c;
2757     *olen = l;
2758     return -1;
2759 }
2760 
2761 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2762                         bool enh_check, uintptr_t ra,
2763                         uint32_t *ochar, uint32_t *olen)
2764 {
2765     uint16_t s0, s1;
2766     uint32_t c, l;
2767 
2768     if (ilen < 2) {
2769         return 0;
2770     }
2771     s0 = cpu_lduw_data_ra(env, addr, ra);
2772     if ((s0 & 0xfc00) != 0xd800) {
2773         /* one word character */
2774         l = 2;
2775         c = s0;
2776     } else {
2777         /* two word character */
2778         l = 4;
2779         if (ilen < 4) {
2780             return 0;
2781         }
2782         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2783         c = extract32(s0, 6, 4) + 1;
2784         c = (c << 6) | (s0 & 0x3f);
2785         c = (c << 10) | (s1 & 0x3ff);
2786         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2787             /* invalid surrogate character */
2788             return 2;
2789         }
2790     }
2791 
2792     *ochar = c;
2793     *olen = l;
2794     return -1;
2795 }
2796 
2797 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2798                         bool enh_check, uintptr_t ra,
2799                         uint32_t *ochar, uint32_t *olen)
2800 {
2801     uint32_t c;
2802 
2803     if (ilen < 4) {
2804         return 0;
2805     }
2806     c = cpu_ldl_data_ra(env, addr, ra);
2807     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2808         /* invalid unicode character */
2809         return 2;
2810     }
2811 
2812     *ochar = c;
2813     *olen = 4;
2814     return -1;
2815 }
2816 
2817 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2818                        uintptr_t ra, uint32_t c, uint32_t *olen)
2819 {
2820     uint8_t d[4];
2821     uint32_t l, i;
2822 
2823     if (c <= 0x7f) {
2824         /* one byte character */
2825         l = 1;
2826         d[0] = c;
2827     } else if (c <= 0x7ff) {
2828         /* two byte character */
2829         l = 2;
2830         d[1] = 0x80 | extract32(c, 0, 6);
2831         d[0] = 0xc0 | extract32(c, 6, 5);
2832     } else if (c <= 0xffff) {
2833         /* three byte character */
2834         l = 3;
2835         d[2] = 0x80 | extract32(c, 0, 6);
2836         d[1] = 0x80 | extract32(c, 6, 6);
2837         d[0] = 0xe0 | extract32(c, 12, 4);
2838     } else {
2839         /* four byte character */
2840         l = 4;
2841         d[3] = 0x80 | extract32(c, 0, 6);
2842         d[2] = 0x80 | extract32(c, 6, 6);
2843         d[1] = 0x80 | extract32(c, 12, 6);
2844         d[0] = 0xf0 | extract32(c, 18, 3);
2845     }
2846 
2847     if (ilen < l) {
2848         return 1;
2849     }
2850     for (i = 0; i < l; ++i) {
2851         cpu_stb_data_ra(env, addr + i, d[i], ra);
2852     }
2853 
2854     *olen = l;
2855     return -1;
2856 }
2857 
2858 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2859                         uintptr_t ra, uint32_t c, uint32_t *olen)
2860 {
2861     uint16_t d0, d1;
2862 
2863     if (c <= 0xffff) {
2864         /* one word character */
2865         if (ilen < 2) {
2866             return 1;
2867         }
2868         cpu_stw_data_ra(env, addr, c, ra);
2869         *olen = 2;
2870     } else {
2871         /* two word character */
2872         if (ilen < 4) {
2873             return 1;
2874         }
2875         d1 = 0xdc00 | extract32(c, 0, 10);
2876         d0 = 0xd800 | extract32(c, 10, 6);
2877         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2878         cpu_stw_data_ra(env, addr + 0, d0, ra);
2879         cpu_stw_data_ra(env, addr + 2, d1, ra);
2880         *olen = 4;
2881     }
2882 
2883     return -1;
2884 }
2885 
2886 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2887                         uintptr_t ra, uint32_t c, uint32_t *olen)
2888 {
2889     if (ilen < 4) {
2890         return 1;
2891     }
2892     cpu_stl_data_ra(env, addr, c, ra);
2893     *olen = 4;
2894     return -1;
2895 }
2896 
2897 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2898                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2899                                        decode_unicode_fn decode,
2900                                        encode_unicode_fn encode)
2901 {
2902     uint64_t dst = get_address(env, r1);
2903     uint64_t dlen = get_length(env, r1 + 1);
2904     uint64_t src = get_address(env, r2);
2905     uint64_t slen = get_length(env, r2 + 1);
2906     bool enh_check = m3 & 1;
2907     int cc, i;
2908 
2909     /* Lest we fail to service interrupts in a timely manner, limit the
2910        amount of work we're willing to do.  For now, let's cap at 256.  */
2911     for (i = 0; i < 256; ++i) {
2912         uint32_t c, ilen, olen;
2913 
2914         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2915         if (unlikely(cc >= 0)) {
2916             break;
2917         }
2918         cc = encode(env, dst, dlen, ra, c, &olen);
2919         if (unlikely(cc >= 0)) {
2920             break;
2921         }
2922 
2923         src += ilen;
2924         slen -= ilen;
2925         dst += olen;
2926         dlen -= olen;
2927         cc = 3;
2928     }
2929 
2930     set_address(env, r1, dst);
2931     set_length(env, r1 + 1, dlen);
2932     set_address(env, r2, src);
2933     set_length(env, r2 + 1, slen);
2934 
2935     return cc;
2936 }
2937 
/* CU12: CONVERT UTF-8 TO UTF-16.  */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2943 
/* CU14: CONVERT UTF-8 TO UTF-32.  */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2949 
/* CU21: CONVERT UTF-16 TO UTF-8.  */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
2955 
/* CU24: CONVERT UTF-16 TO UTF-32.  */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
2961 
/* CU41: CONVERT UTF-32 TO UTF-8.  */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
2967 
/* CU42: CONVERT UTF-32 TO UTF-16.  */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
2973 
2974 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2975                         uintptr_t ra)
2976 {
2977     /* test the actual access, not just any access to the page due to LAP */
2978     while (len) {
2979         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2980         const uint64_t curlen = MIN(pagelen, len);
2981 
2982         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2983         addr = wrap_address(env, addr + curlen);
2984         len -= curlen;
2985     }
2986 }
2987 
/* TCG helper entry point: probe writability using the caller's retaddr.  */
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}
2992