/*
 *  S/390 memory access helper routines
 *
 *  Copyright (c) 2009 Ulrich Hecht
 *  Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "tcg_s390x.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "trace.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/s390x/storage-keys.h"
#include "hw/boards.h"
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif

static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
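    /* The PSW-key mask resides in bits 32-47 of control register 3. */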
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /* The PSW key has range 0..15; it is valid if its bit in the PKM is 1 */
        return pkm & (0x8000 >> psw_key);
    }
    return true;
}

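/*
 * Check whether dest overlaps [src, src + len) such that source bytes would
 * be overwritten before they are read (e.g. dest == src + 1 with len >= 2),
 * taking wraparound at the end of the address space into account.
 */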
static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}

/* Trigger a SPECIFICATION exception if an address or a length is not
   naturally aligned.  */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    if (v % wordsize) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }
}

/* Load a value from memory according to its size.  */
static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
                                           int wordsize, uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        return cpu_ldub_data_ra(env, addr, ra);
    case 2:
        return cpu_lduw_data_ra(env, addr, ra);
    default:
        abort();
    }
}

/* Store a value to memory according to its size.  */
static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
                                      uint64_t value, int wordsize,
                                      uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        cpu_stb_data_ra(env, addr, value, ra);
        break;
    case 2:
        cpu_stw_data_ra(env, addr, value, ra);
        break;
    default:
        abort();
    }
}

/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;
    target_ulong vaddr2;
    char *haddr1;
    char *haddr2;
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx needed for the access here instead of passing it around in
     * the helpers. Maybe we can get rid of ld/st access one day, once we
     * can handle TLB_NOTDIRTY differently. We don't expect these special
     * accesses to trigger exceptions; only TLB_NOTDIRTY on LAP pages could
     * trigger a new MMU translation, and it is very unlikely that the
     * mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
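/*
 * Example, assuming 4 KiB pages: an access of 80 bytes starting 16 bytes
 * before a page boundary yields size1 = 16 and size2 = 64.
 */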

/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
#if defined(CONFIG_USER_ONLY)
    return probe_access_flags(env, addr, access_type, mmu_idx,
                              nonfault, phost, ra);
#else
    int flags;

    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
    return 0;
#endif
}

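/*
 * Probe both pages of an access and fill in *access with the virtual and
 * host addresses and the split sizes; with nonfault=true, no exception is
 * injected and the PGM_ code is returned instead.
 */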
static int access_prepare_nf(S390Access *access, CPUS390XState *env,
                             bool nonfault, vaddr vaddr1, int size,
                             MMUAccessType access_type,
                             int mmu_idx, uintptr_t ra)
{
    void *haddr1, *haddr2 = NULL;
    int size1, size2, exc;
    vaddr vaddr2 = 0;

    assert(size > 0 && size <= 4096);

    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
    size2 = size - size1;

    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
                            &haddr1, ra);
    if (exc) {
        return exc;
    }
    if (unlikely(size2)) {
        /* The access crosses page boundaries. */
        vaddr2 = wrap_address(env, vaddr1 + size1);
        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
                                nonfault, &haddr2, ra);
        if (exc) {
            return exc;
        }
    }

    *access = (S390Access) {
        .vaddr1 = vaddr1,
        .vaddr2 = vaddr2,
        .haddr1 = haddr1,
        .haddr2 = haddr2,
        .size1 = size1,
        .size2 = size2,
        .mmu_idx = mmu_idx
    };
    return 0;
}

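/*
 * Variant of access_prepare_nf() with nonfault=false: the probe raises any
 * exception itself, so this cannot fail.
 */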
static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
                                 MMUAccessType access_type, int mmu_idx,
                                 uintptr_t ra)
{
    S390Access ret;
    int exc = access_prepare_nf(&ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
    return ret;
}

/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        cpu_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            memset(haddr + 1, byte, size - 1);
        } else {
            for (i = 1; i < size; i++) {
                cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}

static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (likely(!desta->size2)) {
        return;
    }
    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
                     desta->mmu_idx, ra);
}

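/*
 * Read one byte of a prepared access, preferring the direct host mapping
 * and re-fetching it after a slow-path access.
 */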
static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}

static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
                               int offset, uintptr_t ra)
{
    if (offset < access->size1) {
        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
                                  offset, access->mmu_idx, ra);
    }
    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
                              offset - access->size1, access->mmu_idx, ra);
}

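/* Store one byte of a prepared access; the counterpart of the above. */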
static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}

static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    if (offset < access->size1) {
        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
                           access->mmu_idx, ra);
    } else {
        do_access_set_byte(env, access->vaddr2, &access->haddr2,
                           offset - access->size1, byte, access->mmu_idx, ra);
    }
}

/*
 * Move data with the same semantics as memmove() when the ranges don't
 * overlap or when src > dest. Behavior is undefined on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fall back to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

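    /*
     * The source and destination may split at different page offsets, in
     * which case "diff" bytes cross the page boundary of one operand but
     * not the other; copy piecewise around that boundary.
     */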
    if (srca->size1 == desta->size1) {
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}

static int mmu_idx_from_as(uint8_t as)
{
    switch (as) {
    case AS_PRIMARY:
        return MMU_PRIMARY_IDX;
    case AS_SECONDARY:
        return MMU_SECONDARY_IDX;
    case AS_HOME:
        return MMU_HOME_IDX;
    default:
        /* FIXME AS_ACCREG */
        g_assert_not_reached();
    }
}

/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}

/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}

/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}

/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
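    /* A dest == src + 1 overlap propagates the first byte, i.e. memset. */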
    if (dest == src + 1) {
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    return env->cc_op;
}

void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}

/* move right to left */
void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t ra = GETPC();
    S390Access srca, desta;
    int32_t i;

    /* MVCRL always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    for (i = l - 1; i >= 0; i--) {
        uint8_t byte = access_get_byte(env, &srca, i, ra);
        access_set_byte(env, &desta, i, byte, ra);
    }
}

/* move inverse */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    src = wrap_address(env, src - l + 1);
    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move numerics */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move with offset */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}

/* move zones */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            cc = 1;
            break;
        } else if (x > y) {
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    return cc;
}

uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}

/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}

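/* Read a register as an address, wrapped to the current addressing mode. */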
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}

/*
 * Store the address to the given register, zeroing out unused leftmost
 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 */
static inline void set_address_zero(CPUS390XState *env, int reg,
                                    uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            address &= 0x00ffffff;
        } else {
            address &= 0x7fffffff;
        }
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    }
}

static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}

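/* Truncate a length to 32 bits when not in 64-bit addressing mode. */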
static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        return (uint32_t)length;
    }
    return length;
}

static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        length &= 0x7fffffff;
    }
    return length;
}

static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}

static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
    } else {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    }
}

/* search string (c is the byte to search for, r2 the string, r1 its end) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 of R0 must be zero.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

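/* search string unicode */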
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

/* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}

/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}

/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}

/* load access registers r1 to r3 from memory at a2 */
void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* store access registers r1 to r3 in memory at a2 */
void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
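        /* E.g. with pad = 0xabcd: odd *destlen stores 0xcd, even stores 0xab. */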
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}

/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}

/* move long extended */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* move long unicode */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}

/* compare logical long */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}

/* compare logical long extended memcompare insn with padding */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* compare logical long unicode memcompare insn with padding */
uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc = 0;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* checksum */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
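    /* e.g. 0x100000001 folds to 0x2 in one pass; ~0ULL needs two passes. */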
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    env->retxl = cksm;
    return len;
}

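/* pack: convert a zoned-decimal source into a packed-decimal result */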
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* the last byte is special: it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}

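/* pack ASCII/Unicode helper: ssize is 1 for PKA and 2 for PKU */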
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}

void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}

void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}

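/* unpack: convert a packed-decimal source into a zoned-decimal result */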
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    dest += len_dest;
    src += len_src;

    /* the last byte is special: it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 zone bits */
    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}

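/* unpack ASCII/Unicode helper: dsize is 1 for UNPKA and 2 for UNPKU */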
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}

uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}

uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}

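/* test decimal: check all digit nibbles and the sign nibble */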
uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;
    int i;

    for (i = 0; i < destlen; i++) {
        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
        /* digit */
        cc |= (b & 0xf0) > 0x90 ? 2 : 0;

        if (i == (destlen - 1)) {
            /* sign */
            cc |= (b & 0xf) < 0xa ? 1 : 0;
        } else {
            /* digit */
            cc |= (b & 0xf) > 0x9 ? 2 : 0;
        }
    }

    return cc;
}

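/* translate: replace each byte using a 256-byte translation table */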
static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    return env->cc_op;
}

void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}

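/* translate extended: translate until the test byte from r0 is found */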
1636 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1637                      uint64_t len, uint64_t trans)
1638 {
1639     uintptr_t ra = GETPC();
1640     uint8_t end = env->regs[0] & 0xff;
1641     uint64_t l = len;
1642     uint64_t i;
1643     uint32_t cc = 0;
1644 
1645     if (!(env->psw.mask & PSW_MASK_64)) {
1646         array &= 0x7fffffff;
1647         l = (uint32_t)l;
1648     }
1649 
1650     /* Lest we fail to service interrupts in a timely manner, limit the
1651        amount of work we're willing to do.  For now, let's cap at 8k.  */
1652     if (l > 0x2000) {
1653         l = 0x2000;
1654         cc = 3;
1655     }
1656 
1657     for (i = 0; i < l; i++) {
1658         uint8_t byte, new_byte;
1659 
1660         byte = cpu_ldub_data_ra(env, array + i, ra);
1661 
1662         if (byte == end) {
1663             cc = 1;
1664             break;
1665         }
1666 
1667         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1668         cpu_stb_data_ra(env, array + i, new_byte, ra);
1669     }
1670 
1671     env->cc_op = cc;
1672     env->retxl = len - i;
1673     return array + i;
1674 }
1675 
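/*
 * Illustrative note: with the 8k cap above, a longer TRE ends with
 * cc == 3 and the address/length register pair advanced past the
 * processed prefix, so the guest simply re-executes the instruction
 * to resume -- the usual interruptible-instruction pattern in this
 * file.
 */
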
1676 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1677                                      uint64_t array, uint64_t trans,
1678                                      int inc, uintptr_t ra)
1679 {
1680     int i;
1681 
1682     for (i = 0; i <= len; i++) {
1683         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1684         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1685 
1686         if (sbyte != 0) {
1687             set_address(env, 1, array + i * inc);
1688             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1689             return (i == len) ? 2 : 1;
1690         }
1691     }
1692 
1693     return 0;
1694 }
1695 
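/*
 * Illustrative TRT with hypothetical operands: scanning "ab,cd"
 * (len == 4, i.e. five bytes) with a function table that is nonzero
 * only at trans[','] == 0x40 stops at offset 2; address register 1
 * then points at the comma, the low byte of GR2 holds 0x40, and
 * cc == 1 since the hit was not the last byte tested.
 */
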
1696 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1697                                   uint64_t array, uint64_t trans,
1698                                   uintptr_t ra)
1699 {
1700     return do_helper_trt(env, len, array, trans, 1, ra);
1701 }
1702 
1703 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1704                      uint64_t trans)
1705 {
1706     return do_helper_trt(env, len, array, trans, 1, GETPC());
1707 }
1708 
1709 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1710                                    uint64_t array, uint64_t trans,
1711                                    uintptr_t ra)
1712 {
1713     return do_helper_trt(env, len, array, trans, -1, ra);
1714 }
1715 
1716 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1717                       uint64_t trans)
1718 {
1719     return do_helper_trt(env, len, array, trans, -1, GETPC());
1720 }
1721 
1722 /* Translate one/two to one/two */
1723 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1724                       uint32_t tst, uint32_t sizes)
1725 {
1726     uintptr_t ra = GETPC();
1727     int dsize = (sizes & 1) ? 1 : 2;
1728     int ssize = (sizes & 2) ? 1 : 2;
1729     uint64_t tbl = get_address(env, 1);
1730     uint64_t dst = get_address(env, r1);
1731     uint64_t len = get_length(env, r1 + 1);
1732     uint64_t src = get_address(env, r2);
1733     uint32_t cc = 3;
1734     int i;
1735 
1736     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1737        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1738        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1739     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1740         tbl &= -4096;
1741     } else {
1742         tbl &= -8;
1743     }
1744 
1745     check_alignment(env, len, ssize, ra);
1746 
1747     /* Lest we fail to service interrupts in a timely manner, limit
1748        the amount of work we're willing to do.  */
1749     for (i = 0; i < 0x2000; i++) {
1750         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1751         uint64_t tble = tbl + (sval * dsize);
1752         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1753         if (dval == tst) {
1754             cc = 1;
1755             break;
1756         }
1757         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1758 
1759         len -= ssize;
1760         src += ssize;
1761         dst += dsize;
1762 
1763         if (len == 0) {
1764             cc = 0;
1765             break;
1766         }
1767     }
1768 
1769     set_address(env, r1, dst);
1770     set_length(env, r1 + 1, len);
1771     set_address(env, r2, src);
1772 
1773     return cc;
1774 }
1775 
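/*
 * Note on the masking above: -4096 and -8 are the two's-complement
 * spellings of ~0xfff and ~0x7, so "tbl &= -4096" keeps a 4K-aligned
 * table origin while "tbl &= -8" keeps a double-word-aligned one, as
 * described in the comment before the test.
 */
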
1776 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1777                   uint32_t r1, uint32_t r3)
1778 {
1779     uintptr_t ra = GETPC();
1780     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1781     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1782     Int128 oldv;
1783     uint64_t oldh, oldl;
1784     bool fail;
1785 
1786     check_alignment(env, addr, 16, ra);
1787 
1788     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1789     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1790 
1791     oldv = int128_make128(oldl, oldh);
1792     fail = !int128_eq(oldv, cmpv);
1793     if (fail) {
1794         newv = oldv;
1795     }
1796 
1797     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1798     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1799 
1800     env->cc_op = fail;
1801     env->regs[r1] = int128_gethi(oldv);
1802     env->regs[r1 + 1] = int128_getlo(oldv);
1803 }
1804 
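/*
 * Illustrative CDSG with hypothetical register numbers r1 == 0,
 * r3 == 2: the quadword at ADDR is compared against GR0||GR1 (high
 * doubleword first) and replaced by GR2||GR3 on a match; on a
 * mismatch the old contents land in GR0||GR1 and cc is set to 1.
 */
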
1805 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1806                            uint32_t r1, uint32_t r3)
1807 {
1808     uintptr_t ra = GETPC();
1809     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1810     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1811     int mem_idx;
1812     MemOpIdx oi;
1813     Int128 oldv;
1814     bool fail;
1815 
1816     assert(HAVE_CMPXCHG128);
1817 
1818     mem_idx = cpu_mmu_index(env, false);
1819     oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1820     oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1821     fail = !int128_eq(oldv, cmpv);
1822 
1823     env->cc_op = fail;
1824     env->regs[r1] = int128_gethi(oldv);
1825     env->regs[r1 + 1] = int128_getlo(oldv);
1826 }
1827 
1828 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1829                         uint64_t a2, bool parallel)
1830 {
1831     uint32_t mem_idx = cpu_mmu_index(env, false);
1832     uintptr_t ra = GETPC();
1833     uint32_t fc = extract32(env->regs[0], 0, 8);
1834     uint32_t sc = extract32(env->regs[0], 8, 8);
1835     uint64_t pl = get_address(env, 1) & -16;
1836     uint64_t svh, svl;
1837     uint32_t cc;
1838 
1839     /* Sanity check the function code and storage characteristic.  */
1840     if (fc > 1 || sc > 3) {
1841         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1842             goto spec_exception;
1843         }
1844         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1845             goto spec_exception;
1846         }
1847     }
1848 
1849     /* Sanity check the alignments.  */
1850     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1851         goto spec_exception;
1852     }
1853 
1854     /* Sanity check writability of the store address.  */
1855     probe_write(env, a2, 1 << sc, mem_idx, ra);
1856 
1857     /*
1858      * Note that the compare-and-swap is atomic, and the store is atomic,
1859      * but the complete operation is not.  Therefore we do not need to
1860      * assert serial context in order to implement this.  That said,
1861      * restart early if we can't support either operation that is supposed
1862      * to be atomic.
1863      */
1864     if (parallel) {
1865         uint32_t max = 2;
1866 #ifdef CONFIG_ATOMIC64
1867         max = 3;
1868 #endif
1869         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1870             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1871             cpu_loop_exit_atomic(env_cpu(env), ra);
1872         }
1873     }
1874 
1875     /* All loads happen before all stores.  For simplicity, load the entire
1876        store value area from the parameter list.  */
1877     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1878     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1879 
1880     switch (fc) {
1881     case 0:
1882         {
1883             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1884             uint32_t cv = env->regs[r3];
1885             uint32_t ov;
1886 
1887             if (parallel) {
1888 #ifdef CONFIG_USER_ONLY
1889                 uint32_t *haddr = g2h(env_cpu(env), a1);
1890                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1891 #else
1892                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1893                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1894 #endif
1895             } else {
1896                 ov = cpu_ldl_data_ra(env, a1, ra);
1897                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1898             }
1899             cc = (ov != cv);
1900             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1901         }
1902         break;
1903 
1904     case 1:
1905         {
1906             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1907             uint64_t cv = env->regs[r3];
1908             uint64_t ov;
1909 
1910             if (parallel) {
1911 #ifdef CONFIG_ATOMIC64
1912                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1913                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1914 #else
1915                 /* Note that we asserted !parallel above.  */
1916                 g_assert_not_reached();
1917 #endif
1918             } else {
1919                 ov = cpu_ldq_data_ra(env, a1, ra);
1920                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1921             }
1922             cc = (ov != cv);
1923             env->regs[r3] = ov;
1924         }
1925         break;
1926 
1927     case 2:
1928         {
1929             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1930             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1931             Int128 nv = int128_make128(nvl, nvh);
1932             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1933             Int128 ov;
1934 
1935             if (!parallel) {
1936                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1937                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1938 
1939                 ov = int128_make128(ol, oh);
1940                 cc = !int128_eq(ov, cv);
1941                 if (cc) {
1942                     nv = ov;
1943                 }
1944 
1945                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1946                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1947             } else if (HAVE_CMPXCHG128) {
1948                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1949                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1950                 cc = !int128_eq(ov, cv);
1951             } else {
1952                 /* Note that we asserted !parallel above.  */
1953                 g_assert_not_reached();
1954             }
1955 
1956             env->regs[r3 + 0] = int128_gethi(ov);
1957             env->regs[r3 + 1] = int128_getlo(ov);
1958         }
1959         break;
1960 
1961     default:
1962         g_assert_not_reached();
1963     }
1964 
1965     /* Store only if the comparison succeeded.  Note that above we use a pair
1966        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1967        from the most-significant bits of svh.  */
1968     if (cc == 0) {
1969         switch (sc) {
1970         case 0:
1971             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1972             break;
1973         case 1:
1974             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1975             break;
1976         case 2:
1977             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1978             break;
1979         case 3:
1980             cpu_stq_data_ra(env, a2, svh, ra);
1981             break;
1982         case 4:
1983             if (!parallel) {
1984                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1985                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1986             } else if (HAVE_ATOMIC128) {
1987                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1988                 Int128 sv = int128_make128(svl, svh);
1989                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1990             } else {
1991                 /* Note that we asserted !parallel above.  */
1992                 g_assert_not_reached();
1993             }
1994             break;
1995         default:
1996             g_assert_not_reached();
1997         }
1998     }
1999 
2000     return cc;
2001 
2002  spec_exception:
2003     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2004 }
2005 
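/*
 * Recap of the parameter-list accesses in do_csst above: PL (GR1
 * rounded down to a 16-byte boundary) supplies the replacement value
 * at PL+0 (4, 8 or 16 bytes for fc 0, 1, 2) and the 16-byte
 * store-value area at PL+16; the comparison value itself comes from
 * R3 (and R3+1 for fc == 2).
 */
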
2006 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2007 {
2008     return do_csst(env, r3, a1, a2, false);
2009 }
2010 
2011 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2012                                uint64_t a2)
2013 {
2014     return do_csst(env, r3, a1, a2, true);
2015 }
2016 
2017 #if !defined(CONFIG_USER_ONLY)
2018 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2019 {
2020     uintptr_t ra = GETPC();
2021     bool PERchanged = false;
2022     uint64_t src = a2;
2023     uint32_t i;
2024 
2025     if (src & 0x7) {
2026         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2027     }
2028 
2029     for (i = r1;; i = (i + 1) % 16) {
2030         uint64_t val = cpu_ldq_data_ra(env, src, ra);
2031         if (env->cregs[i] != val && i >= 9 && i <= 11) {
2032             PERchanged = true;
2033         }
2034         env->cregs[i] = val;
2035         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2036                    i, src, val);
2037         src += sizeof(uint64_t);
2038 
2039         if (i == r3) {
2040             break;
2041         }
2042     }
2043 
2044     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2045         s390_cpu_recompute_watchpoints(env_cpu(env));
2046     }
2047 
2048     tlb_flush(env_cpu(env));
2049 }
2050 
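/*
 * Illustrative example: LCTLG with r1 == 14 and r3 == 1 loads c14,
 * c15, c0 and c1 from four consecutive doublewords, because the loop
 * above steps the control-register number modulo 16.
 */
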
2051 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2052 {
2053     uintptr_t ra = GETPC();
2054     bool PERchanged = false;
2055     uint64_t src = a2;
2056     uint32_t i;
2057 
2058     if (src & 0x3) {
2059         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2060     }
2061 
2062     for (i = r1;; i = (i + 1) % 16) {
2063         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2064         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2065             PERchanged = true;
2066         }
2067         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2068         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2069         src += sizeof(uint32_t);
2070 
2071         if (i == r3) {
2072             break;
2073         }
2074     }
2075 
2076     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2077         s390_cpu_recompute_watchpoints(env_cpu(env));
2078     }
2079 
2080     tlb_flush(env_cpu(env));
2081 }
2082 
2083 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2084 {
2085     uintptr_t ra = GETPC();
2086     uint64_t dest = a2;
2087     uint32_t i;
2088 
2089     if (dest & 0x7) {
2090         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2091     }
2092 
2093     for (i = r1;; i = (i + 1) % 16) {
2094         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2095         dest += sizeof(uint64_t);
2096 
2097         if (i == r3) {
2098             break;
2099         }
2100     }
2101 }
2102 
2103 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2104 {
2105     uintptr_t ra = GETPC();
2106     uint64_t dest = a2;
2107     uint32_t i;
2108 
2109     if (dest & 0x3) {
2110         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2111     }
2112 
2113     for (i = r1;; i = (i + 1) % 16) {
2114         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2115         dest += sizeof(uint32_t);
2116 
2117         if (i == r3) {
2118             break;
2119         }
2120     }
2121 }
2122 
2123 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2124 {
2125     uintptr_t ra = GETPC();
2126     int i;
2127 
2128     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2129 
2130     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2131         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2132     }
2133 
2134     return 0;
2135 }
2136 
2137 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2138 {
2139     S390CPU *cpu = env_archcpu(env);
2140     CPUState *cs = env_cpu(env);
2141 
2142     /*
2143      * TODO: we currently don't handle all access protection types
2144      * (including access-list and key-controlled), nor AR mode.
2145      */
2146     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2147         /* Fetching permitted; storing permitted */
2148         return 0;
2149     }
2150 
2151     if (env->int_pgm_code == PGM_PROTECTION) {
2152         /* retry if reading is possible */
2153         cs->exception_index = -1;
2154         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2155             /* Fetching permitted; storing not permitted */
2156             return 1;
2157         }
2158     }
2159 
2160     switch (env->int_pgm_code) {
2161     case PGM_PROTECTION:
2162         /* Fetching not permitted; storing not permitted */
2163         cs->exception_index = -1;
2164         return 2;
2165     case PGM_ADDRESSING:
2166     case PGM_TRANS_SPEC:
2167         /* exceptions forwarded to the guest */
2168         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2169         return 0;
2170     }
2171 
2172     /* Translation not available */
2173     cs->exception_index = -1;
2174     return 3;
2175 }
2176 
2177 /* insert storage key extended */
2178 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2179 {
2180     static S390SKeysState *ss;
2181     static S390SKeysClass *skeyclass;
2182     uint64_t addr = wrap_address(env, r2);
2183     uint8_t key;
2184     int rc;
2185 
2186     addr = mmu_real2abs(env, addr);
2187     if (!mmu_absolute_addr_valid(addr, false)) {
2188         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2189     }
2190 
2191     if (unlikely(!ss)) {
2192         ss = s390_get_skeys_device();
2193         skeyclass = S390_SKEYS_GET_CLASS(ss);
2194         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2195             tlb_flush_all_cpus_synced(env_cpu(env));
2196         }
2197     }
2198 
2199     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2200     if (rc) {
2201         trace_get_skeys_nonzero(rc);
2202         return 0;
2203     }
2204     return key;
2205 }
2206 
2207 /* set storage key extended */
2208 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2209 {
2210     static S390SKeysState *ss;
2211     static S390SKeysClass *skeyclass;
2212     uint64_t addr = wrap_address(env, r2);
2213     uint8_t key;
2214     int rc;
2215 
2216     addr = mmu_real2abs(env, addr);
2217     if (!mmu_absolute_addr_valid(addr, false)) {
2218         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2219     }
2220 
2221     if (unlikely(!ss)) {
2222         ss = s390_get_skeys_device();
2223         skeyclass = S390_SKEYS_GET_CLASS(ss);
2224         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2225             tlb_flush_all_cpus_synced(env_cpu(env));
2226         }
2227     }
2228 
2229     key = r1 & 0xfe;
2230     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2231     if (rc) {
2232         trace_set_skeys_nonzero(rc);
2233     }
2234     /*
2235      * As we can only flush by virtual address and not all the entries
2236      * that point to a physical address, we have to flush the whole TLB.
2237      */
2238     tlb_flush_all_cpus_synced(env_cpu(env));
2239 }
2240 
2241 /* reset reference bit extended */
2242 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2243 {
2244     uint64_t addr = wrap_address(env, r2);
2245     static S390SKeysState *ss;
2246     static S390SKeysClass *skeyclass;
2247     uint8_t re, key;
2248     int rc;
2249 
2250     addr = mmu_real2abs(env, addr);
2251     if (!mmu_absolute_addr_valid(addr, false)) {
2252         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2253     }
2254 
2255     if (unlikely(!ss)) {
2256         ss = s390_get_skeys_device();
2257         skeyclass = S390_SKEYS_GET_CLASS(ss);
2258         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2259             tlb_flush_all_cpus_synced(env_cpu(env));
2260         }
2261     }
2262 
2263     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2264     if (rc) {
2265         trace_get_skeys_nonzero(rc);
2266         return 0;
2267     }
2268 
2269     re = key & (SK_R | SK_C);
2270     key &= ~SK_R;
2271 
2272     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2273     if (rc) {
2274         trace_set_skeys_nonzero(rc);
2275         return 0;
2276     }
2277     /*
2278      * As we can only flush by virtual address and not all the entries
2279      * that point to a physical address, we have to flush the whole TLB.
2280      */
2281     tlb_flush_all_cpus_synced(env_cpu(env));
2282 
2283     /*
2284      * cc
2285      *
2286      * 0  Reference bit zero; change bit zero
2287      * 1  Reference bit zero; change bit one
2288      * 2  Reference bit one; change bit zero
2289      * 3  Reference bit one; change bit one
2290      */
2291 
2292     return re >> 1;
2293 }
2294 
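/*
 * Illustrative mapping, assuming the usual SK_R == 0x04 and
 * SK_C == 0x02 bit assignment: "re >> 1" turns the (R, C) pair
 * directly into the cc table above, e.g. a page referenced but not
 * changed has re == SK_R and yields cc == 2.
 */
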
2295 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2296                       uint64_t key)
2297 {
2298     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2299     S390Access srca, desta;
2300     uintptr_t ra = GETPC();
2301     int cc = 0;
2302 
2303     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2304                __func__, l, a1, a2);
2305 
2306     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2307         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2308         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2309     }
2310 
2311     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2312         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2313     }
2314 
2315     l = wrap_length32(env, l);
2316     if (l > 256) {
2317         /* max 256 */
2318         l = 256;
2319         cc = 3;
2320     } else if (!l) {
2321         return cc;
2322     }
2323 
2324     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2325     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2326     access_memmove(env, &desta, &srca, ra);
2327     return cc;
2328 }
2329 
2330 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2331                       uint64_t key)
2332 {
2333     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2334     S390Access srca, desta;
2335     uintptr_t ra = GETPC();
2336     int cc = 0;
2337 
2338     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2339                __func__, l, a1, a2);
2340 
2341     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2342         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2343         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2344     }
2345 
2346     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2347         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2348     }
2349 
2350     l = wrap_length32(env, l);
2351     if (l > 256) {
2352         /* max 256 */
2353         l = 256;
2354         cc = 3;
2355     } else if (!l) {
2356         return cc;
2357     }
2358 
2359     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2360     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2361     access_memmove(env, &desta, &srca, ra);
2362     return cc;
2363 }
2364 
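/*
 * Recap of the two helpers above: MVCS stores into the secondary
 * address space (destination uses MMU_SECONDARY_IDX) while MVCP
 * fetches from it (source uses MMU_SECONDARY_IDX); both move at most
 * 256 bytes, returning cc == 3 when the requested length was larger.
 */
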
2365 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2366 {
2367     CPUState *cs = env_cpu(env);
2368     const uintptr_t ra = GETPC();
2369     uint64_t table, entry, raddr;
2370     uint16_t entries, i, index = 0;
2371 
2372     if (r2 & 0xff000) {
2373         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2374     }
2375 
2376     if (!(r2 & 0x800)) {
2377         /* invalidation-and-clearing operation */
2378         table = r1 & ASCE_ORIGIN;
2379         entries = (r2 & 0x7ff) + 1;
2380 
2381         switch (r1 & ASCE_TYPE_MASK) {
2382         case ASCE_TYPE_REGION1:
2383             index = (r2 >> 53) & 0x7ff;
2384             break;
2385         case ASCE_TYPE_REGION2:
2386             index = (r2 >> 42) & 0x7ff;
2387             break;
2388         case ASCE_TYPE_REGION3:
2389             index = (r2 >> 31) & 0x7ff;
2390             break;
2391         case ASCE_TYPE_SEGMENT:
2392             index = (r2 >> 20) & 0x7ff;
2393             break;
2394         }
2395         for (i = 0; i < entries; i++) {
2396             /* addresses are not wrapped in 24/31-bit mode, but the table index is */
2397             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2398             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2399             if (!(entry & REGION_ENTRY_I)) {
2400                 /* we are allowed to not store if already invalid */
2401                 entry |= REGION_ENTRY_I;
2402                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2403             }
2404         }
2405     }
2406 
2407     /* We simply flush the complete tlb, therefore we can ignore r3. */
2408     if (m4 & 1) {
2409         tlb_flush(cs);
2410     } else {
2411         tlb_flush_all_cpus_synced(cs);
2412     }
2413 }
2414 
2415 /* invalidate pte */
2416 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2417                   uint32_t m4)
2418 {
2419     CPUState *cs = env_cpu(env);
2420     const uintptr_t ra = GETPC();
2421     uint64_t page = vaddr & TARGET_PAGE_MASK;
2422     uint64_t pte_addr, pte;
2423 
2424     /* Compute the page table entry address */
2425     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2426     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2427 
2428     /* Mark the page table entry as invalid */
2429     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2430     pte |= PAGE_ENTRY_I;
2431     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2432 
2433     /* XXX we exploit the fact that Linux passes the exact virtual
2434        address here - it's not obliged to! */
2435     if (m4 & 1) {
2436         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2437             tlb_flush_page(cs, page);
2438             /* XXX 31-bit hack */
2439             tlb_flush_page(cs, page ^ 0x80000000);
2440         } else {
2441             /* looks like we don't have a valid virtual address */
2442             tlb_flush(cs);
2443         }
2444     } else {
2445         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2446             tlb_flush_page_all_cpus_synced(cs, page);
2447             /* XXX 31-bit hack */
2448             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2449         } else {
2450             /* looks like we don't have a valid virtual address */
2451             tlb_flush_all_cpus_synced(cs);
2452         }
2453     }
2454 }
2455 
2456 /* flush local tlb */
2457 void HELPER(ptlb)(CPUS390XState *env)
2458 {
2459     tlb_flush(env_cpu(env));
2460 }
2461 
2462 /* flush global tlb */
2463 void HELPER(purge)(CPUS390XState *env)
2464 {
2465     tlb_flush_all_cpus_synced(env_cpu(env));
2466 }
2467 
2468 /* load real address */
2469 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2470 {
2471     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2472     uint64_t ret, tec;
2473     int flags, exc, cc;
2474 
2475     /* XXX incomplete - has more corner cases */
2476     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2477         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2478     }
2479 
2480     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2481     if (exc) {
2482         cc = 3;
2483         ret = exc | 0x80000000;
2484     } else {
2485         cc = 0;
2486         ret |= addr & ~TARGET_PAGE_MASK;
2487     }
2488 
2489     env->cc_op = cc;
2490     return ret;
2491 }
2492 #endif
2493 
2494 /* load pair from quadword */
2495 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2496 {
2497     uintptr_t ra = GETPC();
2498     uint64_t hi, lo;
2499 
2500     check_alignment(env, addr, 16, ra);
2501     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2502     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2503 
2504     env->retxl = lo;
2505     return hi;
2506 }
2507 
2508 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2509 {
2510     uintptr_t ra = GETPC();
2511     uint64_t hi, lo;
2512     int mem_idx;
2513     MemOpIdx oi;
2514     Int128 v;
2515 
2516     assert(HAVE_ATOMIC128);
2517 
2518     mem_idx = cpu_mmu_index(env, false);
2519     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2520     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2521     hi = int128_gethi(v);
2522     lo = int128_getlo(v);
2523 
2524     env->retxl = lo;
2525     return hi;
2526 }
2527 
2528 /* store pair to quadword */
2529 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2530                   uint64_t low, uint64_t high)
2531 {
2532     uintptr_t ra = GETPC();
2533 
2534     check_alignment(env, addr, 16, ra);
2535     cpu_stq_data_ra(env, addr + 0, high, ra);
2536     cpu_stq_data_ra(env, addr + 8, low, ra);
2537 }
2538 
2539 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2540                            uint64_t low, uint64_t high)
2541 {
2542     uintptr_t ra = GETPC();
2543     int mem_idx;
2544     MemOpIdx oi;
2545     Int128 v;
2546 
2547     assert(HAVE_ATOMIC128);
2548 
2549     mem_idx = cpu_mmu_index(env, false);
2550     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2551     v = int128_make128(low, high);
2552     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2553 }
2554 
2555 /* Execute instruction.  This instruction executes an insn modified with
2556    the contents of r1.  It does not change the executed instruction in memory;
2557    it does not change the program counter.
2558 
2559    Perform this by recording the modified instruction in env->ex_value.
2560    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2561 */
2562 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2563 {
2564     uint64_t insn = cpu_lduw_code(env, addr);
2565     uint8_t opc = insn >> 8;
2566 
2567     /* Or in the contents of R1[56:63].  */
2568     insn |= r1 & 0xff;
2569 
2570     /* Load the rest of the instruction.  */
2571     insn <<= 48;
2572     switch (get_ilen(opc)) {
2573     case 2:
2574         break;
2575     case 4:
2576         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2577         break;
2578     case 6:
2579         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2580         break;
2581     default:
2582         g_assert_not_reached();
2583     }
2584 
2585     /* The most common cases can be sped up by avoiding a new TB.  */
2586     if ((opc & 0xf0) == 0xd0) {
2587         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2588                                       uint64_t, uintptr_t);
2589         static const dx_helper dx[16] = {
2590             [0x0] = do_helper_trt_bkwd,
2591             [0x2] = do_helper_mvc,
2592             [0x4] = do_helper_nc,
2593             [0x5] = do_helper_clc,
2594             [0x6] = do_helper_oc,
2595             [0x7] = do_helper_xc,
2596             [0xc] = do_helper_tr,
2597             [0xd] = do_helper_trt_fwd,
2598         };
2599         dx_helper helper = dx[opc & 0xf];
2600 
2601         if (helper) {
2602             uint32_t l = extract64(insn, 48, 8);
2603             uint32_t b1 = extract64(insn, 44, 4);
2604             uint32_t d1 = extract64(insn, 32, 12);
2605             uint32_t b2 = extract64(insn, 28, 4);
2606             uint32_t d2 = extract64(insn, 16, 12);
2607             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2608             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2609 
2610             env->cc_op = helper(env, l, a1, a2, 0);
2611             env->psw.addr += ilen;
2612             return;
2613         }
2614     } else if (opc == 0x0a) {
2615         env->int_svc_code = extract64(insn, 48, 8);
2616         env->int_svc_ilen = ilen;
2617         helper_exception(env, EXCP_SVC);
2618         g_assert_not_reached();
2619     }
2620 
2621     /* Record the insn we want to execute as well as the ilen to use
2622        during the execution of the target insn.  This will also ensure
2623        that ex_value is non-zero, which flags that we are in a state
2624        that requires such execution.  */
2625     env->ex_value = insn | ilen;
2626 }
2627 
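/*
 * Illustrative layout of env->ex_value as built above: the target
 * insn (with its second byte already OR-ed with R1[56:63]) occupies
 * the high bits starting at bit 48, any further halfwords follow
 * below it, and ilen (2, 4 or 6) sits in the low bits, which the
 * insn's encoding never reaches.
 */
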
2628 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2629                        uint64_t len)
2630 {
2631     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2632     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2633     const uint64_t r0 = env->regs[0];
2634     const uintptr_t ra = GETPC();
2635     uint8_t dest_key, dest_as, dest_k, dest_a;
2636     uint8_t src_key, src_as, src_k, src_a;
2637     uint64_t val;
2638     int cc = 0;
2639 
2640     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2641                __func__, dest, src, len);
2642 
2643     if (!(env->psw.mask & PSW_MASK_DAT)) {
2644         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2645     }
2646 
2647     /* OAC (operand access control) for the first operand -> dest */
2648     val = (r0 & 0xffff0000ULL) >> 16;
2649     dest_key = (val >> 12) & 0xf;
2650     dest_as = (val >> 6) & 0x3;
2651     dest_k = (val >> 1) & 0x1;
2652     dest_a = val & 0x1;
2653 
2654     /* OAC (operand access control) for the second operand -> src */
2655     val = (r0 & 0x0000ffffULL);
2656     src_key = (val >> 12) & 0xf;
2657     src_as = (val >> 6) & 0x3;
2658     src_k = (val >> 1) & 0x1;
2659     src_a = val & 0x1;
2660 
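    /*
     * Illustrative OAC layout as decoded above, in PoO big-endian bit
     * numbering of each 16-bit half of GR0: bits 0-3 access key,
     * bits 8-9 address-space control, bit 14 the key-validity flag
     * (K) and bit 15 the AS-validity flag (A).
     */
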
2661     if (!dest_k) {
2662         dest_key = psw_key;
2663     }
2664     if (!src_k) {
2665         src_key = psw_key;
2666     }
2667     if (!dest_a) {
2668         dest_as = psw_as;
2669     }
2670     if (!src_a) {
2671         src_as = psw_as;
2672     }
2673 
2674     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2675         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2676     }
2677     if (!(env->cregs[0] & CR0_SECONDARY) &&
2678         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2679         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2680     }
2681     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2682         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2683     }
2684 
2685     len = wrap_length32(env, len);
2686     if (len > 4096) {
2687         cc = 3;
2688         len = 4096;
2689     }
2690 
2691     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2692     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2693         (env->psw.mask & PSW_MASK_PSTATE)) {
2694         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2695                       __func__);
2696         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2697     }
2698 
2699     /* FIXME: Access using correct keys and AR-mode */
2700     if (len) {
2701         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2702                                          mmu_idx_from_as(src_as), ra);
2703         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2704                                           mmu_idx_from_as(dest_as), ra);
2705 
2706         access_memmove(env, &desta, &srca, ra);
2707     }
2708 
2709     return cc;
2710 }
2711 
2712 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2713    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2714    value >= 0 indicates failure, and the CC value to be returned.  */
2715 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2716                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2717                                  uint32_t *ochar, uint32_t *olen);
2718 
2719 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2720    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2721    indicates failure, and the CC value to be returned.  */
2722 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2723                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2724                                  uint32_t *olen);
2725 
2726 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2727                        bool enh_check, uintptr_t ra,
2728                        uint32_t *ochar, uint32_t *olen)
2729 {
2730     uint8_t s0, s1, s2, s3;
2731     uint32_t c, l;
2732 
2733     if (ilen < 1) {
2734         return 0;
2735     }
2736     s0 = cpu_ldub_data_ra(env, addr, ra);
2737     if (s0 <= 0x7f) {
2738         /* one byte character */
2739         l = 1;
2740         c = s0;
2741     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2742         /* invalid character */
2743         return 2;
2744     } else if (s0 <= 0xdf) {
2745         /* two byte character */
2746         l = 2;
2747         if (ilen < 2) {
2748             return 0;
2749         }
2750         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2751         c = s0 & 0x1f;
2752         c = (c << 6) | (s1 & 0x3f);
2753         if (enh_check && (s1 & 0xc0) != 0x80) {
2754             return 2;
2755         }
2756     } else if (s0 <= 0xef) {
2757         /* three byte character */
2758         l = 3;
2759         if (ilen < 3) {
2760             return 0;
2761         }
2762         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2763         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2764         c = s0 & 0x0f;
2765         c = (c << 6) | (s1 & 0x3f);
2766         c = (c << 6) | (s2 & 0x3f);
2767         /* Fold the byte-by-byte range descriptions in the PoO into
2768            tests against the complete value.  It disallows encodings
2769            that could be smaller, and the UTF-16 surrogates.  */
2770         if (enh_check
2771             && ((s1 & 0xc0) != 0x80
2772                 || (s2 & 0xc0) != 0x80
2773                 || c < 0x800
2774                 || (c >= 0xd800 && c <= 0xdfff))) {
2775             return 2;
2776         }
2777     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2778         /* four byte character */
2779         l = 4;
2780         if (ilen < 4) {
2781             return 0;
2782         }
2783         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2784         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2785         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2786         c = s0 & 0x07;
2787         c = (c << 6) | (s1 & 0x3f);
2788         c = (c << 6) | (s2 & 0x3f);
2789         c = (c << 6) | (s3 & 0x3f);
2790         /* See above.  */
2791         if (enh_check
2792             && ((s1 & 0xc0) != 0x80
2793                 || (s2 & 0xc0) != 0x80
2794                 || (s3 & 0xc0) != 0x80
2795                 || c < 0x010000
2796                 || c > 0x10ffff)) {
2797             return 2;
2798         }
2799     } else {
2800         /* invalid character */
2801         return 2;
2802     }
2803 
2804     *ochar = c;
2805     *olen = l;
2806     return -1;
2807 }
2808 
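/*
 * Illustrative decode through the three-byte path above: the sequence
 * 0xe2 0x82 0xac reassembles as ((0x02 << 6) | 0x02) << 6 | 0x2c ==
 * 0x20ac, i.e. U+20AC EURO SIGN, with an input length of 3.
 */
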
2809 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2810                         bool enh_check, uintptr_t ra,
2811                         uint32_t *ochar, uint32_t *olen)
2812 {
2813     uint16_t s0, s1;
2814     uint32_t c, l;
2815 
2816     if (ilen < 2) {
2817         return 0;
2818     }
2819     s0 = cpu_lduw_data_ra(env, addr, ra);
2820     if ((s0 & 0xfc00) != 0xd800) {
2821         /* one word character */
2822         l = 2;
2823         c = s0;
2824     } else {
2825         /* two word character */
2826         l = 4;
2827         if (ilen < 4) {
2828             return 0;
2829         }
2830         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2831         c = extract32(s0, 6, 4) + 1;
2832         c = (c << 6) | (s0 & 0x3f);
2833         c = (c << 10) | (s1 & 0x3ff);
2834         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2835             /* invalid surrogate character */
2836             return 2;
2837         }
2838     }
2839 
2840     *ochar = c;
2841     *olen = l;
2842     return -1;
2843 }
2844 
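/*
 * Illustrative decode of a surrogate pair: s0 == 0xd800 and
 * s1 == 0xdc00 give c == (((0 + 1) << 6 | 0) << 10) | 0 == 0x10000,
 * the first character beyond the BMP; the "+ 1" above restores the
 * plane offset that UTF-16 encoding subtracts.
 */
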
2845 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2846                         bool enh_check, uintptr_t ra,
2847                         uint32_t *ochar, uint32_t *olen)
2848 {
2849     uint32_t c;
2850 
2851     if (ilen < 4) {
2852         return 0;
2853     }
2854     c = cpu_ldl_data_ra(env, addr, ra);
2855     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2856         /* invalid unicode character */
2857         return 2;
2858     }
2859 
2860     *ochar = c;
2861     *olen = 4;
2862     return -1;
2863 }
2864 
2865 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2866                        uintptr_t ra, uint32_t c, uint32_t *olen)
2867 {
2868     uint8_t d[4];
2869     uint32_t l, i;
2870 
2871     if (c <= 0x7f) {
2872         /* one byte character */
2873         l = 1;
2874         d[0] = c;
2875     } else if (c <= 0x7ff) {
2876         /* two byte character */
2877         l = 2;
2878         d[1] = 0x80 | extract32(c, 0, 6);
2879         d[0] = 0xc0 | extract32(c, 6, 5);
2880     } else if (c <= 0xffff) {
2881         /* three byte character */
2882         l = 3;
2883         d[2] = 0x80 | extract32(c, 0, 6);
2884         d[1] = 0x80 | extract32(c, 6, 6);
2885         d[0] = 0xe0 | extract32(c, 12, 4);
2886     } else {
2887         /* four byte character */
2888         l = 4;
2889         d[3] = 0x80 | extract32(c, 0, 6);
2890         d[2] = 0x80 | extract32(c, 6, 6);
2891         d[1] = 0x80 | extract32(c, 12, 6);
2892         d[0] = 0xf0 | extract32(c, 18, 3);
2893     }
2894 
2895     if (ilen < l) {
2896         return 1;
2897     }
2898     for (i = 0; i < l; ++i) {
2899         cpu_stb_data_ra(env, addr + i, d[i], ra);
2900     }
2901 
2902     *olen = l;
2903     return -1;
2904 }
2905 
2906 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2907                         uintptr_t ra, uint32_t c, uint32_t *olen)
2908 {
2909     uint16_t d0, d1;
2910 
2911     if (c <= 0xffff) {
2912         /* one word character */
2913         if (ilen < 2) {
2914             return 1;
2915         }
2916         cpu_stw_data_ra(env, addr, c, ra);
2917         *olen = 2;
2918     } else {
2919         /* two word character */
2920         if (ilen < 4) {
2921             return 1;
2922         }
2923         d1 = 0xdc00 | extract32(c, 0, 10);
2924         d0 = 0xd800 | extract32(c, 10, 6);
2925         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2926         cpu_stw_data_ra(env, addr + 0, d0, ra);
2927         cpu_stw_data_ra(env, addr + 2, d1, ra);
2928         *olen = 4;
2929     }
2930 
2931     return -1;
2932 }
2933 
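/*
 * Illustrative encode, inverse of the decode above: c == 0x10437
 * produces d0 == 0xd801 and d1 == 0xdc37, the standard UTF-16
 * surrogate pair for U+10437.
 */
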
2934 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2935                         uintptr_t ra, uint32_t c, uint32_t *olen)
2936 {
2937     if (ilen < 4) {
2938         return 1;
2939     }
2940     cpu_stl_data_ra(env, addr, c, ra);
2941     *olen = 4;
2942     return -1;
2943 }
2944 
2945 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2946                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2947                                        decode_unicode_fn decode,
2948                                        encode_unicode_fn encode)
2949 {
2950     uint64_t dst = get_address(env, r1);
2951     uint64_t dlen = get_length(env, r1 + 1);
2952     uint64_t src = get_address(env, r2);
2953     uint64_t slen = get_length(env, r2 + 1);
2954     bool enh_check = m3 & 1;
2955     int cc, i;
2956 
2957     /* Lest we fail to service interrupts in a timely manner, limit the
2958        amount of work we're willing to do.  For now, let's cap at 256.  */
2959     for (i = 0; i < 256; ++i) {
2960         uint32_t c, ilen, olen;
2961 
2962         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2963         if (unlikely(cc >= 0)) {
2964             break;
2965         }
2966         cc = encode(env, dst, dlen, ra, c, &olen);
2967         if (unlikely(cc >= 0)) {
2968             break;
2969         }
2970 
2971         src += ilen;
2972         slen -= ilen;
2973         dst += olen;
2974         dlen -= olen;
2975         cc = 3;
2976     }
2977 
2978     set_address(env, r1, dst);
2979     set_length(env, r1 + 1, dlen);
2980     set_address(env, r2, src);
2981     set_length(env, r2 + 1, slen);
2982 
2983     return cc;
2984 }
2985 
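/*
 * Recap of the loop above: a negative cc from decode/encode means
 * "converted, keep going"; decode returns 0 (source exhausted) or 2
 * (invalid character) and encode returns 1 (destination full) to end
 * the loop with that cc; 256 complete conversions end it with cc == 3
 * so that the guest resumes the instruction.
 */
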
2986 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2987 {
2988     return convert_unicode(env, r1, r2, m3, GETPC(),
2989                            decode_utf8, encode_utf16);
2990 }
2991 
2992 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2993 {
2994     return convert_unicode(env, r1, r2, m3, GETPC(),
2995                            decode_utf8, encode_utf32);
2996 }
2997 
2998 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2999 {
3000     return convert_unicode(env, r1, r2, m3, GETPC(),
3001                            decode_utf16, encode_utf8);
3002 }
3003 
3004 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3005 {
3006     return convert_unicode(env, r1, r2, m3, GETPC(),
3007                            decode_utf16, encode_utf32);
3008 }
3009 
3010 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3011 {
3012     return convert_unicode(env, r1, r2, m3, GETPC(),
3013                            decode_utf32, encode_utf8);
3014 }
3015 
3016 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3017 {
3018     return convert_unicode(env, r1, r2, m3, GETPC(),
3019                            decode_utf32, encode_utf16);
3020 }
3021 
3022 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3023                         uintptr_t ra)
3024 {
3025     /* test the actual access, not just any access to the page due to LAP */
3026     while (len) {
3027         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3028         const uint64_t curlen = MIN(pagelen, len);
3029 
3030         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3031         addr = wrap_address(env, addr + curlen);
3032         len -= curlen;
3033     }
3034 }
3035 
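/*
 * Illustrative example with 4K pages: probing addr == 0xff0 with
 * len == 0x30 issues a 0x10-byte probe on the first page and a
 * 0x20-byte probe on the next, so a fault is raised for every page
 * the access would actually touch.
 */
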
3036 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3037 {
3038     probe_write_access(env, addr, len, GETPC());
3039 }
3040