/*
 *  S/390 memory access helper routines
 *
 *  Copyright (c) 2009 Ulrich Hecht
 *  Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "tcg_s390x.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "trace.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/s390x/storage-keys.h"
#include "hw/boards.h"
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif

static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
        return pkm & (0x8000 >> psw_key);
    }
    return true;
}
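
/*
 * Illustrative sketch (values assumed, not from the PoP): with PSW key 2
 * in problem state and CR3 = 0x20000000, pkm is 0x2000 and the check
 * computes 0x2000 & (0x8000 >> 2) != 0, so the key is valid.  In
 * supervisor state every key is accepted.
 */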

static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}
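
/*
 * Worked example with assumed, illustrative addresses (64-bit mode, no
 * wrap): for len = 16 and src = 0x1000,
 *   dest = 0x1008 -> true  (dest starts inside [src, src + len - 1])
 *   dest = 0x0ff8 -> false (dest < src; a forward copy is safe)
 *   dest = 0x1010 -> false (dest starts just past the source range)
 */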

/* Trigger a SPECIFICATION exception if an address or a length is not
   naturally aligned.  */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    if (v % wordsize) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }
}

/* Load a value from memory according to its size.  */
static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
                                           int wordsize, uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        return cpu_ldub_data_ra(env, addr, ra);
    case 2:
        return cpu_lduw_data_ra(env, addr, ra);
    default:
        abort();
    }
}

/* Store a value to memory according to its size.  */
static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
                                      uint64_t value, int wordsize,
                                      uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        cpu_stb_data_ra(env, addr, value, ra);
        break;
    case 2:
        cpu_stw_data_ra(env, addr, value, ra);
        break;
    default:
        abort();
    }
}

/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;
    target_ulong vaddr2;
    void *haddr1;
    void *haddr2;
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we had TLB_NOTDIRTY on LAP pages might
     * we trigger a new MMU translation, and it is very unlikely that the
     * mapping changes in between such that we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
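
/*
 * Example split (assuming 4k pages): a 300-byte access starting at
 * vaddr 0xfff80 yields size1 = 128 (the bytes up to the page end),
 * size2 = 172 and vaddr2 = 0x100000, the start of the second page.
 */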

/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    int flags = probe_access_flags(env, addr, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
        assert(!nonfault);
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}

static int access_prepare_nf(S390Access *access, CPUS390XState *env,
                             bool nonfault, vaddr vaddr1, int size,
                             MMUAccessType access_type,
                             int mmu_idx, uintptr_t ra)
{
    int size1, size2, exc;

    assert(size > 0 && size <= 4096);

    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
    size2 = size - size1;

    memset(access, 0, sizeof(*access));
    access->vaddr1 = vaddr1;
    access->size1 = size1;
    access->size2 = size2;
    access->mmu_idx = mmu_idx;

    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
                            &access->haddr1, ra);
    if (unlikely(exc)) {
        return exc;
    }
    if (unlikely(size2)) {
        /* The access crosses page boundaries. */
        vaddr vaddr2 = wrap_address(env, vaddr1 + size1);

        access->vaddr2 = vaddr2;
        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
                                nonfault, &access->haddr2, ra);
        if (unlikely(exc)) {
            return exc;
        }
    }
    return 0;
}

static inline void access_prepare(S390Access *ret, CPUS390XState *env,
                                  vaddr vaddr, int size,
                                  MMUAccessType access_type, int mmu_idx,
                                  uintptr_t ra)
{
    int exc = access_prepare_nf(ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
}

/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        cpu_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            memset(haddr + 1, byte, size - 1);
        } else {
            for (i = 1; i < size; i++) {
                cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}

static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (likely(!desta->size2)) {
        return;
    }
    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
                     desta->mmu_idx, ra);
}

static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr,
                                  void **haddr, int offset,
                                  int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}

static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
                               int offset, uintptr_t ra)
{
    if (offset < access->size1) {
        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
                                  offset, access->mmu_idx, ra);
    }
    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
                              offset - access->size1, access->mmu_idx, ra);
}

static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, void **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}

static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    if (offset < access->size1) {
        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
                           access->mmu_idx, ra);
    } else {
        do_access_set_byte(env, access->vaddr2, &access->haddr2,
                           offset - access->size1, byte, access->mmu_idx, ra);
    }
}

/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fall back to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    if (srca->size1 == desta->size1) {
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
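
/*
 * Sketch of the srca->size1 < desta->size1 case with assumed sizes:
 * srca = {size1 = 100, size2 = 50}, desta = {size1 = 120, size2 = 30},
 * so diff = 20.  The first destination page receives the 100 bytes of
 * the first source page plus the first 20 bytes of the second one; the
 * remaining 30 bytes of the second source page fill destination page 2.
 */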

static int mmu_idx_from_as(uint8_t as)
{
    switch (as) {
    case AS_PRIMARY:
        return MMU_PRIMARY_IDX;
    case AS_SECONDARY:
        return MMU_SECONDARY_IDX;
    case AS_HOME:
        return MMU_HOME_IDX;
    default:
        /* FIXME AS_ACCREG */
        g_assert_not_reached();
    }
}

/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}

/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
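
/*
 * Usage note: "XC x(len),x" with identical operands is the classic s390
 * idiom for clearing storage, which is why src == dest is special-cased
 * as memset(0) above.  A hypothetical guest clearing 256 bytes at
 * 0x2000 reaches this helper as do_helper_xc(env, 255, 0x2000, 0x2000,
 * ra) and returns cc 0.
 */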

/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}

/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    return env->cc_op;
}

void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
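
/*
 * Worked example for the dest == src + 1 special case above: guests use
 * "MVC 1(len,Rx),0(Rx)" to replicate a byte.  Processed one byte at a
 * time, each load at offset i sees the byte stored by the previous
 * iteration, so the destination becomes len copies of the first source
 * byte - exactly what the access_memset() shortcut produces in one step.
 */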

/* move right to left */
void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t ra = GETPC();
    S390Access srca, desta;
    int32_t i;

    /* MVCRL always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    for (i = l - 1; i >= 0; i--) {
        uint8_t byte = access_get_byte(env, &srca, i, ra);
        access_set_byte(env, &desta, i, byte, ra);
    }
}

/* move inverse  */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    src = wrap_address(env, src - l + 1);
    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move numerics  */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
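
/*
 * Illustrative MVO example (assumed operands): with len_dest = 3 and
 * len_src = 2, src = 0x12 0x34 and dest = 0xaa 0xbb 0xc5 produce
 * dest = 0x01 0x23 0x45 - the source digits are shifted left by one
 * nibble and the rightmost (sign) nibble of the destination survives.
 */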

/* move zones  */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            cc = 1;
            break;
        } else if (x > y) {
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    return cc;
}

uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}

/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
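
/*
 * Worked example (assumed values): mask 0b1010 selects the first and
 * third bytes of r1 and compares them against two consecutive bytes at
 * addr.  With r1 = 0x11223344 and memory 0x11 0x33 the result is cc 0;
 * if the second memory byte were 0x22 instead, cc would be 2 because
 * 0x33 > 0x22.
 */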

static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}

/*
 * Store the address to the given register, zeroing out unused leftmost
 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 */
static inline void set_address_zero(CPUS390XState *env, int reg,
                                    uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            address &= 0x00ffffff;
        } else {
            address &= 0x7fffffff;
        }
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    }
}

static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
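
/*
 * Example of the mode-dependent stores in set_address() (illustrative
 * values): writing address 0x0123456789abcdef into a register holding
 * all ones leaves
 *   64-bit mode: 0x0123456789abcdef
 *   31-bit mode: 0xffffffff09abcdef (bit 32 cleared, bits 0-31 kept)
 *   24-bit mode: 0xffffffffffabcdef (bits 32-39 left unchanged)
 */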

static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        return (uint32_t)length;
    }
    return length;
}

static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        length &= 0x7fffffff;
    }
    return length;
}

static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}

static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
    } else {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    }
}

/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must all be 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

/* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}

/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}

/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}

/* load access registers r1 to r3 from memory at a2 */
void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* store access registers r1 to r3 in memory at a2 */
void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
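
/*
 * Padding sketch for the two-byte (wordsize == 2, MVCLU) case above,
 * with assumed values: pad = 0xaabb and a remaining *destlen of 4
 * write the bytes 0xaa 0xbb 0xaa 0xbb - an even remaining length
 * stores pad >> 8, an odd one stores the low pad byte.
 */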

/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}

/* move long extended */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* move long unicode */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}

/* compare logical long */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}

/* compare logical long extended memcompare insn with padding */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* compare logical long unicode memcompare insn with padding */
uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* checksum */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
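
/*
 * Folding example (assumed values): the 32-bit sum can carry into the
 * upper half, e.g. 0xffffffff + 0x00000002 = 0x100000001; one fold
 * step yields (uint32_t)0x00000001 + 0x1 = 0x00000002 with no further
 * carry-out, implementing CKSM's end-around-carry accumulation.
 */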

void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
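
/*
 * Worked PACK example (assumed operands): len = 0x23 encodes
 * len_dest = 2 and len_src = 3, i.e. a 3-byte destination and a 4-byte
 * source.  Zoned source 0xf1 0xf2 0xf3 0xc4 packs to 0x01 0x23 0x4c:
 * the sign nibble swaps to the right of the last digit and the zone
 * nibbles of the remaining digits are dropped.
 */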
1435 
1436 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1437                            uint32_t srclen, int ssize, uintptr_t ra)
1438 {
1439     int i;
1440     /* The destination operand is always 16 bytes long.  */
1441     const int destlen = 16;
1442 
1443     /* The operands are processed from right to left.  */
1444     src += srclen - 1;
1445     dest += destlen - 1;
1446 
1447     for (i = 0; i < destlen; i++) {
1448         uint8_t b = 0;
1449 
1450         /* Start with a positive sign */
1451         if (i == 0) {
1452             b = 0xc;
1453         } else if (srclen > ssize) {
1454             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1455             src -= ssize;
1456             srclen -= ssize;
1457         }
1458 
1459         if (srclen > ssize) {
1460             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1461             src -= ssize;
1462             srclen -= ssize;
1463         }
1464 
1465         cpu_stb_data_ra(env, dest, b, ra);
1466         dest--;
1467     }
1468 }
1469 
1470 
1471 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1472                  uint32_t srclen)
1473 {
1474     do_pkau(env, dest, src, srclen, 1, GETPC());
1475 }
1476 
1477 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1478                  uint32_t srclen)
1479 {
1480     do_pkau(env, dest, src, srclen, 2, GETPC());
1481 }
1482 
1483 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1484                   uint64_t src)
1485 {
1486     uintptr_t ra = GETPC();
1487     int len_dest = len >> 4;
1488     int len_src = len & 0xf;
1489     uint8_t b;
1490     int second_nibble = 0;
1491 
1492     dest += len_dest;
1493     src += len_src;
1494 
1495     /* last byte is special, it only flips the nibbles */
1496     b = cpu_ldub_data_ra(env, src, ra);
1497     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1498     src--;
1499     len_src--;
1500 
1501     /* now pad every nibble with 0xf0 */
1502 
1503     while (len_dest > 0) {
1504         uint8_t cur_byte = 0;
1505 
1506         if (len_src > 0) {
1507             cur_byte = cpu_ldub_data_ra(env, src, ra);
1508         }
1509 
1510         len_dest--;
1511         dest--;
1512 
1513         /* only advance one nibble at a time */
1514         if (second_nibble) {
1515             cur_byte >>= 4;
1516             len_src--;
1517             src--;
1518         }
1519         second_nibble = !second_nibble;
1520 
1521         /* digit */
1522         cur_byte = (cur_byte & 0xf);
1523         /* zone bits */
1524         cur_byte |= 0xf0;
1525 
1526         cpu_stb_data_ra(env, dest, cur_byte, ra);
1527     }
1528 }
1529 
1530 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1531                                  uint32_t destlen, int dsize, uint64_t src,
1532                                  uintptr_t ra)
1533 {
1534     int i;
1535     uint32_t cc;
1536     uint8_t b;
1537     /* The source operand is always 16 bytes long.  */
1538     const int srclen = 16;
1539 
1540     /* The operands are processed from right to left.  */
1541     src += srclen - 1;
1542     dest += destlen - dsize;
1543 
1544     /* Check for the sign.  */
1545     b = cpu_ldub_data_ra(env, src, ra);
1546     src--;
1547     switch (b & 0xf) {
1548     case 0xa:
1549     case 0xc:
1550     case 0xe ... 0xf:
1551         cc = 0;  /* plus */
1552         break;
1553     case 0xb:
1554     case 0xd:
1555         cc = 1;  /* minus */
1556         break;
1557     default:
1558     case 0x0 ... 0x9:
1559         cc = 3;  /* invalid */
1560         break;
1561     }
1562 
1563     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1564     for (i = 0; i < destlen; i += dsize) {
1565         if (i == (31 * dsize)) {
1566             /* If length is 32/64 bytes, the leftmost byte is 0. */
1567             b = 0;
1568         } else if (i % (2 * dsize)) {
1569             b = cpu_ldub_data_ra(env, src, ra);
1570             src--;
1571         } else {
1572             b >>= 4;
1573         }
1574         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1575         dest -= dsize;
1576     }
1577 
1578     return cc;
1579 }
1580 
1581 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1582                        uint64_t src)
1583 {
1584     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1585 }
1586 
1587 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1588                        uint64_t src)
1589 {
1590     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1591 }
1592 
1593 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1594 {
1595     uintptr_t ra = GETPC();
1596     uint32_t cc = 0;
1597     int i;
1598 
1599     for (i = 0; i < destlen; i++) {
1600         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1601         /* digit */
1602         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1603 
1604         if (i == (destlen - 1)) {
1605             /* sign */
1606             cc |= (b & 0xf) < 0xa ? 1 : 0;
1607         } else {
1608             /* digit */
1609             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1610         }
1611     }
1612 
1613     return cc;
1614 }
1615 
1616 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1617                              uint64_t trans, uintptr_t ra)
1618 {
1619     uint32_t i;
1620 
1621     for (i = 0; i <= len; i++) {
1622         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1623         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1624         cpu_stb_data_ra(env, array + i, new_byte, ra);
1625     }
1626 
1627     return env->cc_op;
1628 }
1629 
1630 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1631                 uint64_t trans)
1632 {
1633     do_helper_tr(env, len, array, trans, GETPC());
1634 }
1635 
1636 Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
1637                    uint64_t len, uint64_t trans)
1638 {
1639     uintptr_t ra = GETPC();
1640     uint8_t end = env->regs[0] & 0xff;
1641     uint64_t l = len;
1642     uint64_t i;
1643     uint32_t cc = 0;
1644 
1645     if (!(env->psw.mask & PSW_MASK_64)) {
1646         array &= 0x7fffffff;
1647         l = (uint32_t)l;
1648     }
1649 
1650     /* Lest we fail to service interrupts in a timely manner, limit the
1651        amount of work we're willing to do.  For now, let's cap at 8k.  */
1652     if (l > 0x2000) {
1653         l = 0x2000;
1654         cc = 3;
1655     }
1656 
1657     for (i = 0; i < l; i++) {
1658         uint8_t byte, new_byte;
1659 
1660         byte = cpu_ldub_data_ra(env, array + i, ra);
1661 
1662         if (byte == end) {
1663             cc = 1;
1664             break;
1665         }
1666 
1667         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1668         cpu_stb_data_ra(env, array + i, new_byte, ra);
1669     }
1670 
1671     env->cc_op = cc;
1672     return int128_make128(len - i, array + i);
1673 }
1674 
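/* Common code for TRT (INC == 1) and TRTR (INC == -1): scan for the
   first operand byte whose function byte in the table is nonzero.  On
   a hit, GR1 receives the argument address and the low byte of GR2 the
   function byte; cc is 2 for a hit on the last byte, 1 for an earlier
   hit, and 0 if no nonzero function byte is found.  */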
1675 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1676                                      uint64_t array, uint64_t trans,
1677                                      int inc, uintptr_t ra)
1678 {
1679     int i;
1680 
1681     for (i = 0; i <= len; i++) {
1682         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1683         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1684 
1685         if (sbyte != 0) {
1686             set_address(env, 1, array + i * inc);
1687             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1688             return (i == len) ? 2 : 1;
1689         }
1690     }
1691 
1692     return 0;
1693 }
1694 
1695 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1696                                   uint64_t array, uint64_t trans,
1697                                   uintptr_t ra)
1698 {
1699     return do_helper_trt(env, len, array, trans, 1, ra);
1700 }
1701 
1702 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1703                      uint64_t trans)
1704 {
1705     return do_helper_trt(env, len, array, trans, 1, GETPC());
1706 }
1707 
1708 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1709                                    uint64_t array, uint64_t trans,
1710                                    uintptr_t ra)
1711 {
1712     return do_helper_trt(env, len, array, trans, -1, ra);
1713 }
1714 
1715 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1716                       uint64_t trans)
1717 {
1718     return do_helper_trt(env, len, array, trans, -1, GETPC());
1719 }
1720 
1721 /* Translate one/two to one/two */
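/* R1 designates the destination (address in R1, remaining length in
   R1 + 1) and R2 the source address; the translation table comes from
   GR1, and TST is the test character against which each translated
   character is compared, ending the operation with cc 1 on a match.  */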
1722 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1723                       uint32_t tst, uint32_t sizes)
1724 {
1725     uintptr_t ra = GETPC();
1726     int dsize = (sizes & 1) ? 1 : 2;
1727     int ssize = (sizes & 2) ? 1 : 2;
1728     uint64_t tbl = get_address(env, 1);
1729     uint64_t dst = get_address(env, r1);
1730     uint64_t len = get_length(env, r1 + 1);
1731     uint64_t src = get_address(env, r2);
1732     uint32_t cc = 3;
1733     int i;
1734 
1735     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1736        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1737        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1738     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1739         tbl &= -4096;
1740     } else {
1741         tbl &= -8;
1742     }
1743 
1744     check_alignment(env, len, ssize, ra);
1745 
1746     /* Lest we fail to service interrupts in a timely manner, limit
1747        the amount of work we're willing to do.  */
1748     for (i = 0; i < 0x2000; i++) {
1749         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1750         uint64_t tble = tbl + (sval * dsize);
1751         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1752         if (dval == tst) {
1753             cc = 1;
1754             break;
1755         }
1756         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1757 
1758         len -= ssize;
1759         src += ssize;
1760         dst += dsize;
1761 
1762         if (len == 0) {
1763             cc = 0;
1764             break;
1765         }
1766     }
1767 
1768     set_address(env, r1, dst);
1769     set_length(env, r1 + 1, len);
1770     set_address(env, r2, src);
1771 
1772     return cc;
1773 }
1774 
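/* COMPARE AND SWAP AND STORE.  FC selects the compare-and-swap size at
   A1 (4, 8 or 16 bytes) and SC the size of the store at A2 (1 << SC
   bytes).  GR1 points to the parameter list: bytes 0-15 hold the
   replacement value, bytes 16-31 the store value.  */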
1775 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1776                         uint64_t a2, bool parallel)
1777 {
1778     uint32_t mem_idx = cpu_mmu_index(env, false);
1779     uintptr_t ra = GETPC();
1780     uint32_t fc = extract32(env->regs[0], 0, 8);
1781     uint32_t sc = extract32(env->regs[0], 8, 8);
1782     uint64_t pl = get_address(env, 1) & -16;
1783     uint64_t svh, svl;
1784     uint32_t cc;
1785 
1786     /* Sanity check the function code and storage characteristic.  */
1787     if (fc > 1 || sc > 3) {
1788         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1789             goto spec_exception;
1790         }
1791         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1792             goto spec_exception;
1793         }
1794     }
1795 
1796     /* Sanity check the alignments.  */
1797     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1798         goto spec_exception;
1799     }
1800 
1801     /* Sanity check writability of the store address.  */
1802     probe_write(env, a2, 1 << sc, mem_idx, ra);
1803 
1804     /*
1805      * Note that the compare-and-swap is atomic, and the store is atomic,
1806      * but the complete operation is not.  Therefore we do not need to
1807      * assert serial context in order to implement this.  That said,
1808      * restart early if we can't support either operation that is supposed
1809      * to be atomic.
1810      */
1811     if (parallel) {
1812         uint32_t max = 2;
1813 #ifdef CONFIG_ATOMIC64
1814         max = 3;
1815 #endif
1816         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1817             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1818             cpu_loop_exit_atomic(env_cpu(env), ra);
1819         }
1820     }
1821 
1822     /* All loads happen before all stores.  For simplicity, load the entire
1823        store value area from the parameter list.  */
1824     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1825     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1826 
1827     switch (fc) {
1828     case 0:
1829         {
1830             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1831             uint32_t cv = env->regs[r3];
1832             uint32_t ov;
1833 
1834             if (parallel) {
1835 #ifdef CONFIG_USER_ONLY
1836                 uint32_t *haddr = g2h(env_cpu(env), a1);
1837                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1838 #else
1839                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1840                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1841 #endif
1842             } else {
1843                 ov = cpu_ldl_data_ra(env, a1, ra);
1844                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1845             }
1846             cc = (ov != cv);
1847             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1848         }
1849         break;
1850 
1851     case 1:
1852         {
1853             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1854             uint64_t cv = env->regs[r3];
1855             uint64_t ov;
1856 
1857             if (parallel) {
1858 #ifdef CONFIG_ATOMIC64
1859                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1860                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1861 #else
1862                 /* Note that we asserted !parallel above.  */
1863                 g_assert_not_reached();
1864 #endif
1865             } else {
1866                 ov = cpu_ldq_data_ra(env, a1, ra);
1867                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1868             }
1869             cc = (ov != cv);
1870             env->regs[r3] = ov;
1871         }
1872         break;
1873 
1874     case 2:
1875         {
1876             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1877             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1878             Int128 nv = int128_make128(nvl, nvh);
1879             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1880             Int128 ov;
1881 
1882             if (!parallel) {
1883                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1884                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1885 
1886                 ov = int128_make128(ol, oh);
1887                 cc = !int128_eq(ov, cv);
1888                 if (cc) {
1889                     nv = ov;
1890                 }
1891 
1892                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1893                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1894             } else if (HAVE_CMPXCHG128) {
1895                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1896                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1897                 cc = !int128_eq(ov, cv);
1898             } else {
1899                 /* Note that we asserted !parallel above.  */
1900                 g_assert_not_reached();
1901             }
1902 
1903             env->regs[r3 + 0] = int128_gethi(ov);
1904             env->regs[r3 + 1] = int128_getlo(ov);
1905         }
1906         break;
1907 
1908     default:
1909         g_assert_not_reached();
1910     }
1911 
1912     /* Store only if the comparison succeeded.  Note that above we use a pair
1913        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1914        from the most-significant bits of svh.  */
1915     if (cc == 0) {
1916         switch (sc) {
1917         case 0:
1918             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1919             break;
1920         case 1:
1921             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1922             break;
1923         case 2:
1924             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1925             break;
1926         case 3:
1927             cpu_stq_data_ra(env, a2, svh, ra);
1928             break;
1929         case 4:
1930             if (!parallel) {
1931                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1932                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1933             } else if (HAVE_ATOMIC128) {
1934                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1935                 Int128 sv = int128_make128(svl, svh);
1936                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1937             } else {
1938                 /* Note that we asserted !parallel above.  */
1939                 g_assert_not_reached();
1940             }
1941             break;
1942         default:
1943             g_assert_not_reached();
1944         }
1945     }
1946 
1947     return cc;
1948 
1949  spec_exception:
1950     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1951 }
1952 
1953 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1954 {
1955     return do_csst(env, r3, a1, a2, false);
1956 }
1957 
1958 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1959                                uint64_t a2)
1960 {
1961     return do_csst(env, r3, a1, a2, true);
1962 }
1963 
1964 #if !defined(CONFIG_USER_ONLY)
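/* LOAD CONTROL (LCTLG): load control registers R1 through R3, wrapping
   from 15 back to 0.  A change to CR9-CR11 requires the PER
   watchpoints to be recomputed, and any load may affect address
   translation, so the TLB is always flushed.  */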
1965 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1966 {
1967     uintptr_t ra = GETPC();
1968     bool PERchanged = false;
1969     uint64_t src = a2;
1970     uint32_t i;
1971 
1972     if (src & 0x7) {
1973         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1974     }
1975 
1976     for (i = r1;; i = (i + 1) % 16) {
1977         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1978         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1979             PERchanged = true;
1980         }
1981         env->cregs[i] = val;
1982         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1983                    i, src, val);
1984         src += sizeof(uint64_t);
1985 
1986         if (i == r3) {
1987             break;
1988         }
1989     }
1990 
1991     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1992         s390_cpu_recompute_watchpoints(env_cpu(env));
1993     }
1994 
1995     tlb_flush(env_cpu(env));
1996 }
1997 
1998 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1999 {
2000     uintptr_t ra = GETPC();
2001     bool PERchanged = false;
2002     uint64_t src = a2;
2003     uint32_t i;
2004 
2005     if (src & 0x3) {
2006         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2007     }
2008 
2009     for (i = r1;; i = (i + 1) % 16) {
2010         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2011         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2012             PERchanged = true;
2013         }
2014         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2015         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2016         src += sizeof(uint32_t);
2017 
2018         if (i == r3) {
2019             break;
2020         }
2021     }
2022 
2023     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2024         s390_cpu_recompute_watchpoints(env_cpu(env));
2025     }
2026 
2027     tlb_flush(env_cpu(env));
2028 }
2029 
2030 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2031 {
2032     uintptr_t ra = GETPC();
2033     uint64_t dest = a2;
2034     uint32_t i;
2035 
2036     if (dest & 0x7) {
2037         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2038     }
2039 
2040     for (i = r1;; i = (i + 1) % 16) {
2041         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2042         dest += sizeof(uint64_t);
2043 
2044         if (i == r3) {
2045             break;
2046         }
2047     }
2048 }
2049 
2050 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2051 {
2052     uintptr_t ra = GETPC();
2053     uint64_t dest = a2;
2054     uint32_t i;
2055 
2056     if (dest & 0x3) {
2057         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2058     }
2059 
2060     for (i = r1;; i = (i + 1) % 16) {
2061         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2062         dest += sizeof(uint32_t);
2063 
2064         if (i == r3) {
2065             break;
2066         }
2067     }
2068 }
2069 
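/* TEST BLOCK: clear the 4K block at the given real address with
   doubleword stores and report cc 0 (block usable).  */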
2070 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2071 {
2072     uintptr_t ra = GETPC();
2073     int i;
2074 
2075     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2076 
2077     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2078         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2079     }
2080 
2081     return 0;
2082 }
2083 
2084 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2085 {
2086     S390CPU *cpu = env_archcpu(env);
2087     CPUState *cs = env_cpu(env);
2088 
2089     /*
2090      * TODO: we currently don't handle all access protection types
2091      * (including access-list and key-controlled) as well as AR mode.
2092      */
2093     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2094         /* Fetching permitted; storing permitted */
2095         return 0;
2096     }
2097 
2098     if (env->int_pgm_code == PGM_PROTECTION) {
2099         /* retry if reading is possible */
2100         cs->exception_index = -1;
2101         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2102             /* Fetching permitted; storing not permitted */
2103             return 1;
2104         }
2105     }
2106 
2107     switch (env->int_pgm_code) {
2108     case PGM_PROTECTION:
2109         /* Fetching not permitted; storing not permitted */
2110         cs->exception_index = -1;
2111         return 2;
2112     case PGM_ADDRESSING:
2113     case PGM_TRANS_SPEC:
2114         /* exceptions forwarded to the guest */
2115         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2116         return 0;
2117     }
2118 
2119     /* Translation not available */
2120     cs->exception_index = -1;
2121     return 3;
2122 }
2123 
2124 /* insert storage key extended */
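/* The storage-key device and class are looked up once and cached in
   local statics; if the keys were not already enabled, the TLB is
   flushed so that translations made without key checking go away.  */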
2125 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2126 {
2127     static S390SKeysState *ss;
2128     static S390SKeysClass *skeyclass;
2129     uint64_t addr = wrap_address(env, r2);
2130     uint8_t key;
2131     int rc;
2132 
2133     addr = mmu_real2abs(env, addr);
2134     if (!mmu_absolute_addr_valid(addr, false)) {
2135         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2136     }
2137 
2138     if (unlikely(!ss)) {
2139         ss = s390_get_skeys_device();
2140         skeyclass = S390_SKEYS_GET_CLASS(ss);
2141         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2142             tlb_flush_all_cpus_synced(env_cpu(env));
2143         }
2144     }
2145 
2146     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2147     if (rc) {
2148         trace_get_skeys_nonzero(rc);
2149         return 0;
2150     }
2151     return key;
2152 }
2153 
2154 /* set storage key extended */
2155 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2156 {
2157     static S390SKeysState *ss;
2158     static S390SKeysClass *skeyclass;
2159     uint64_t addr = wrap_address(env, r2);
2160     uint8_t key;
2161     int rc;
2162 
2163     addr = mmu_real2abs(env, addr);
2164     if (!mmu_absolute_addr_valid(addr, false)) {
2165         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2166     }
2167 
2168     if (unlikely(!ss)) {
2169         ss = s390_get_skeys_device();
2170         skeyclass = S390_SKEYS_GET_CLASS(ss);
2171         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2172             tlb_flush_all_cpus_synced(env_cpu(env));
2173         }
2174     }
2175 
2176     key = r1 & 0xfe;
2177     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2178     if (rc) {
2179         trace_set_skeys_nonzero(rc);
2180     }
2181     /*
2182      * As we can only flush by virtual address, and not all the entries
2183      * that point to a physical address, we have to flush the whole TLB.
2184      */
2185     tlb_flush_all_cpus_synced(env_cpu(env));
2186 }
2187 
2188 /* reset reference bit extended */
2189 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2190 {
2191     uint64_t addr = wrap_address(env, r2);
2192     static S390SKeysState *ss;
2193     static S390SKeysClass *skeyclass;
2194     uint8_t re, key;
2195     int rc;
2196 
2197     addr = mmu_real2abs(env, addr);
2198     if (!mmu_absolute_addr_valid(addr, false)) {
2199         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2200     }
2201 
2202     if (unlikely(!ss)) {
2203         ss = s390_get_skeys_device();
2204         skeyclass = S390_SKEYS_GET_CLASS(ss);
2205         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2206             tlb_flush_all_cpus_synced(env_cpu(env));
2207         }
2208     }
2209 
2210     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2211     if (rc) {
2212         trace_get_skeys_nonzero(rc);
2213         return 0;
2214     }
2215 
2216     re = key & (SK_R | SK_C);
2217     key &= ~SK_R;
2218 
2219     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2220     if (rc) {
2221         trace_set_skeys_nonzero(rc);
2222         return 0;
2223     }
2224     /*
2225      * As we can only flush by virtual address, and not all the entries
2226      * that point to a physical address, we have to flush the whole TLB.
2227      */
2228     tlb_flush_all_cpus_synced(env_cpu(env));
2229 
2230     /*
2231      * cc
2232      *
2233      * 0  Reference bit zero; change bit zero
2234      * 1  Reference bit zero; change bit one
2235      * 2  Reference bit one; change bit zero
2236      * 3  Reference bit one; change bit one
2237      */
2238 
2239     return re >> 1;
2240 }
2241 
2242 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2243                       uint64_t key)
2244 {
2245     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2246     S390Access srca, desta;
2247     uintptr_t ra = GETPC();
2248     int cc = 0;
2249 
2250     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2251                __func__, l, a1, a2);
2252 
2253     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2254         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2255         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2256     }
2257 
2258     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2259         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2260     }
2261 
2262     l = wrap_length32(env, l);
2263     if (l > 256) {
2264         /* max 256 */
2265         l = 256;
2266         cc = 3;
2267     } else if (!l) {
2268         return cc;
2269     }
2270 
2271     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2272     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2273     access_memmove(env, &desta, &srca, ra);
2274     return cc;
2275 }
2276 
2277 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2278                       uint64_t key)
2279 {
2280     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2281     S390Access srca, desta;
2282     uintptr_t ra = GETPC();
2283     int cc = 0;
2284 
2285     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2286                __func__, l, a1, a2);
2287 
2288     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2289         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2290         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2291     }
2292 
2293     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2294         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2295     }
2296 
2297     l = wrap_length32(env, l);
2298     if (l > 256) {
2299         /* max 256 */
2300         l = 256;
2301         cc = 3;
2302     } else if (!l) {
2303         return cc;
2304     }
2305     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2306     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2307     access_memmove(env, &desta, &srca, ra);
2308     return cc;
2309 }
2310 
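/* INVALIDATE DAT TABLE ENTRY.  Unless R2 bit 52 is set, mark
   (R2 & 0x7ff) + 1 consecutive region or segment table entries
   invalid; in any case flush the TLB, locally when the local-clearing
   bit (M4 & 1) is set, else on all CPUs.  */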
2311 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2312 {
2313     CPUState *cs = env_cpu(env);
2314     const uintptr_t ra = GETPC();
2315     uint64_t table, entry, raddr;
2316     uint16_t entries, i, index = 0;
2317 
2318     if (r2 & 0xff000) {
2319         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2320     }
2321 
2322     if (!(r2 & 0x800)) {
2323         /* invalidation-and-clearing operation */
2324         table = r1 & ASCE_ORIGIN;
2325         entries = (r2 & 0x7ff) + 1;
2326 
2327         switch (r1 & ASCE_TYPE_MASK) {
2328         case ASCE_TYPE_REGION1:
2329             index = (r2 >> 53) & 0x7ff;
2330             break;
2331         case ASCE_TYPE_REGION2:
2332             index = (r2 >> 42) & 0x7ff;
2333             break;
2334         case ASCE_TYPE_REGION3:
2335             index = (r2 >> 31) & 0x7ff;
2336             break;
2337         case ASCE_TYPE_SEGMENT:
2338             index = (r2 >> 20) & 0x7ff;
2339             break;
2340         }
2341         for (i = 0; i < entries; i++) {
2342             /* addresses are not wrapped in 24/31-bit mode, but the index is */
2343             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2344             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2345             if (!(entry & REGION_ENTRY_I)) {
2346                 /* we are allowed to not store if already invalid */
2347                 entry |= REGION_ENTRY_I;
2348                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2349             }
2350         }
2351     }
2352 
2353     /* We simply flush the complete tlb, therefore we can ignore r3. */
2354     if (m4 & 1) {
2355         tlb_flush(cs);
2356     } else {
2357         tlb_flush_all_cpus_synced(cs);
2358     }
2359 }
2360 
2361 /* invalidate pte */
2362 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2363                   uint32_t m4)
2364 {
2365     CPUState *cs = env_cpu(env);
2366     const uintptr_t ra = GETPC();
2367     uint64_t page = vaddr & TARGET_PAGE_MASK;
2368     uint64_t pte_addr, pte;
2369 
2370     /* Compute the page table entry address */
2371     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2372     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2373 
2374     /* Mark the page table entry as invalid */
2375     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2376     pte |= PAGE_ENTRY_I;
2377     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2378 
2379     /* XXX we exploit the fact that Linux passes the exact virtual
2380        address here - it's not obliged to! */
2381     if (m4 & 1) {
2382         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2383             tlb_flush_page(cs, page);
2384             /* XXX 31-bit hack */
2385             tlb_flush_page(cs, page ^ 0x80000000);
2386         } else {
2387             /* looks like we don't have a valid virtual address */
2388             tlb_flush(cs);
2389         }
2390     } else {
2391         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2392             tlb_flush_page_all_cpus_synced(cs, page);
2393             /* XXX 31-bit hack */
2394             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2395         } else {
2396             /* looks like we don't have a valid virtual address */
2397             tlb_flush_all_cpus_synced(cs);
2398         }
2399     }
2400 }
2401 
2402 /* flush local tlb */
2403 void HELPER(ptlb)(CPUS390XState *env)
2404 {
2405     tlb_flush(env_cpu(env));
2406 }
2407 
2408 /* flush global tlb */
2409 void HELPER(purge)(CPUS390XState *env)
2410 {
2411     tlb_flush_all_cpus_synced(env_cpu(env));
2412 }
2413 
2414 /* load real address */
2415 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2416 {
2417     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2418     uint64_t ret, tec;
2419     int flags, exc, cc;
2420 
2421     /* XXX incomplete - has more corner cases */
2422     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2423         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2424     }
2425 
2426     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2427     if (exc) {
2428         cc = 3;
2429         ret = exc | 0x80000000;
2430     } else {
2431         cc = 0;
2432         ret |= addr & ~TARGET_PAGE_MASK;
2433     }
2434 
2435     env->cc_op = cc;
2436     return ret;
2437 }
2438 #endif
2439 
2440 /* load pair from quadword */
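/* The operand must be aligned on a 16-byte boundary; the non-parallel
   variant uses two separate 8-byte loads, while the parallel variant
   performs a single atomic 128-bit load.  */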
2441 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2442 {
2443     uintptr_t ra = GETPC();
2444     uint64_t hi, lo;
2445 
2446     check_alignment(env, addr, 16, ra);
2447     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2448     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2449 
2450     env->retxl = lo;
2451     return hi;
2452 }
2453 
2454 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2455 {
2456     uintptr_t ra = GETPC();
2457     uint64_t hi, lo;
2458     int mem_idx;
2459     MemOpIdx oi;
2460     Int128 v;
2461 
2462     assert(HAVE_ATOMIC128);
2463 
2464     mem_idx = cpu_mmu_index(env, false);
2465     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2466     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2467     hi = int128_gethi(v);
2468     lo = int128_getlo(v);
2469 
2470     env->retxl = lo;
2471     return hi;
2472 }
2473 
2474 /* store pair to quadword */
2475 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2476                   uint64_t low, uint64_t high)
2477 {
2478     uintptr_t ra = GETPC();
2479 
2480     check_alignment(env, addr, 16, ra);
2481     cpu_stq_data_ra(env, addr + 0, high, ra);
2482     cpu_stq_data_ra(env, addr + 8, low, ra);
2483 }
2484 
2485 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2486                            uint64_t low, uint64_t high)
2487 {
2488     uintptr_t ra = GETPC();
2489     int mem_idx;
2490     MemOpIdx oi;
2491     Int128 v;
2492 
2493     assert(HAVE_ATOMIC128);
2494 
2495     mem_idx = cpu_mmu_index(env, false);
2496     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2497     v = int128_make128(low, high);
2498     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2499 }
2500 
2501 /* Execute instruction.  This instruction executes an insn modified with
2502    the contents of r1.  It does not change the executed instruction in memory;
2503    it does not change the program counter.
2504 
2505    Perform this by recording the modified instruction in env->ex_value.
2506    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2507 */
2508 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2509 {
2510     uint64_t insn = cpu_lduw_code(env, addr);
2511     uint8_t opc = insn >> 8;
2512 
2513     /* Or in the contents of R1[56:63].  */
2514     insn |= r1 & 0xff;
2515 
2516     /* Load the rest of the instruction.  */
2517     insn <<= 48;
2518     switch (get_ilen(opc)) {
2519     case 2:
2520         break;
2521     case 4:
2522         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2523         break;
2524     case 6:
2525         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2526         break;
2527     default:
2528         g_assert_not_reached();
2529     }
2530 
2531     /* The most common cases can be sped up by avoiding a new TB.  */
2532     if ((opc & 0xf0) == 0xd0) {
2533         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2534                                       uint64_t, uintptr_t);
2535         static const dx_helper dx[16] = {
2536             [0x0] = do_helper_trt_bkwd,
2537             [0x2] = do_helper_mvc,
2538             [0x4] = do_helper_nc,
2539             [0x5] = do_helper_clc,
2540             [0x6] = do_helper_oc,
2541             [0x7] = do_helper_xc,
2542             [0xc] = do_helper_tr,
2543             [0xd] = do_helper_trt_fwd,
2544         };
2545         dx_helper helper = dx[opc & 0xf];
2546 
2547         if (helper) {
2548             uint32_t l = extract64(insn, 48, 8);
2549             uint32_t b1 = extract64(insn, 44, 4);
2550             uint32_t d1 = extract64(insn, 32, 12);
2551             uint32_t b2 = extract64(insn, 28, 4);
2552             uint32_t d2 = extract64(insn, 16, 12);
2553             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2554             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2555 
2556             env->cc_op = helper(env, l, a1, a2, 0);
2557             env->psw.addr += ilen;
2558             return;
2559         }
2560     } else if (opc == 0x0a) {
2561         env->int_svc_code = extract64(insn, 48, 8);
2562         env->int_svc_ilen = ilen;
2563         helper_exception(env, EXCP_SVC);
2564         g_assert_not_reached();
2565     }
2566 
2567     /* Record the insn we want to execute as well as the ilen to use
2568        during the execution of the target insn.  This will also ensure
2569        that ex_value is non-zero, which flags that we are in a state
2570        that requires such execution.  */
2571     env->ex_value = insn | ilen;
2572 }
2573 
2574 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2575                        uint64_t len)
2576 {
2577     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2578     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2579     const uint64_t r0 = env->regs[0];
2580     const uintptr_t ra = GETPC();
2581     uint8_t dest_key, dest_as, dest_k, dest_a;
2582     uint8_t src_key, src_as, src_k, src_a;
2583     uint64_t val;
2584     int cc = 0;
2585 
2586     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2587                __func__, dest, src, len);
2588 
2589     if (!(env->psw.mask & PSW_MASK_DAT)) {
2590         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2591     }
2592 
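    /* Each 16-bit OAC holds the access key in bits 0-3, the
       address-space control in bits 8-9, and the K and A validity bits
       in bits 14 and 15, which say whether the key and AS fields are
       to be used at all.  */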
2593     /* OAC (operand access control) for the first operand -> dest */
2594     val = (r0 & 0xffff0000ULL) >> 16;
2595     dest_key = (val >> 12) & 0xf;
2596     dest_as = (val >> 6) & 0x3;
2597     dest_k = (val >> 1) & 0x1;
2598     dest_a = val & 0x1;
2599 
2600     /* OAC (operand access control) for the second operand -> src */
2601     val = (r0 & 0x0000ffffULL);
2602     src_key = (val >> 12) & 0xf;
2603     src_as = (val >> 6) & 0x3;
2604     src_k = (val >> 1) & 0x1;
2605     src_a = val & 0x1;
2606 
2607     if (!dest_k) {
2608         dest_key = psw_key;
2609     }
2610     if (!src_k) {
2611         src_key = psw_key;
2612     }
2613     if (!dest_a) {
2614         dest_as = psw_as;
2615     }
2616     if (!src_a) {
2617         src_as = psw_as;
2618     }
2619 
2620     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2621         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2622     }
2623     if (!(env->cregs[0] & CR0_SECONDARY) &&
2624         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2625         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2626     }
2627     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2628         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2629     }
2630 
2631     len = wrap_length32(env, len);
2632     if (len > 4096) {
2633         cc = 3;
2634         len = 4096;
2635     }
2636 
2637     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2638     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2639         (env->psw.mask & PSW_MASK_PSTATE)) {
2640         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2641                       __func__);
2642         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2643     }
2644 
2645     /* FIXME: Access using correct keys and AR-mode */
2646     if (len) {
2647         S390Access srca, desta;
2648 
2649         access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
2650                        mmu_idx_from_as(src_as), ra);
2651         access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
2652                        mmu_idx_from_as(dest_as), ra);
2653 
2654         access_memmove(env, &desta, &srca, ra);
2655     }
2656 
2657     return cc;
2658 }
2659 
2660 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2661    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2662    value >= 0 indicates failure, and the CC value to be returned.  */
2663 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2664                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2665                                  uint32_t *ochar, uint32_t *olen);
2666 
2667 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2668    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2669    indicates failure, and the CC value to be returned.  */
2670 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2671                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2672                                  uint32_t *olen);
2673 
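/* E.g. the three-byte sequence 0xe2 0x82 0xac decodes to U+20AC (the
   euro sign): (0xe2 & 0x0f) << 12 | (0x82 & 0x3f) << 6 | (0xac & 0x3f).  */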
2674 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2675                        bool enh_check, uintptr_t ra,
2676                        uint32_t *ochar, uint32_t *olen)
2677 {
2678     uint8_t s0, s1, s2, s3;
2679     uint32_t c, l;
2680 
2681     if (ilen < 1) {
2682         return 0;
2683     }
2684     s0 = cpu_ldub_data_ra(env, addr, ra);
2685     if (s0 <= 0x7f) {
2686         /* one byte character */
2687         l = 1;
2688         c = s0;
2689     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2690         /* invalid character */
2691         return 2;
2692     } else if (s0 <= 0xdf) {
2693         /* two byte character */
2694         l = 2;
2695         if (ilen < 2) {
2696             return 0;
2697         }
2698         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2699         c = s0 & 0x1f;
2700         c = (c << 6) | (s1 & 0x3f);
2701         if (enh_check && (s1 & 0xc0) != 0x80) {
2702             return 2;
2703         }
2704     } else if (s0 <= 0xef) {
2705         /* three byte character */
2706         l = 3;
2707         if (ilen < 3) {
2708             return 0;
2709         }
2710         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2711         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2712         c = s0 & 0x0f;
2713         c = (c << 6) | (s1 & 0x3f);
2714         c = (c << 6) | (s2 & 0x3f);
2715         /* Fold the byte-by-byte range descriptions in the PoO into
2716            tests against the complete value.  It disallows encodings
2717            that could be smaller, and the UTF-16 surrogates.  */
2718         if (enh_check
2719             && ((s1 & 0xc0) != 0x80
2720                 || (s2 & 0xc0) != 0x80
2721                 || c < 0x1000
2722                 || (c >= 0xd800 && c <= 0xdfff))) {
2723             return 2;
2724         }
2725     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2726         /* four byte character */
2727         l = 4;
2728         if (ilen < 4) {
2729             return 0;
2730         }
2731         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2732         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2733         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2734         c = s0 & 0x07;
2735         c = (c << 6) | (s1 & 0x3f);
2736         c = (c << 6) | (s2 & 0x3f);
2737         c = (c << 6) | (s3 & 0x3f);
2738         /* See above.  */
2739         if (enh_check
2740             && ((s1 & 0xc0) != 0x80
2741                 || (s2 & 0xc0) != 0x80
2742                 || (s3 & 0xc0) != 0x80
2743                 || c < 0x010000
2744                 || c > 0x10ffff)) {
2745             return 2;
2746         }
2747     } else {
2748         /* invalid character */
2749         return 2;
2750     }
2751 
2752     *ochar = c;
2753     *olen = l;
2754     return -1;
2755 }
2756 
2757 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2758                         bool enh_check, uintptr_t ra,
2759                         uint32_t *ochar, uint32_t *olen)
2760 {
2761     uint16_t s0, s1;
2762     uint32_t c, l;
2763 
2764     if (ilen < 2) {
2765         return 0;
2766     }
2767     s0 = cpu_lduw_data_ra(env, addr, ra);
2768     if ((s0 & 0xfc00) != 0xd800) {
2769         /* one word character */
2770         l = 2;
2771         c = s0;
2772     } else {
2773         /* two word character */
2774         l = 4;
2775         if (ilen < 4) {
2776             return 0;
2777         }
2778         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2779         c = extract32(s0, 6, 4) + 1;
2780         c = (c << 6) | (s0 & 0x3f);
2781         c = (c << 10) | (s1 & 0x3ff);
2782         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2783             /* invalid surrogate character */
2784             return 2;
2785         }
2786     }
2787 
2788     *ochar = c;
2789     *olen = l;
2790     return -1;
2791 }
2792 
2793 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2794                         bool enh_check, uintptr_t ra,
2795                         uint32_t *ochar, uint32_t *olen)
2796 {
2797     uint32_t c;
2798 
2799     if (ilen < 4) {
2800         return 0;
2801     }
2802     c = cpu_ldl_data_ra(env, addr, ra);
2803     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2804         /* invalid unicode character */
2805         return 2;
2806     }
2807 
2808     *ochar = c;
2809     *olen = 4;
2810     return -1;
2811 }
2812 
2813 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2814                        uintptr_t ra, uint32_t c, uint32_t *olen)
2815 {
2816     uint8_t d[4];
2817     uint32_t l, i;
2818 
2819     if (c <= 0x7f) {
2820         /* one byte character */
2821         l = 1;
2822         d[0] = c;
2823     } else if (c <= 0x7ff) {
2824         /* two byte character */
2825         l = 2;
2826         d[1] = 0x80 | extract32(c, 0, 6);
2827         d[0] = 0xc0 | extract32(c, 6, 5);
2828     } else if (c <= 0xffff) {
2829         /* three byte character */
2830         l = 3;
2831         d[2] = 0x80 | extract32(c, 0, 6);
2832         d[1] = 0x80 | extract32(c, 6, 6);
2833         d[0] = 0xe0 | extract32(c, 12, 4);
2834     } else {
2835         /* four byte character */
2836         l = 4;
2837         d[3] = 0x80 | extract32(c, 0, 6);
2838         d[2] = 0x80 | extract32(c, 6, 6);
2839         d[1] = 0x80 | extract32(c, 12, 6);
2840         d[0] = 0xf0 | extract32(c, 18, 3);
2841     }
2842 
2843     if (ilen < l) {
2844         return 1;
2845     }
2846     for (i = 0; i < l; ++i) {
2847         cpu_stb_data_ra(env, addr + i, d[i], ra);
2848     }
2849 
2850     *olen = l;
2851     return -1;
2852 }
2853 
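/* Characters above U+FFFF become a surrogate pair; the "- 1" below
   folds the high-plane offset (c - 0x10000) into the high surrogate.
   E.g. U+1F600 is stored as 0xd83d 0xde00.  */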
2854 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2855                         uintptr_t ra, uint32_t c, uint32_t *olen)
2856 {
2857     uint16_t d0, d1;
2858 
2859     if (c <= 0xffff) {
2860         /* one word character */
2861         if (ilen < 2) {
2862             return 1;
2863         }
2864         cpu_stw_data_ra(env, addr, c, ra);
2865         *olen = 2;
2866     } else {
2867         /* two word character */
2868         if (ilen < 4) {
2869             return 1;
2870         }
2871         d1 = 0xdc00 | extract32(c, 0, 10);
2872         d0 = 0xd800 | extract32(c, 10, 6);
2873         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2874         cpu_stw_data_ra(env, addr + 0, d0, ra);
2875         cpu_stw_data_ra(env, addr + 2, d1, ra);
2876         *olen = 4;
2877     }
2878 
2879     return -1;
2880 }
2881 
2882 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2883                         uintptr_t ra, uint32_t c, uint32_t *olen)
2884 {
2885     if (ilen < 4) {
2886         return 1;
2887     }
2888     cpu_stl_data_ra(env, addr, c, ra);
2889     *olen = 4;
2890     return -1;
2891 }
2892 
2893 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2894                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2895                                        decode_unicode_fn decode,
2896                                        encode_unicode_fn encode)
2897 {
2898     uint64_t dst = get_address(env, r1);
2899     uint64_t dlen = get_length(env, r1 + 1);
2900     uint64_t src = get_address(env, r2);
2901     uint64_t slen = get_length(env, r2 + 1);
2902     bool enh_check = m3 & 1;
2903     int cc, i;
2904 
2905     /* Lest we fail to service interrupts in a timely manner, limit the
2906        amount of work we're willing to do.  For now, let's cap at 256.  */
2907     for (i = 0; i < 256; ++i) {
2908         uint32_t c, ilen, olen;
2909 
2910         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2911         if (unlikely(cc >= 0)) {
2912             break;
2913         }
2914         cc = encode(env, dst, dlen, ra, c, &olen);
2915         if (unlikely(cc >= 0)) {
2916             break;
2917         }
2918 
2919         src += ilen;
2920         slen -= ilen;
2921         dst += olen;
2922         dlen -= olen;
2923         cc = 3;
2924     }
2925 
2926     set_address(env, r1, dst);
2927     set_length(env, r1 + 1, dlen);
2928     set_address(env, r2, src);
2929     set_length(env, r2 + 1, slen);
2930 
2931     return cc;
2932 }
2933 
2934 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2935 {
2936     return convert_unicode(env, r1, r2, m3, GETPC(),
2937                            decode_utf8, encode_utf16);
2938 }
2939 
2940 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2941 {
2942     return convert_unicode(env, r1, r2, m3, GETPC(),
2943                            decode_utf8, encode_utf32);
2944 }
2945 
2946 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2947 {
2948     return convert_unicode(env, r1, r2, m3, GETPC(),
2949                            decode_utf16, encode_utf8);
2950 }
2951 
2952 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2953 {
2954     return convert_unicode(env, r1, r2, m3, GETPC(),
2955                            decode_utf16, encode_utf32);
2956 }
2957 
2958 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2959 {
2960     return convert_unicode(env, r1, r2, m3, GETPC(),
2961                            decode_utf32, encode_utf8);
2962 }
2963 
2964 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2965 {
2966     return convert_unicode(env, r1, r2, m3, GETPC(),
2967                            decode_utf32, encode_utf16);
2968 }
2969 
2970 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2971                         uintptr_t ra)
2972 {
2973     /* test the actual access, not just any access to the page due to LAP */
2974     while (len) {
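        /* number of bytes left on the current page */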
2975         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2976         const uint64_t curlen = MIN(pagelen, len);
2977 
2978         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2979         addr = wrap_address(env, addr + curlen);
2980         len -= curlen;
2981     }
2982 }
2983 
2984 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2985 {
2986     probe_write_access(env, addr, len, GETPC());
2987 }
2988