xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 2bfd3c48)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/page-protection.h"
29 #include "exec/cpu_ldst.h"
30 #include "hw/core/tcg-cpu-ops.h"
31 #include "qemu/int128.h"
32 #include "qemu/atomic128.h"
33 #include "trace.h"
34 
35 #if !defined(CONFIG_USER_ONLY)
36 #include "hw/s390x/storage-keys.h"
37 #include "hw/boards.h"
38 #endif
39 
/*
 * user_or_likely(X): in user-only builds this expands to the constant
 * 'true' (X is not evaluated); in all other builds it is likely(X).
 */
#ifdef CONFIG_USER_ONLY
# define user_or_likely(X)    true
#else
# define user_or_likely(X)    likely(X)
#endif
45 
46 /*****************************************************************************/
47 /* Softmmu support */
48 
/*
 * HELPER_LOG() forwards its arguments to qemu_log() when DEBUG_HELPER is
 * defined and expands to nothing otherwise. Uncomment the line below to
 * enable it.
 */
/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif
55 
56 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
57 {
58     uint16_t pkm = env->cregs[3] >> 16;
59 
60     if (env->psw.mask & PSW_MASK_PSTATE) {
61         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
62         return pkm & (0x8000 >> psw_key);
63     }
64     return true;
65 }
66 
67 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
68                                    uint64_t src, uint32_t len)
69 {
70     if (!len || src == dest) {
71         return false;
72     }
73     /* Take care of wrapping at the end of address space. */
74     if (unlikely(wrap_address(env, src + len - 1) < src)) {
75         return dest > src || dest <= wrap_address(env, src + len - 1);
76     }
77     return dest > src && dest <= src + len - 1;
78 }
79 
80 /* Trigger a SPECIFICATION exception if an address or a length is not
81    naturally aligned.  */
82 static inline void check_alignment(CPUS390XState *env, uint64_t v,
83                                    int wordsize, uintptr_t ra)
84 {
85     if (v % wordsize) {
86         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
87     }
88 }
89 
90 /* Load a value from memory according to its size.  */
91 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
92                                            int wordsize, uintptr_t ra)
93 {
94     switch (wordsize) {
95     case 1:
96         return cpu_ldub_data_ra(env, addr, ra);
97     case 2:
98         return cpu_lduw_data_ra(env, addr, ra);
99     default:
100         abort();
101     }
102 }
103 
104 /* Store a to memory according to its size.  */
105 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
106                                       uint64_t value, int wordsize,
107                                       uintptr_t ra)
108 {
109     switch (wordsize) {
110     case 1:
111         cpu_stb_data_ra(env, addr, value, ra);
112         break;
113     case 2:
114         cpu_stw_data_ra(env, addr, value, ra);
115         break;
116     default:
117         abort();
118     }
119 }
120 
/*
 * Descriptor of one prepared guest memory access, filled in by
 * access_prepare_nf().
 *
 * An access covers at most 4096 bytes and therefore at most two pages.
 */
typedef struct S390Access {
    /* Guest virtual address of the first part of the access. */
    target_ulong vaddr1;
    /* Guest virtual address of the second part; left 0 when size2 == 0. */
    target_ulong vaddr2;
    /*
     * Host pointers for the two parts. A NULL pointer makes the accessors
     * fall back to the ld/st helpers for that part.
     */
    void *haddr1;
    void *haddr2;
    /* Number of bytes in the first part and in the second part. */
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
137 
138 /*
139  * With nonfault=1, return the PGM_ exception that would have been injected
140  * into the guest; return 0 if no exception was detected.
141  *
142  * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec.
143  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
144  */
145 static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
146                                     int size, MMUAccessType access_type,
147                                     int mmu_idx, bool nonfault,
148                                     void **phost, uintptr_t ra)
149 {
150     int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
151                                    nonfault, phost, ra);
152 
153     if (unlikely(flags & TLB_INVALID_MASK)) {
154 #ifdef CONFIG_USER_ONLY
155         /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
156         env->__excp_addr = addr & TARGET_PAGE_MASK;
157         return (page_get_flags(addr) & PAGE_VALID
158                 ? PGM_PROTECTION : PGM_ADDRESSING);
159 #else
160         return env->tlb_fill_exc;
161 #endif
162     }
163 
164 #ifndef CONFIG_USER_ONLY
165     if (unlikely(flags & TLB_WATCHPOINT)) {
166         /* S390 does not presently use transaction attributes. */
167         cpu_check_watchpoint(env_cpu(env), addr, size,
168                              MEMTXATTRS_UNSPECIFIED,
169                              (access_type == MMU_DATA_STORE
170                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
171     }
172 #endif
173 
174     return 0;
175 }
176 
177 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
178                              bool nonfault, vaddr vaddr1, int size,
179                              MMUAccessType access_type,
180                              int mmu_idx, uintptr_t ra)
181 {
182     int size1, size2, exc;
183 
184     assert(size > 0 && size <= 4096);
185 
186     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
187     size2 = size - size1;
188 
189     memset(access, 0, sizeof(*access));
190     access->vaddr1 = vaddr1;
191     access->size1 = size1;
192     access->size2 = size2;
193     access->mmu_idx = mmu_idx;
194 
195     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
196                             &access->haddr1, ra);
197     if (unlikely(exc)) {
198         return exc;
199     }
200     if (unlikely(size2)) {
201         /* The access crosses page boundaries. */
202         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
203 
204         access->vaddr2 = vaddr2;
205         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
206                                 nonfault, &access->haddr2, ra);
207         if (unlikely(exc)) {
208             return exc;
209         }
210     }
211     return 0;
212 }
213 
214 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
215                                   vaddr vaddr, int size,
216                                   MMUAccessType access_type, int mmu_idx,
217                                   uintptr_t ra)
218 {
219     int exc = access_prepare_nf(ret, env, false, vaddr, size,
220                                 access_type, mmu_idx, ra);
221     assert(!exc);
222 }
223 
224 /* Helper to handle memset on a single page. */
225 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
226                              uint8_t byte, uint16_t size, int mmu_idx,
227                              uintptr_t ra)
228 {
229 #ifdef CONFIG_USER_ONLY
230     memset(haddr, byte, size);
231 #else
232     if (likely(haddr)) {
233         memset(haddr, byte, size);
234     } else {
235         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
236         for (int i = 0; i < size; i++) {
237             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
238         }
239     }
240 #endif
241 }
242 
243 static void access_memset(CPUS390XState *env, S390Access *desta,
244                           uint8_t byte, uintptr_t ra)
245 {
246 
247     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
248                      desta->mmu_idx, ra);
249     if (likely(!desta->size2)) {
250         return;
251     }
252     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
253                      desta->mmu_idx, ra);
254 }
255 
256 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
257                                int offset, uintptr_t ra)
258 {
259     target_ulong vaddr = access->vaddr1;
260     void *haddr = access->haddr1;
261 
262     if (unlikely(offset >= access->size1)) {
263         offset -= access->size1;
264         vaddr = access->vaddr2;
265         haddr = access->haddr2;
266     }
267 
268     if (user_or_likely(haddr)) {
269         return ldub_p(haddr + offset);
270     } else {
271         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
272         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
273     }
274 }
275 
276 static void access_set_byte(CPUS390XState *env, S390Access *access,
277                             int offset, uint8_t byte, uintptr_t ra)
278 {
279     target_ulong vaddr = access->vaddr1;
280     void *haddr = access->haddr1;
281 
282     if (unlikely(offset >= access->size1)) {
283         offset -= access->size1;
284         vaddr = access->vaddr2;
285         haddr = access->haddr2;
286     }
287 
288     if (user_or_likely(haddr)) {
289         stb_p(haddr + offset, byte);
290     } else {
291         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
292         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
293     }
294 }
295 
296 /*
297  * Move data with the same semantics as memmove() in case ranges don't overlap
298  * or src > dest. Undefined behavior on destructive overlaps.
299  */
300 static void access_memmove(CPUS390XState *env, S390Access *desta,
301                            S390Access *srca, uintptr_t ra)
302 {
303     int len = desta->size1 + desta->size2;
304     int diff;
305 
306     assert(len == srca->size1 + srca->size2);
307 
308     /* Fallback to slow access in case we don't have access to all host pages */
309     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
310                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
311         int i;
312 
313         for (i = 0; i < len; i++) {
314             uint8_t byte = access_get_byte(env, srca, i, ra);
315 
316             access_set_byte(env, desta, i, byte, ra);
317         }
318         return;
319     }
320 
321     diff = desta->size1 - srca->size1;
322     if (likely(diff == 0)) {
323         memmove(desta->haddr1, srca->haddr1, srca->size1);
324         if (unlikely(srca->size2)) {
325             memmove(desta->haddr2, srca->haddr2, srca->size2);
326         }
327     } else if (diff > 0) {
328         memmove(desta->haddr1, srca->haddr1, srca->size1);
329         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
330         if (likely(desta->size2)) {
331             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
332         }
333     } else {
334         diff = -diff;
335         memmove(desta->haddr1, srca->haddr1, desta->size1);
336         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
337         if (likely(srca->size2)) {
338             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
339         }
340     }
341 }
342 
343 static int mmu_idx_from_as(uint8_t as)
344 {
345     switch (as) {
346     case AS_PRIMARY:
347         return MMU_PRIMARY_IDX;
348     case AS_SECONDARY:
349         return MMU_SECONDARY_IDX;
350     case AS_HOME:
351         return MMU_HOME_IDX;
352     default:
353         /* FIXME AS_ACCREG */
354         g_assert_not_reached();
355     }
356 }
357 
358 /* and on array */
359 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
360                              uint64_t src, uintptr_t ra)
361 {
362     const int mmu_idx = s390x_env_mmu_index(env, false);
363     S390Access srca1, srca2, desta;
364     uint32_t i;
365     uint8_t c = 0;
366 
367     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
368                __func__, l, dest, src);
369 
370     /* NC always processes one more byte than specified - maximum is 256 */
371     l++;
372 
373     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
374     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
375     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
376     for (i = 0; i < l; i++) {
377         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
378                           access_get_byte(env, &srca2, i, ra);
379 
380         c |= x;
381         access_set_byte(env, &desta, i, x, ra);
382     }
383     return c != 0;
384 }
385 
386 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
387                     uint64_t src)
388 {
389     return do_helper_nc(env, l, dest, src, GETPC());
390 }
391 
392 /* xor on array */
393 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
394                              uint64_t src, uintptr_t ra)
395 {
396     const int mmu_idx = s390x_env_mmu_index(env, false);
397     S390Access srca1, srca2, desta;
398     uint32_t i;
399     uint8_t c = 0;
400 
401     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
402                __func__, l, dest, src);
403 
404     /* XC always processes one more byte than specified - maximum is 256 */
405     l++;
406 
407     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
408     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
409     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
410 
411     /* xor with itself is the same as memset(0) */
412     if (src == dest) {
413         access_memset(env, &desta, 0, ra);
414         return 0;
415     }
416 
417     for (i = 0; i < l; i++) {
418         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
419                           access_get_byte(env, &srca2, i, ra);
420 
421         c |= x;
422         access_set_byte(env, &desta, i, x, ra);
423     }
424     return c != 0;
425 }
426 
427 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
428                     uint64_t src)
429 {
430     return do_helper_xc(env, l, dest, src, GETPC());
431 }
432 
433 /* or on array */
434 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
435                              uint64_t src, uintptr_t ra)
436 {
437     const int mmu_idx = s390x_env_mmu_index(env, false);
438     S390Access srca1, srca2, desta;
439     uint32_t i;
440     uint8_t c = 0;
441 
442     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
443                __func__, l, dest, src);
444 
445     /* OC always processes one more byte than specified - maximum is 256 */
446     l++;
447 
448     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
449     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
450     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
451     for (i = 0; i < l; i++) {
452         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
453                           access_get_byte(env, &srca2, i, ra);
454 
455         c |= x;
456         access_set_byte(env, &desta, i, x, ra);
457     }
458     return c != 0;
459 }
460 
461 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
462                     uint64_t src)
463 {
464     return do_helper_oc(env, l, dest, src, GETPC());
465 }
466 
467 /* memmove */
468 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
469                               uint64_t src, uintptr_t ra)
470 {
471     const int mmu_idx = s390x_env_mmu_index(env, false);
472     S390Access srca, desta;
473     uint32_t i;
474 
475     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
476                __func__, l, dest, src);
477 
478     /* MVC always copies one more byte than specified - maximum is 256 */
479     l++;
480 
481     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
482     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
483 
484     /*
485      * "When the operands overlap, the result is obtained as if the operands
486      * were processed one byte at a time". Only non-destructive overlaps
487      * behave like memmove().
488      */
489     if (dest == src + 1) {
490         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
491     } else if (!is_destructive_overlap(env, dest, src, l)) {
492         access_memmove(env, &desta, &srca, ra);
493     } else {
494         for (i = 0; i < l; i++) {
495             uint8_t byte = access_get_byte(env, &srca, i, ra);
496 
497             access_set_byte(env, &desta, i, byte, ra);
498         }
499     }
500 
501     return env->cc_op;
502 }
503 
504 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
505 {
506     do_helper_mvc(env, l, dest, src, GETPC());
507 }
508 
509 /* move right to left */
510 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
511 {
512     const int mmu_idx = s390x_env_mmu_index(env, false);
513     const uint64_t ra = GETPC();
514     S390Access srca, desta;
515     int32_t i;
516 
517     /* MVCRL always copies one more byte than specified - maximum is 256 */
518     l &= 0xff;
519     l++;
520 
521     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
522     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
523 
524     for (i = l - 1; i >= 0; i--) {
525         uint8_t byte = access_get_byte(env, &srca, i, ra);
526         access_set_byte(env, &desta, i, byte, ra);
527     }
528 }
529 
530 /* move inverse  */
531 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
532 {
533     const int mmu_idx = s390x_env_mmu_index(env, false);
534     S390Access srca, desta;
535     uintptr_t ra = GETPC();
536     int i;
537 
538     /* MVCIN always copies one more byte than specified - maximum is 256 */
539     l++;
540 
541     src = wrap_address(env, src - l + 1);
542     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
543     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
544     for (i = 0; i < l; i++) {
545         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
546 
547         access_set_byte(env, &desta, i, x, ra);
548     }
549 }
550 
551 /* move numerics  */
552 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
553 {
554     const int mmu_idx = s390x_env_mmu_index(env, false);
555     S390Access srca1, srca2, desta;
556     uintptr_t ra = GETPC();
557     int i;
558 
559     /* MVN always copies one more byte than specified - maximum is 256 */
560     l++;
561 
562     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
563     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
564     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
565     for (i = 0; i < l; i++) {
566         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
567                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
568 
569         access_set_byte(env, &desta, i, x, ra);
570     }
571 }
572 
573 /* move with offset  */
574 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
575 {
576     const int mmu_idx = s390x_env_mmu_index(env, false);
577     /* MVO always processes one more byte than specified - maximum is 16 */
578     const int len_dest = (l >> 4) + 1;
579     const int len_src = (l & 0xf) + 1;
580     uintptr_t ra = GETPC();
581     uint8_t byte_dest, byte_src;
582     S390Access srca, desta;
583     int i, j;
584 
585     access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
586     access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
587 
588     /* Handle rightmost byte */
589     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
590     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
591     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
592     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
593 
594     /* Process remaining bytes from right to left */
595     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
596         byte_dest = byte_src >> 4;
597         if (j >= 0) {
598             byte_src = access_get_byte(env, &srca, j, ra);
599         } else {
600             byte_src = 0;
601         }
602         byte_dest |= byte_src << 4;
603         access_set_byte(env, &desta, i, byte_dest, ra);
604     }
605 }
606 
607 /* move zones  */
608 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
609 {
610     const int mmu_idx = s390x_env_mmu_index(env, false);
611     S390Access srca1, srca2, desta;
612     uintptr_t ra = GETPC();
613     int i;
614 
615     /* MVZ always copies one more byte than specified - maximum is 256 */
616     l++;
617 
618     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
619     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
620     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
621     for (i = 0; i < l; i++) {
622         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
623                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
624 
625         access_set_byte(env, &desta, i, x, ra);
626     }
627 }
628 
629 /* compare unsigned byte arrays */
630 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
631                               uint64_t s2, uintptr_t ra)
632 {
633     uint32_t i;
634     uint32_t cc = 0;
635 
636     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
637                __func__, l, s1, s2);
638 
639     for (i = 0; i <= l; i++) {
640         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
641         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
642         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
643         if (x < y) {
644             cc = 1;
645             break;
646         } else if (x > y) {
647             cc = 2;
648             break;
649         }
650     }
651 
652     HELPER_LOG("\n");
653     return cc;
654 }
655 
656 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
657 {
658     return do_helper_clc(env, l, s1, s2, GETPC());
659 }
660 
661 /* compare logical under mask */
662 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
663                      uint64_t addr)
664 {
665     uintptr_t ra = GETPC();
666     uint32_t cc = 0;
667 
668     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
669                mask, addr);
670 
671     if (!mask) {
672         /* Recognize access exceptions for the first byte */
673         probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
674     }
675 
676     while (mask) {
677         if (mask & 8) {
678             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
679             uint8_t r = extract32(r1, 24, 8);
680             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
681                        addr);
682             if (r < d) {
683                 cc = 1;
684                 break;
685             } else if (r > d) {
686                 cc = 2;
687                 break;
688             }
689             addr++;
690         }
691         mask = (mask << 1) & 0xf;
692         r1 <<= 8;
693     }
694 
695     HELPER_LOG("\n");
696     return cc;
697 }
698 
699 static inline uint64_t get_address(CPUS390XState *env, int reg)
700 {
701     return wrap_address(env, env->regs[reg]);
702 }
703 
704 /*
705  * Store the address to the given register, zeroing out unused leftmost
706  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
707  */
708 static inline void set_address_zero(CPUS390XState *env, int reg,
709                                     uint64_t address)
710 {
711     if (env->psw.mask & PSW_MASK_64) {
712         env->regs[reg] = address;
713     } else {
714         if (!(env->psw.mask & PSW_MASK_32)) {
715             address &= 0x00ffffff;
716         } else {
717             address &= 0x7fffffff;
718         }
719         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
720     }
721 }
722 
723 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
724 {
725     if (env->psw.mask & PSW_MASK_64) {
726         /* 64-Bit mode */
727         env->regs[reg] = address;
728     } else {
729         if (!(env->psw.mask & PSW_MASK_32)) {
730             /* 24-Bit mode. According to the PoO it is implementation
731             dependent if bits 32-39 remain unchanged or are set to
732             zeros.  Choose the former so that the function can also be
733             used for TRT.  */
734             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
735         } else {
736             /* 31-Bit mode. According to the PoO it is implementation
737             dependent if bit 32 remains unchanged or is set to zero.
738             Choose the latter so that the function can also be used for
739             TRT.  */
740             address &= 0x7fffffff;
741             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
742         }
743     }
744 }
745 
746 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
747 {
748     if (!(env->psw.mask & PSW_MASK_64)) {
749         return (uint32_t)length;
750     }
751     return length;
752 }
753 
754 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
755 {
756     if (!(env->psw.mask & PSW_MASK_64)) {
757         /* 24-Bit and 31-Bit mode */
758         length &= 0x7fffffff;
759     }
760     return length;
761 }
762 
763 static inline uint64_t get_length(CPUS390XState *env, int reg)
764 {
765     return wrap_length31(env, env->regs[reg]);
766 }
767 
768 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
769 {
770     if (env->psw.mask & PSW_MASK_64) {
771         /* 64-Bit mode */
772         env->regs[reg] = length;
773     } else {
774         /* 24-Bit and 31-Bit mode */
775         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
776     }
777 }
778 
779 /* search string (c is byte to search, r2 is string, r1 end of string) */
780 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
781 {
782     uintptr_t ra = GETPC();
783     uint64_t end, str;
784     uint32_t len;
785     uint8_t v, c = env->regs[0];
786 
787     /* Bits 32-55 must contain all 0.  */
788     if (env->regs[0] & 0xffffff00u) {
789         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
790     }
791 
792     str = get_address(env, r2);
793     end = get_address(env, r1);
794 
795     /* Lest we fail to service interrupts in a timely manner, limit the
796        amount of work we're willing to do.  For now, let's cap at 8k.  */
797     for (len = 0; len < 0x2000; ++len) {
798         if (str + len == end) {
799             /* Character not found.  R1 & R2 are unmodified.  */
800             env->cc_op = 2;
801             return;
802         }
803         v = cpu_ldub_data_ra(env, str + len, ra);
804         if (v == c) {
805             /* Character found.  Set R1 to the location; R2 is unmodified.  */
806             env->cc_op = 1;
807             set_address(env, r1, str + len);
808             return;
809         }
810     }
811 
812     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
813     env->cc_op = 3;
814     set_address(env, r2, str + len);
815 }
816 
817 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
818 {
819     uintptr_t ra = GETPC();
820     uint32_t len;
821     uint16_t v, c = env->regs[0];
822     uint64_t end, str, adj_end;
823 
824     /* Bits 32-47 of R0 must be zero.  */
825     if (env->regs[0] & 0xffff0000u) {
826         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
827     }
828 
829     str = get_address(env, r2);
830     end = get_address(env, r1);
831 
832     /* If the LSB of the two addresses differ, use one extra byte.  */
833     adj_end = end + ((str ^ end) & 1);
834 
835     /* Lest we fail to service interrupts in a timely manner, limit the
836        amount of work we're willing to do.  For now, let's cap at 8k.  */
837     for (len = 0; len < 0x2000; len += 2) {
838         if (str + len == adj_end) {
839             /* End of input found.  */
840             env->cc_op = 2;
841             return;
842         }
843         v = cpu_lduw_data_ra(env, str + len, ra);
844         if (v == c) {
845             /* Character found.  Set R1 to the location; R2 is unmodified.  */
846             env->cc_op = 1;
847             set_address(env, r1, str + len);
848             return;
849         }
850     }
851 
852     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
853     env->cc_op = 3;
854     set_address(env, r2, str + len);
855 }
856 
857 /* unsigned string compare (c is string terminator) */
858 Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
859 {
860     uintptr_t ra = GETPC();
861     uint32_t len;
862 
863     c = c & 0xff;
864     s1 = wrap_address(env, s1);
865     s2 = wrap_address(env, s2);
866 
867     /* Lest we fail to service interrupts in a timely manner, limit the
868        amount of work we're willing to do.  For now, let's cap at 8k.  */
869     for (len = 0; len < 0x2000; ++len) {
870         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
871         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
872         if (v1 == v2) {
873             if (v1 == c) {
874                 /* Equal.  CC=0, and don't advance the registers.  */
875                 env->cc_op = 0;
876                 return int128_make128(s2, s1);
877             }
878         } else {
879             /* Unequal.  CC={1,2}, and advance the registers.  Note that
880                the terminator need not be zero, but the string that contains
881                the terminator is by definition "low".  */
882             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
883             return int128_make128(s2 + len, s1 + len);
884         }
885     }
886 
887     /* CPU-determined bytes equal; advance the registers.  */
888     env->cc_op = 3;
889     return int128_make128(s2 + len, s1 + len);
890 }
891 
892 /* move page */
893 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
894 {
895     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
896     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
897     const int mmu_idx = s390x_env_mmu_index(env, false);
898     const bool f = extract64(r0, 11, 1);
899     const bool s = extract64(r0, 10, 1);
900     const bool cco = extract64(r0, 8, 1);
901     uintptr_t ra = GETPC();
902     S390Access srca, desta;
903     int exc;
904 
905     if ((f && s) || extract64(r0, 12, 4)) {
906         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
907     }
908 
909     /*
910      * We always manually handle exceptions such that we can properly store
911      * r1/r2 to the lowcore on page-translation exceptions.
912      *
913      * TODO: Access key handling
914      */
915     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
916                             MMU_DATA_LOAD, mmu_idx, ra);
917     if (exc) {
918         if (cco) {
919             return 2;
920         }
921         goto inject_exc;
922     }
923     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
924                             MMU_DATA_STORE, mmu_idx, ra);
925     if (exc) {
926         if (cco && exc != PGM_PROTECTION) {
927             return 1;
928         }
929         goto inject_exc;
930     }
931     access_memmove(env, &desta, &srca, ra);
932     return 0; /* data moved */
933 inject_exc:
934 #if !defined(CONFIG_USER_ONLY)
935     if (exc != PGM_ADDRESSING) {
936         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
937                  env->tlb_fill_tec);
938     }
939     if (exc == PGM_PAGE_TRANS) {
940         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
941                  r1 << 4 | r2);
942     }
943 #endif
944     tcg_s390_program_interrupt(env, exc, ra);
945 }
946 
947 /* string copy */
948 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
949 {
950     const int mmu_idx = s390x_env_mmu_index(env, false);
951     const uint64_t d = get_address(env, r1);
952     const uint64_t s = get_address(env, r2);
953     const uint8_t c = env->regs[0];
954     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
955     S390Access srca, desta;
956     uintptr_t ra = GETPC();
957     int i;
958 
959     if (env->regs[0] & 0xffffff00ull) {
960         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
961     }
962 
963     /*
964      * Our access should not exceed single pages, as we must not report access
965      * exceptions exceeding the actually copied range (which we don't know at
966      * this point). We might over-indicate watchpoints within the pages
967      * (if we ever care, we have to limit processing to a single byte).
968      */
969     access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
970     access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
971     for (i = 0; i < len; i++) {
972         const uint8_t v = access_get_byte(env, &srca, i, ra);
973 
974         access_set_byte(env, &desta, i, v, ra);
975         if (v == c) {
976             set_address_zero(env, r1, d + i);
977             return 1;
978         }
979     }
980     set_address_zero(env, r1, d + len);
981     set_address_zero(env, r2, s + len);
982     return 3;
983 }
984 
985 /* load access registers r1 to r3 from memory at a2 */
986 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
987 {
988     uintptr_t ra = GETPC();
989     int i;
990 
991     if (a2 & 0x3) {
992         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
993     }
994 
995     for (i = r1;; i = (i + 1) % 16) {
996         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
997         a2 += 4;
998 
999         if (i == r3) {
1000             break;
1001         }
1002     }
1003 }
1004 
1005 /* store access registers r1 to r3 in memory at a2 */
1006 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1007 {
1008     uintptr_t ra = GETPC();
1009     int i;
1010 
1011     if (a2 & 0x3) {
1012         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1013     }
1014 
1015     for (i = r1;; i = (i + 1) % 16) {
1016         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1017         a2 += 4;
1018 
1019         if (i == r3) {
1020             break;
1021         }
1022     }
1023 }
1024 
1025 /* move long helper */
1026 static inline uint32_t do_mvcl(CPUS390XState *env,
1027                                uint64_t *dest, uint64_t *destlen,
1028                                uint64_t *src, uint64_t *srclen,
1029                                uint16_t pad, int wordsize, uintptr_t ra)
1030 {
1031     const int mmu_idx = s390x_env_mmu_index(env, false);
1032     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1033     S390Access srca, desta;
1034     int i, cc;
1035 
1036     if (*destlen == *srclen) {
1037         cc = 0;
1038     } else if (*destlen < *srclen) {
1039         cc = 1;
1040     } else {
1041         cc = 2;
1042     }
1043 
1044     if (!*destlen) {
1045         return cc;
1046     }
1047 
1048     /*
1049      * Only perform one type of type of operation (move/pad) at a time.
1050      * Stay within single pages.
1051      */
1052     if (*srclen) {
1053         /* Copy the src array */
1054         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1055         *destlen -= len;
1056         *srclen -= len;
1057         access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1058         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1059         access_memmove(env, &desta, &srca, ra);
1060         *src = wrap_address(env, *src + len);
1061         *dest = wrap_address(env, *dest + len);
1062     } else if (wordsize == 1) {
1063         /* Pad the remaining area */
1064         *destlen -= len;
1065         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1066         access_memset(env, &desta, pad, ra);
1067         *dest = wrap_address(env, *dest + len);
1068     } else {
1069         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1070 
1071         /* The remaining length selects the padding byte. */
1072         for (i = 0; i < len; (*destlen)--, i++) {
1073             if (*destlen & 1) {
1074                 access_set_byte(env, &desta, i, pad, ra);
1075             } else {
1076                 access_set_byte(env, &desta, i, pad >> 8, ra);
1077             }
1078         }
1079         *dest = wrap_address(env, *dest + len);
1080     }
1081 
1082     return *destlen ? 3 : cc;
1083 }
1084 
1085 /* move long */
1086 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1087 {
1088     const int mmu_idx = s390x_env_mmu_index(env, false);
1089     uintptr_t ra = GETPC();
1090     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1091     uint64_t dest = get_address(env, r1);
1092     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1093     uint64_t src = get_address(env, r2);
1094     uint8_t pad = env->regs[r2 + 1] >> 24;
1095     CPUState *cs = env_cpu(env);
1096     S390Access srca, desta;
1097     uint32_t cc, cur_len;
1098 
1099     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1100         cc = 3;
1101     } else if (srclen == destlen) {
1102         cc = 0;
1103     } else if (destlen < srclen) {
1104         cc = 1;
1105     } else {
1106         cc = 2;
1107     }
1108 
1109     /* We might have to zero-out some bits even if there was no action. */
1110     if (unlikely(!destlen || cc == 3)) {
1111         set_address_zero(env, r2, src);
1112         set_address_zero(env, r1, dest);
1113         return cc;
1114     } else if (!srclen) {
1115         set_address_zero(env, r2, src);
1116     }
1117 
1118     /*
1119      * Only perform one type of type of operation (move/pad) in one step.
1120      * Stay within single pages.
1121      */
1122     while (destlen) {
1123         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1124         if (!srclen) {
1125             access_prepare(&desta, env, dest, cur_len,
1126                            MMU_DATA_STORE, mmu_idx, ra);
1127             access_memset(env, &desta, pad, ra);
1128         } else {
1129             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1130 
1131             access_prepare(&srca, env, src, cur_len,
1132                            MMU_DATA_LOAD, mmu_idx, ra);
1133             access_prepare(&desta, env, dest, cur_len,
1134                            MMU_DATA_STORE, mmu_idx, ra);
1135             access_memmove(env, &desta, &srca, ra);
1136             src = wrap_address(env, src + cur_len);
1137             srclen -= cur_len;
1138             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1139             set_address_zero(env, r2, src);
1140         }
1141         dest = wrap_address(env, dest + cur_len);
1142         destlen -= cur_len;
1143         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1144         set_address_zero(env, r1, dest);
1145 
1146         /*
1147          * MVCL is interruptible. Return to the main loop if requested after
1148          * writing back all state to registers. If no interrupt will get
1149          * injected, we'll end up back in this handler and continue processing
1150          * the remaining parts.
1151          */
1152         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1153             cpu_loop_exit_restore(cs, ra);
1154         }
1155     }
1156     return cc;
1157 }
1158 
1159 /* move long extended */
1160 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1161                        uint32_t r3)
1162 {
1163     uintptr_t ra = GETPC();
1164     uint64_t destlen = get_length(env, r1 + 1);
1165     uint64_t dest = get_address(env, r1);
1166     uint64_t srclen = get_length(env, r3 + 1);
1167     uint64_t src = get_address(env, r3);
1168     uint8_t pad = a2;
1169     uint32_t cc;
1170 
1171     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1172 
1173     set_length(env, r1 + 1, destlen);
1174     set_length(env, r3 + 1, srclen);
1175     set_address(env, r1, dest);
1176     set_address(env, r3, src);
1177 
1178     return cc;
1179 }
1180 
1181 /* move long unicode */
1182 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1183                        uint32_t r3)
1184 {
1185     uintptr_t ra = GETPC();
1186     uint64_t destlen = get_length(env, r1 + 1);
1187     uint64_t dest = get_address(env, r1);
1188     uint64_t srclen = get_length(env, r3 + 1);
1189     uint64_t src = get_address(env, r3);
1190     uint16_t pad = a2;
1191     uint32_t cc;
1192 
1193     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1194 
1195     set_length(env, r1 + 1, destlen);
1196     set_length(env, r3 + 1, srclen);
1197     set_address(env, r1, dest);
1198     set_address(env, r3, src);
1199 
1200     return cc;
1201 }
1202 
1203 /* compare logical long helper */
1204 static inline uint32_t do_clcl(CPUS390XState *env,
1205                                uint64_t *src1, uint64_t *src1len,
1206                                uint64_t *src3, uint64_t *src3len,
1207                                uint16_t pad, uint64_t limit,
1208                                int wordsize, uintptr_t ra)
1209 {
1210     uint64_t len = MAX(*src1len, *src3len);
1211     uint32_t cc = 0;
1212 
1213     check_alignment(env, *src1len | *src3len, wordsize, ra);
1214 
1215     if (!len) {
1216         return cc;
1217     }
1218 
1219     /* Lest we fail to service interrupts in a timely manner, limit the
1220        amount of work we're willing to do.  */
1221     if (len > limit) {
1222         len = limit;
1223         cc = 3;
1224     }
1225 
1226     for (; len; len -= wordsize) {
1227         uint16_t v1 = pad;
1228         uint16_t v3 = pad;
1229 
1230         if (*src1len) {
1231             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1232         }
1233         if (*src3len) {
1234             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1235         }
1236 
1237         if (v1 != v3) {
1238             cc = (v1 < v3) ? 1 : 2;
1239             break;
1240         }
1241 
1242         if (*src1len) {
1243             *src1 += wordsize;
1244             *src1len -= wordsize;
1245         }
1246         if (*src3len) {
1247             *src3 += wordsize;
1248             *src3len -= wordsize;
1249         }
1250     }
1251 
1252     return cc;
1253 }
1254 
1255 
1256 /* compare logical long */
1257 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1258 {
1259     uintptr_t ra = GETPC();
1260     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1261     uint64_t src1 = get_address(env, r1);
1262     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1263     uint64_t src3 = get_address(env, r2);
1264     uint8_t pad = env->regs[r2 + 1] >> 24;
1265     uint32_t cc;
1266 
1267     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1268 
1269     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1270     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1271     set_address(env, r1, src1);
1272     set_address(env, r2, src3);
1273 
1274     return cc;
1275 }
1276 
1277 /* compare logical long extended memcompare insn with padding */
1278 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1279                        uint32_t r3)
1280 {
1281     uintptr_t ra = GETPC();
1282     uint64_t src1len = get_length(env, r1 + 1);
1283     uint64_t src1 = get_address(env, r1);
1284     uint64_t src3len = get_length(env, r3 + 1);
1285     uint64_t src3 = get_address(env, r3);
1286     uint8_t pad = a2;
1287     uint32_t cc;
1288 
1289     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1290 
1291     set_length(env, r1 + 1, src1len);
1292     set_length(env, r3 + 1, src3len);
1293     set_address(env, r1, src1);
1294     set_address(env, r3, src3);
1295 
1296     return cc;
1297 }
1298 
1299 /* compare logical long unicode memcompare insn with padding */
1300 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1301                        uint32_t r3)
1302 {
1303     uintptr_t ra = GETPC();
1304     uint64_t src1len = get_length(env, r1 + 1);
1305     uint64_t src1 = get_address(env, r1);
1306     uint64_t src3len = get_length(env, r3 + 1);
1307     uint64_t src3 = get_address(env, r3);
1308     uint16_t pad = a2;
1309     uint32_t cc = 0;
1310 
1311     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1312 
1313     set_length(env, r1 + 1, src1len);
1314     set_length(env, r3 + 1, src3len);
1315     set_address(env, r1, src1);
1316     set_address(env, r3, src3);
1317 
1318     return cc;
1319 }
1320 
1321 /* checksum */
1322 Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1323                     uint64_t src, uint64_t src_len)
1324 {
1325     uintptr_t ra = GETPC();
1326     uint64_t max_len, len;
1327     uint64_t cksm = (uint32_t)r1;
1328 
1329     /* Lest we fail to service interrupts in a timely manner, limit the
1330        amount of work we're willing to do.  For now, let's cap at 8k.  */
1331     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1332 
1333     /* Process full words as available.  */
1334     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1335         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1336     }
1337 
1338     switch (max_len - len) {
1339     case 1:
1340         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1341         len += 1;
1342         break;
1343     case 2:
1344         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1345         len += 2;
1346         break;
1347     case 3:
1348         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1349         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1350         len += 3;
1351         break;
1352     }
1353 
1354     /* Fold the carry from the checksum.  Note that we can see carry-out
1355        during folding more than once (but probably not more than twice).  */
1356     while (cksm > 0xffffffffull) {
1357         cksm = (uint32_t)cksm + (cksm >> 32);
1358     }
1359 
1360     /* Indicate whether or not we've processed everything.  */
1361     env->cc_op = (len == src_len ? 0 : 3);
1362 
1363     /* Return both cksm and processed length.  */
1364     return int128_make128(cksm, len);
1365 }
1366 
1367 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1368 {
1369     uintptr_t ra = GETPC();
1370     int len_dest = len >> 4;
1371     int len_src = len & 0xf;
1372     uint8_t b;
1373 
1374     dest += len_dest;
1375     src += len_src;
1376 
1377     /* last byte is special, it only flips the nibbles */
1378     b = cpu_ldub_data_ra(env, src, ra);
1379     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1380     src--;
1381     len_src--;
1382 
1383     /* now pack every value */
1384     while (len_dest > 0) {
1385         b = 0;
1386 
1387         if (len_src >= 0) {
1388             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1389             src--;
1390             len_src--;
1391         }
1392         if (len_src >= 0) {
1393             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1394             src--;
1395             len_src--;
1396         }
1397 
1398         len_dest--;
1399         dest--;
1400         cpu_stb_data_ra(env, dest, b, ra);
1401     }
1402 }
1403 
1404 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1405                            uint32_t srclen, int ssize, uintptr_t ra)
1406 {
1407     int i;
1408     /* The destination operand is always 16 bytes long.  */
1409     const int destlen = 16;
1410 
1411     /* The operands are processed from right to left.  */
1412     src += srclen - 1;
1413     dest += destlen - 1;
1414 
1415     for (i = 0; i < destlen; i++) {
1416         uint8_t b = 0;
1417 
1418         /* Start with a positive sign */
1419         if (i == 0) {
1420             b = 0xc;
1421         } else if (srclen > ssize) {
1422             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1423             src -= ssize;
1424             srclen -= ssize;
1425         }
1426 
1427         if (srclen > ssize) {
1428             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1429             src -= ssize;
1430             srclen -= ssize;
1431         }
1432 
1433         cpu_stb_data_ra(env, dest, b, ra);
1434         dest--;
1435     }
1436 }
1437 
1438 
1439 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1440                  uint32_t srclen)
1441 {
1442     do_pkau(env, dest, src, srclen, 1, GETPC());
1443 }
1444 
1445 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1446                  uint32_t srclen)
1447 {
1448     do_pkau(env, dest, src, srclen, 2, GETPC());
1449 }
1450 
1451 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1452                   uint64_t src)
1453 {
1454     uintptr_t ra = GETPC();
1455     int len_dest = len >> 4;
1456     int len_src = len & 0xf;
1457     uint8_t b;
1458     int second_nibble = 0;
1459 
1460     dest += len_dest;
1461     src += len_src;
1462 
1463     /* last byte is special, it only flips the nibbles */
1464     b = cpu_ldub_data_ra(env, src, ra);
1465     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1466     src--;
1467     len_src--;
1468 
1469     /* now pad every nibble with 0xf0 */
1470 
1471     while (len_dest > 0) {
1472         uint8_t cur_byte = 0;
1473 
1474         if (len_src > 0) {
1475             cur_byte = cpu_ldub_data_ra(env, src, ra);
1476         }
1477 
1478         len_dest--;
1479         dest--;
1480 
1481         /* only advance one nibble at a time */
1482         if (second_nibble) {
1483             cur_byte >>= 4;
1484             len_src--;
1485             src--;
1486         }
1487         second_nibble = !second_nibble;
1488 
1489         /* digit */
1490         cur_byte = (cur_byte & 0xf);
1491         /* zone bits */
1492         cur_byte |= 0xf0;
1493 
1494         cpu_stb_data_ra(env, dest, cur_byte, ra);
1495     }
1496 }
1497 
1498 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1499                                  uint32_t destlen, int dsize, uint64_t src,
1500                                  uintptr_t ra)
1501 {
1502     int i;
1503     uint32_t cc;
1504     uint8_t b;
1505     /* The source operand is always 16 bytes long.  */
1506     const int srclen = 16;
1507 
1508     /* The operands are processed from right to left.  */
1509     src += srclen - 1;
1510     dest += destlen - dsize;
1511 
1512     /* Check for the sign.  */
1513     b = cpu_ldub_data_ra(env, src, ra);
1514     src--;
1515     switch (b & 0xf) {
1516     case 0xa:
1517     case 0xc:
1518     case 0xe ... 0xf:
1519         cc = 0;  /* plus */
1520         break;
1521     case 0xb:
1522     case 0xd:
1523         cc = 1;  /* minus */
1524         break;
1525     default:
1526     case 0x0 ... 0x9:
1527         cc = 3;  /* invalid */
1528         break;
1529     }
1530 
1531     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1532     for (i = 0; i < destlen; i += dsize) {
1533         if (i == (31 * dsize)) {
1534             /* If length is 32/64 bytes, the leftmost byte is 0. */
1535             b = 0;
1536         } else if (i % (2 * dsize)) {
1537             b = cpu_ldub_data_ra(env, src, ra);
1538             src--;
1539         } else {
1540             b >>= 4;
1541         }
1542         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1543         dest -= dsize;
1544     }
1545 
1546     return cc;
1547 }
1548 
1549 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1550                        uint64_t src)
1551 {
1552     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1553 }
1554 
1555 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1556                        uint64_t src)
1557 {
1558     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1559 }
1560 
1561 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1562 {
1563     uintptr_t ra = GETPC();
1564     uint32_t cc = 0;
1565     int i;
1566 
1567     for (i = 0; i < destlen; i++) {
1568         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1569         /* digit */
1570         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1571 
1572         if (i == (destlen - 1)) {
1573             /* sign */
1574             cc |= (b & 0xf) < 0xa ? 1 : 0;
1575         } else {
1576             /* digit */
1577             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1578         }
1579     }
1580 
1581     return cc;
1582 }
1583 
1584 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1585                              uint64_t trans, uintptr_t ra)
1586 {
1587     uint32_t i;
1588 
1589     for (i = 0; i <= len; i++) {
1590         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1591         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1592         cpu_stb_data_ra(env, array + i, new_byte, ra);
1593     }
1594 
1595     return env->cc_op;
1596 }
1597 
1598 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1599                 uint64_t trans)
1600 {
1601     do_helper_tr(env, len, array, trans, GETPC());
1602 }
1603 
1604 Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
1605                    uint64_t len, uint64_t trans)
1606 {
1607     uintptr_t ra = GETPC();
1608     uint8_t end = env->regs[0] & 0xff;
1609     uint64_t l = len;
1610     uint64_t i;
1611     uint32_t cc = 0;
1612 
1613     if (!(env->psw.mask & PSW_MASK_64)) {
1614         array &= 0x7fffffff;
1615         l = (uint32_t)l;
1616     }
1617 
1618     /* Lest we fail to service interrupts in a timely manner, limit the
1619        amount of work we're willing to do.  For now, let's cap at 8k.  */
1620     if (l > 0x2000) {
1621         l = 0x2000;
1622         cc = 3;
1623     }
1624 
1625     for (i = 0; i < l; i++) {
1626         uint8_t byte, new_byte;
1627 
1628         byte = cpu_ldub_data_ra(env, array + i, ra);
1629 
1630         if (byte == end) {
1631             cc = 1;
1632             break;
1633         }
1634 
1635         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1636         cpu_stb_data_ra(env, array + i, new_byte, ra);
1637     }
1638 
1639     env->cc_op = cc;
1640     return int128_make128(len - i, array + i);
1641 }
1642 
1643 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1644                                      uint64_t array, uint64_t trans,
1645                                      int inc, uintptr_t ra)
1646 {
1647     int i;
1648 
1649     for (i = 0; i <= len; i++) {
1650         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1651         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1652 
1653         if (sbyte != 0) {
1654             set_address(env, 1, array + i * inc);
1655             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1656             return (i == len) ? 2 : 1;
1657         }
1658     }
1659 
1660     return 0;
1661 }
1662 
1663 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1664                                   uint64_t array, uint64_t trans,
1665                                   uintptr_t ra)
1666 {
1667     return do_helper_trt(env, len, array, trans, 1, ra);
1668 }
1669 
1670 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1671                      uint64_t trans)
1672 {
1673     return do_helper_trt(env, len, array, trans, 1, GETPC());
1674 }
1675 
1676 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1677                                    uint64_t array, uint64_t trans,
1678                                    uintptr_t ra)
1679 {
1680     return do_helper_trt(env, len, array, trans, -1, ra);
1681 }
1682 
1683 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1684                       uint64_t trans)
1685 {
1686     return do_helper_trt(env, len, array, trans, -1, GETPC());
1687 }
1688 
1689 /* Translate one/two to one/two */
1690 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1691                       uint32_t tst, uint32_t sizes)
1692 {
1693     uintptr_t ra = GETPC();
1694     int dsize = (sizes & 1) ? 1 : 2;
1695     int ssize = (sizes & 2) ? 1 : 2;
1696     uint64_t tbl = get_address(env, 1);
1697     uint64_t dst = get_address(env, r1);
1698     uint64_t len = get_length(env, r1 + 1);
1699     uint64_t src = get_address(env, r2);
1700     uint32_t cc = 3;
1701     int i;
1702 
1703     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1704        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1705        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1706     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1707         tbl &= -4096;
1708     } else {
1709         tbl &= -8;
1710     }
1711 
1712     check_alignment(env, len, ssize, ra);
1713 
1714     /* Lest we fail to service interrupts in a timely manner, */
1715     /* limit the amount of work we're willing to do.   */
1716     for (i = 0; i < 0x2000; i++) {
1717         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1718         uint64_t tble = tbl + (sval * dsize);
1719         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1720         if (dval == tst) {
1721             cc = 1;
1722             break;
1723         }
1724         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1725 
1726         len -= ssize;
1727         src += ssize;
1728         dst += dsize;
1729 
1730         if (len == 0) {
1731             cc = 0;
1732             break;
1733         }
1734     }
1735 
1736     set_address(env, r1, dst);
1737     set_length(env, r1 + 1, len);
1738     set_address(env, r2, src);
1739 
1740     return cc;
1741 }
1742 
1743 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1744                         uint64_t a2, bool parallel)
1745 {
1746     uint32_t mem_idx = s390x_env_mmu_index(env, false);
1747     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1748     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1749     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1750     MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1751     MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1752     uintptr_t ra = GETPC();
1753     uint32_t fc = extract32(env->regs[0], 0, 8);
1754     uint32_t sc = extract32(env->regs[0], 8, 8);
1755     uint64_t pl = get_address(env, 1) & -16;
1756     uint64_t svh, svl;
1757     uint32_t cc;
1758 
1759     /* Sanity check the function code and storage characteristic.  */
1760     if (fc > 1 || sc > 3) {
1761         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1762             goto spec_exception;
1763         }
1764         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1765             goto spec_exception;
1766         }
1767     }
1768 
1769     /* Sanity check the alignments.  */
1770     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1771         goto spec_exception;
1772     }
1773 
1774     /* Sanity check writability of the store address.  */
1775     probe_write(env, a2, 1 << sc, mem_idx, ra);
1776 
1777     /*
1778      * Note that the compare-and-swap is atomic, and the store is atomic,
1779      * but the complete operation is not.  Therefore we do not need to
1780      * assert serial context in order to implement this.  That said,
1781      * restart early if we can't support either operation that is supposed
1782      * to be atomic.
1783      */
1784     if (parallel) {
1785         uint32_t max = 2;
1786 #ifdef CONFIG_ATOMIC64
1787         max = 3;
1788 #endif
1789         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1790             (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1791             cpu_loop_exit_atomic(env_cpu(env), ra);
1792         }
1793     }
1794 
1795     /*
1796      * All loads happen before all stores.  For simplicity, load the entire
1797      * store value area from the parameter list.
1798      */
1799     svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1800     svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1801 
1802     switch (fc) {
1803     case 0:
1804         {
1805             uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1806             uint32_t cv = env->regs[r3];
1807             uint32_t ov;
1808 
1809             if (parallel) {
1810                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1811             } else {
1812                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
1813                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1814             }
1815             cc = (ov != cv);
1816             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1817         }
1818         break;
1819 
1820     case 1:
1821         {
1822             uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1823             uint64_t cv = env->regs[r3];
1824             uint64_t ov;
1825 
1826             if (parallel) {
1827 #ifdef CONFIG_ATOMIC64
1828                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1829 #else
1830                 /* Note that we asserted !parallel above.  */
1831                 g_assert_not_reached();
1832 #endif
1833             } else {
1834                 ov = cpu_ldq_mmu(env, a1, oi8, ra);
1835                 cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1836             }
1837             cc = (ov != cv);
1838             env->regs[r3] = ov;
1839         }
1840         break;
1841 
1842     case 2:
1843         {
1844             Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1845             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1846             Int128 ov;
1847 
1848             if (!parallel) {
1849                 ov = cpu_ld16_mmu(env, a1, oi16, ra);
1850                 cc = !int128_eq(ov, cv);
1851                 if (cc) {
1852                     nv = ov;
1853                 }
1854                 cpu_st16_mmu(env, a1, nv, oi16, ra);
1855             } else if (HAVE_CMPXCHG128) {
1856                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1857                 cc = !int128_eq(ov, cv);
1858             } else {
1859                 /* Note that we asserted !parallel above.  */
1860                 g_assert_not_reached();
1861             }
1862 
1863             env->regs[r3 + 0] = int128_gethi(ov);
1864             env->regs[r3 + 1] = int128_getlo(ov);
1865         }
1866         break;
1867 
1868     default:
1869         g_assert_not_reached();
1870     }
1871 
1872     /* Store only if the comparison succeeded.  Note that above we use a pair
1873        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1874        from the most-significant bits of svh.  */
1875     if (cc == 0) {
1876         switch (sc) {
1877         case 0:
1878             cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1879             break;
1880         case 1:
1881             cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1882             break;
1883         case 2:
1884             cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1885             break;
1886         case 3:
1887             cpu_stq_mmu(env, a2, svh, oi8, ra);
1888             break;
1889         case 4:
1890             cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1891             break;
1892         default:
1893             g_assert_not_reached();
1894         }
1895     }
1896 
1897     return cc;
1898 
1899  spec_exception:
1900     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1901 }
1902 
1903 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1904 {
1905     return do_csst(env, r3, a1, a2, false);
1906 }
1907 
1908 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1909                                uint64_t a2)
1910 {
1911     return do_csst(env, r3, a1, a2, true);
1912 }
1913 
1914 #if !defined(CONFIG_USER_ONLY)
1915 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1916 {
1917     uintptr_t ra = GETPC();
1918     bool PERchanged = false;
1919     uint64_t src = a2;
1920     uint32_t i;
1921 
1922     if (src & 0x7) {
1923         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1924     }
1925 
1926     for (i = r1;; i = (i + 1) % 16) {
1927         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1928         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1929             PERchanged = true;
1930         }
1931         env->cregs[i] = val;
1932         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1933                    i, src, val);
1934         src += sizeof(uint64_t);
1935 
1936         if (i == r3) {
1937             break;
1938         }
1939     }
1940 
1941     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1942         s390_cpu_recompute_watchpoints(env_cpu(env));
1943     }
1944 
1945     tlb_flush(env_cpu(env));
1946 }
1947 
1948 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1949 {
1950     uintptr_t ra = GETPC();
1951     bool PERchanged = false;
1952     uint64_t src = a2;
1953     uint32_t i;
1954 
1955     if (src & 0x3) {
1956         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1957     }
1958 
1959     for (i = r1;; i = (i + 1) % 16) {
1960         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1961         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1962             PERchanged = true;
1963         }
1964         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1965         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1966         src += sizeof(uint32_t);
1967 
1968         if (i == r3) {
1969             break;
1970         }
1971     }
1972 
1973     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1974         s390_cpu_recompute_watchpoints(env_cpu(env));
1975     }
1976 
1977     tlb_flush(env_cpu(env));
1978 }
1979 
1980 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1981 {
1982     uintptr_t ra = GETPC();
1983     uint64_t dest = a2;
1984     uint32_t i;
1985 
1986     if (dest & 0x7) {
1987         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1988     }
1989 
1990     for (i = r1;; i = (i + 1) % 16) {
1991         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
1992         dest += sizeof(uint64_t);
1993 
1994         if (i == r3) {
1995             break;
1996         }
1997     }
1998 }
1999 
2000 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2001 {
2002     uintptr_t ra = GETPC();
2003     uint64_t dest = a2;
2004     uint32_t i;
2005 
2006     if (dest & 0x3) {
2007         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2008     }
2009 
2010     for (i = r1;; i = (i + 1) % 16) {
2011         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2012         dest += sizeof(uint32_t);
2013 
2014         if (i == r3) {
2015             break;
2016         }
2017     }
2018 }
2019 
2020 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2021 {
2022     uintptr_t ra = GETPC();
2023     int i;
2024 
2025     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2026 
2027     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2028         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2029     }
2030 
2031     return 0;
2032 }
2033 
2034 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2035 {
2036     S390CPU *cpu = env_archcpu(env);
2037     CPUState *cs = env_cpu(env);
2038 
2039     /*
2040      * TODO: we currently don't handle all access protection types
2041      * (including access-list and key-controlled) as well as AR mode.
2042      */
2043     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2044         /* Fetching permitted; storing permitted */
2045         return 0;
2046     }
2047 
2048     if (env->int_pgm_code == PGM_PROTECTION) {
2049         /* retry if reading is possible */
2050         cs->exception_index = -1;
2051         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2052             /* Fetching permitted; storing not permitted */
2053             return 1;
2054         }
2055     }
2056 
2057     switch (env->int_pgm_code) {
2058     case PGM_PROTECTION:
2059         /* Fetching not permitted; storing not permitted */
2060         cs->exception_index = -1;
2061         return 2;
2062     case PGM_ADDRESSING:
2063     case PGM_TRANS_SPEC:
2064         /* exceptions forwarded to the guest */
2065         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2066         return 0;
2067     }
2068 
2069     /* Translation not available */
2070     cs->exception_index = -1;
2071     return 3;
2072 }
2073 
2074 /* insert storage key extended */
2075 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2076 {
2077     static S390SKeysState *ss;
2078     static S390SKeysClass *skeyclass;
2079     uint64_t addr = wrap_address(env, r2);
2080     uint8_t key;
2081     int rc;
2082 
2083     addr = mmu_real2abs(env, addr);
2084     if (!mmu_absolute_addr_valid(addr, false)) {
2085         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2086     }
2087 
2088     if (unlikely(!ss)) {
2089         ss = s390_get_skeys_device();
2090         skeyclass = S390_SKEYS_GET_CLASS(ss);
2091         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2092             tlb_flush_all_cpus_synced(env_cpu(env));
2093         }
2094     }
2095 
2096     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2097     if (rc) {
2098         trace_get_skeys_nonzero(rc);
2099         return 0;
2100     }
2101     return key;
2102 }
2103 
2104 /* set storage key extended */
2105 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2106 {
2107     static S390SKeysState *ss;
2108     static S390SKeysClass *skeyclass;
2109     uint64_t addr = wrap_address(env, r2);
2110     uint8_t key;
2111     int rc;
2112 
2113     addr = mmu_real2abs(env, addr);
2114     if (!mmu_absolute_addr_valid(addr, false)) {
2115         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2116     }
2117 
2118     if (unlikely(!ss)) {
2119         ss = s390_get_skeys_device();
2120         skeyclass = S390_SKEYS_GET_CLASS(ss);
2121         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2122             tlb_flush_all_cpus_synced(env_cpu(env));
2123         }
2124     }
2125 
2126     key = r1 & 0xfe;
2127     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2128     if (rc) {
2129         trace_set_skeys_nonzero(rc);
2130     }
2131    /*
2132     * As we can only flush by virtual address and not all the entries
2133     * that point to a physical address we have to flush the whole TLB.
2134     */
2135     tlb_flush_all_cpus_synced(env_cpu(env));
2136 }
2137 
2138 /* reset reference bit extended */
2139 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2140 {
2141     uint64_t addr = wrap_address(env, r2);
2142     static S390SKeysState *ss;
2143     static S390SKeysClass *skeyclass;
2144     uint8_t re, key;
2145     int rc;
2146 
2147     addr = mmu_real2abs(env, addr);
2148     if (!mmu_absolute_addr_valid(addr, false)) {
2149         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2150     }
2151 
2152     if (unlikely(!ss)) {
2153         ss = s390_get_skeys_device();
2154         skeyclass = S390_SKEYS_GET_CLASS(ss);
2155         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2156             tlb_flush_all_cpus_synced(env_cpu(env));
2157         }
2158     }
2159 
2160     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2161     if (rc) {
2162         trace_get_skeys_nonzero(rc);
2163         return 0;
2164     }
2165 
2166     re = key & (SK_R | SK_C);
2167     key &= ~SK_R;
2168 
2169     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2170     if (rc) {
2171         trace_set_skeys_nonzero(rc);
2172         return 0;
2173     }
2174    /*
2175     * As we can only flush by virtual address and not all the entries
2176     * that point to a physical address we have to flush the whole TLB.
2177     */
2178     tlb_flush_all_cpus_synced(env_cpu(env));
2179 
2180     /*
2181      * cc
2182      *
2183      * 0  Reference bit zero; change bit zero
2184      * 1  Reference bit zero; change bit one
2185      * 2  Reference bit one; change bit zero
2186      * 3  Reference bit one; change bit one
2187      */
2188 
2189     return re >> 1;
2190 }
2191 
2192 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2193                       uint64_t key)
2194 {
2195     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2196     S390Access srca, desta;
2197     uintptr_t ra = GETPC();
2198     int cc = 0;
2199 
2200     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2201                __func__, l, a1, a2);
2202 
2203     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2204         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2205         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2206     }
2207 
2208     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2209         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2210     }
2211 
2212     l = wrap_length32(env, l);
2213     if (l > 256) {
2214         /* max 256 */
2215         l = 256;
2216         cc = 3;
2217     } else if (!l) {
2218         return cc;
2219     }
2220 
2221     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2222     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2223     access_memmove(env, &desta, &srca, ra);
2224     return cc;
2225 }
2226 
2227 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2228                       uint64_t key)
2229 {
2230     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2231     S390Access srca, desta;
2232     uintptr_t ra = GETPC();
2233     int cc = 0;
2234 
2235     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2236                __func__, l, a1, a2);
2237 
2238     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2239         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2240         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2241     }
2242 
2243     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2244         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2245     }
2246 
2247     l = wrap_length32(env, l);
2248     if (l > 256) {
2249         /* max 256 */
2250         l = 256;
2251         cc = 3;
2252     } else if (!l) {
2253         return cc;
2254     }
2255     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2256     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2257     access_memmove(env, &desta, &srca, ra);
2258     return cc;
2259 }
2260 
2261 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2262 {
2263     CPUState *cs = env_cpu(env);
2264     const uintptr_t ra = GETPC();
2265     uint64_t table, entry, raddr;
2266     uint16_t entries, i, index = 0;
2267 
2268     if (r2 & 0xff000) {
2269         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2270     }
2271 
2272     if (!(r2 & 0x800)) {
2273         /* invalidation-and-clearing operation */
2274         table = r1 & ASCE_ORIGIN;
2275         entries = (r2 & 0x7ff) + 1;
2276 
2277         switch (r1 & ASCE_TYPE_MASK) {
2278         case ASCE_TYPE_REGION1:
2279             index = (r2 >> 53) & 0x7ff;
2280             break;
2281         case ASCE_TYPE_REGION2:
2282             index = (r2 >> 42) & 0x7ff;
2283             break;
2284         case ASCE_TYPE_REGION3:
2285             index = (r2 >> 31) & 0x7ff;
2286             break;
2287         case ASCE_TYPE_SEGMENT:
2288             index = (r2 >> 20) & 0x7ff;
2289             break;
2290         }
2291         for (i = 0; i < entries; i++) {
2292             /* addresses are not wrapped in 24/31bit mode but table index is */
2293             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2294             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2295             if (!(entry & REGION_ENTRY_I)) {
2296                 /* we are allowed to not store if already invalid */
2297                 entry |= REGION_ENTRY_I;
2298                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2299             }
2300         }
2301     }
2302 
2303     /* We simply flush the complete tlb, therefore we can ignore r3. */
2304     if (m4 & 1) {
2305         tlb_flush(cs);
2306     } else {
2307         tlb_flush_all_cpus_synced(cs);
2308     }
2309 }
2310 
2311 /* invalidate pte */
2312 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2313                   uint32_t m4)
2314 {
2315     CPUState *cs = env_cpu(env);
2316     const uintptr_t ra = GETPC();
2317     uint64_t page = vaddr & TARGET_PAGE_MASK;
2318     uint64_t pte_addr, pte;
2319 
2320     /* Compute the page table entry address */
2321     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2322     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2323 
2324     /* Mark the page table entry as invalid */
2325     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2326     pte |= PAGE_ENTRY_I;
2327     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2328 
2329     /* XXX we exploit the fact that Linux passes the exact virtual
2330        address here - it's not obliged to! */
2331     if (m4 & 1) {
2332         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2333             tlb_flush_page(cs, page);
2334             /* XXX 31-bit hack */
2335             tlb_flush_page(cs, page ^ 0x80000000);
2336         } else {
2337             /* looks like we don't have a valid virtual address */
2338             tlb_flush(cs);
2339         }
2340     } else {
2341         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2342             tlb_flush_page_all_cpus_synced(cs, page);
2343             /* XXX 31-bit hack */
2344             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2345         } else {
2346             /* looks like we don't have a valid virtual address */
2347             tlb_flush_all_cpus_synced(cs);
2348         }
2349     }
2350 }
2351 
2352 /* flush local tlb */
2353 void HELPER(ptlb)(CPUS390XState *env)
2354 {
2355     tlb_flush(env_cpu(env));
2356 }
2357 
2358 /* flush global tlb */
2359 void HELPER(purge)(CPUS390XState *env)
2360 {
2361     tlb_flush_all_cpus_synced(env_cpu(env));
2362 }
2363 
2364 /* load real address */
2365 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
2366 {
2367     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2368     uint64_t ret, tec;
2369     int flags, exc, cc;
2370 
2371     /* XXX incomplete - has more corner cases */
2372     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2373         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2374     }
2375 
2376     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2377     if (exc) {
2378         cc = 3;
2379         ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
2380     } else {
2381         cc = 0;
2382         ret |= addr & ~TARGET_PAGE_MASK;
2383     }
2384 
2385     env->cc_op = cc;
2386     return ret;
2387 }
2388 #endif
2389 
2390 /* Execute instruction.  This instruction executes an insn modified with
2391    the contents of r1.  It does not change the executed instruction in memory;
2392    it does not change the program counter.
2393 
2394    Perform this by recording the modified instruction in env->ex_value.
2395    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2396 */
2397 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2398 {
2399     uint64_t insn;
2400     uint8_t opc;
2401 
2402     /* EXECUTE targets must be at even addresses.  */
2403     if (addr & 1) {
2404         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
2405     }
2406 
2407     insn = cpu_lduw_code(env, addr);
2408     opc = insn >> 8;
2409 
2410     /* Or in the contents of R1[56:63].  */
2411     insn |= r1 & 0xff;
2412 
2413     /* Load the rest of the instruction.  */
2414     insn <<= 48;
2415     switch (get_ilen(opc)) {
2416     case 2:
2417         break;
2418     case 4:
2419         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2420         break;
2421     case 6:
2422         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2423         break;
2424     default:
2425         g_assert_not_reached();
2426     }
2427 
2428     /* The very most common cases can be sped up by avoiding a new TB.  */
2429     if ((opc & 0xf0) == 0xd0) {
2430         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2431                                       uint64_t, uintptr_t);
2432         static const dx_helper dx[16] = {
2433             [0x0] = do_helper_trt_bkwd,
2434             [0x2] = do_helper_mvc,
2435             [0x4] = do_helper_nc,
2436             [0x5] = do_helper_clc,
2437             [0x6] = do_helper_oc,
2438             [0x7] = do_helper_xc,
2439             [0xc] = do_helper_tr,
2440             [0xd] = do_helper_trt_fwd,
2441         };
2442         dx_helper helper = dx[opc & 0xf];
2443 
2444         if (helper) {
2445             uint32_t l = extract64(insn, 48, 8);
2446             uint32_t b1 = extract64(insn, 44, 4);
2447             uint32_t d1 = extract64(insn, 32, 12);
2448             uint32_t b2 = extract64(insn, 28, 4);
2449             uint32_t d2 = extract64(insn, 16, 12);
2450             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2451             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2452 
2453             env->cc_op = helper(env, l, a1, a2, 0);
2454             env->psw.addr += ilen;
2455             return;
2456         }
2457     } else if (opc == 0x0a) {
2458         env->int_svc_code = extract64(insn, 48, 8);
2459         env->int_svc_ilen = ilen;
2460         helper_exception(env, EXCP_SVC);
2461         g_assert_not_reached();
2462     }
2463 
2464     /* Record the insn we want to execute as well as the ilen to use
2465        during the execution of the target insn.  This will also ensure
2466        that ex_value is non-zero, which flags that we are in a state
2467        that requires such execution.  */
2468     env->ex_value = insn | ilen;
2469     env->ex_target = addr;
2470 }
2471 
2472 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2473                        uint64_t len)
2474 {
2475     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2476     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2477     const uint64_t r0 = env->regs[0];
2478     const uintptr_t ra = GETPC();
2479     uint8_t dest_key, dest_as, dest_k, dest_a;
2480     uint8_t src_key, src_as, src_k, src_a;
2481     uint64_t val;
2482     int cc = 0;
2483 
2484     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2485                __func__, dest, src, len);
2486 
2487     if (!(env->psw.mask & PSW_MASK_DAT)) {
2488         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2489     }
2490 
2491     /* OAC (operand access control) for the first operand -> dest */
2492     val = (r0 & 0xffff0000ULL) >> 16;
2493     dest_key = (val >> 12) & 0xf;
2494     dest_as = (val >> 6) & 0x3;
2495     dest_k = (val >> 1) & 0x1;
2496     dest_a = val & 0x1;
2497 
2498     /* OAC (operand access control) for the second operand -> src */
2499     val = (r0 & 0x0000ffffULL);
2500     src_key = (val >> 12) & 0xf;
2501     src_as = (val >> 6) & 0x3;
2502     src_k = (val >> 1) & 0x1;
2503     src_a = val & 0x1;
2504 
2505     if (!dest_k) {
2506         dest_key = psw_key;
2507     }
2508     if (!src_k) {
2509         src_key = psw_key;
2510     }
2511     if (!dest_a) {
2512         dest_as = psw_as;
2513     }
2514     if (!src_a) {
2515         src_as = psw_as;
2516     }
2517 
2518     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2519         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2520     }
2521     if (!(env->cregs[0] & CR0_SECONDARY) &&
2522         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2523         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2524     }
2525     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2526         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2527     }
2528 
2529     len = wrap_length32(env, len);
2530     if (len > 4096) {
2531         cc = 3;
2532         len = 4096;
2533     }
2534 
2535     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2536     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2537         (env->psw.mask & PSW_MASK_PSTATE)) {
2538         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2539                       __func__);
2540         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2541     }
2542 
2543     /* FIXME: Access using correct keys and AR-mode */
2544     if (len) {
2545         S390Access srca, desta;
2546 
2547         access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
2548                        mmu_idx_from_as(src_as), ra);
2549         access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
2550                        mmu_idx_from_as(dest_as), ra);
2551 
2552         access_memmove(env, &desta, &srca, ra);
2553     }
2554 
2555     return cc;
2556 }
2557 
2558 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2559    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2560    value >= 0 indicates failure, and the CC value to be returned.  */
2561 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2562                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2563                                  uint32_t *ochar, uint32_t *olen);
2564 
2565 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2566    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2567    indicates failure, and the CC value to be returned.  */
2568 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2569                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2570                                  uint32_t *olen);
2571 
2572 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2573                        bool enh_check, uintptr_t ra,
2574                        uint32_t *ochar, uint32_t *olen)
2575 {
2576     uint8_t s0, s1, s2, s3;
2577     uint32_t c, l;
2578 
2579     if (ilen < 1) {
2580         return 0;
2581     }
2582     s0 = cpu_ldub_data_ra(env, addr, ra);
2583     if (s0 <= 0x7f) {
2584         /* one byte character */
2585         l = 1;
2586         c = s0;
2587     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2588         /* invalid character */
2589         return 2;
2590     } else if (s0 <= 0xdf) {
2591         /* two byte character */
2592         l = 2;
2593         if (ilen < 2) {
2594             return 0;
2595         }
2596         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2597         c = s0 & 0x1f;
2598         c = (c << 6) | (s1 & 0x3f);
2599         if (enh_check && (s1 & 0xc0) != 0x80) {
2600             return 2;
2601         }
2602     } else if (s0 <= 0xef) {
2603         /* three byte character */
2604         l = 3;
2605         if (ilen < 3) {
2606             return 0;
2607         }
2608         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2609         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2610         c = s0 & 0x0f;
2611         c = (c << 6) | (s1 & 0x3f);
2612         c = (c << 6) | (s2 & 0x3f);
2613         /* Fold the byte-by-byte range descriptions in the PoO into
2614            tests against the complete value.  It disallows encodings
2615            that could be smaller, and the UTF-16 surrogates.  */
2616         if (enh_check
2617             && ((s1 & 0xc0) != 0x80
2618                 || (s2 & 0xc0) != 0x80
2619                 || c < 0x1000
2620                 || (c >= 0xd800 && c <= 0xdfff))) {
2621             return 2;
2622         }
2623     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2624         /* four byte character */
2625         l = 4;
2626         if (ilen < 4) {
2627             return 0;
2628         }
2629         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2630         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2631         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2632         c = s0 & 0x07;
2633         c = (c << 6) | (s1 & 0x3f);
2634         c = (c << 6) | (s2 & 0x3f);
2635         c = (c << 6) | (s3 & 0x3f);
2636         /* See above.  */
2637         if (enh_check
2638             && ((s1 & 0xc0) != 0x80
2639                 || (s2 & 0xc0) != 0x80
2640                 || (s3 & 0xc0) != 0x80
2641                 || c < 0x010000
2642                 || c > 0x10ffff)) {
2643             return 2;
2644         }
2645     } else {
2646         /* invalid character */
2647         return 2;
2648     }
2649 
2650     *ochar = c;
2651     *olen = l;
2652     return -1;
2653 }
2654 
2655 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2656                         bool enh_check, uintptr_t ra,
2657                         uint32_t *ochar, uint32_t *olen)
2658 {
2659     uint16_t s0, s1;
2660     uint32_t c, l;
2661 
2662     if (ilen < 2) {
2663         return 0;
2664     }
2665     s0 = cpu_lduw_data_ra(env, addr, ra);
2666     if ((s0 & 0xfc00) != 0xd800) {
2667         /* one word character */
2668         l = 2;
2669         c = s0;
2670     } else {
2671         /* two word character */
2672         l = 4;
2673         if (ilen < 4) {
2674             return 0;
2675         }
2676         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2677         c = extract32(s0, 6, 4) + 1;
2678         c = (c << 6) | (s0 & 0x3f);
2679         c = (c << 10) | (s1 & 0x3ff);
2680         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2681             /* invalid surrogate character */
2682             return 2;
2683         }
2684     }
2685 
2686     *ochar = c;
2687     *olen = l;
2688     return -1;
2689 }
2690 
2691 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2692                         bool enh_check, uintptr_t ra,
2693                         uint32_t *ochar, uint32_t *olen)
2694 {
2695     uint32_t c;
2696 
2697     if (ilen < 4) {
2698         return 0;
2699     }
2700     c = cpu_ldl_data_ra(env, addr, ra);
2701     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2702         /* invalid unicode character */
2703         return 2;
2704     }
2705 
2706     *ochar = c;
2707     *olen = 4;
2708     return -1;
2709 }
2710 
2711 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2712                        uintptr_t ra, uint32_t c, uint32_t *olen)
2713 {
2714     uint8_t d[4];
2715     uint32_t l, i;
2716 
2717     if (c <= 0x7f) {
2718         /* one byte character */
2719         l = 1;
2720         d[0] = c;
2721     } else if (c <= 0x7ff) {
2722         /* two byte character */
2723         l = 2;
2724         d[1] = 0x80 | extract32(c, 0, 6);
2725         d[0] = 0xc0 | extract32(c, 6, 5);
2726     } else if (c <= 0xffff) {
2727         /* three byte character */
2728         l = 3;
2729         d[2] = 0x80 | extract32(c, 0, 6);
2730         d[1] = 0x80 | extract32(c, 6, 6);
2731         d[0] = 0xe0 | extract32(c, 12, 4);
2732     } else {
2733         /* four byte character */
2734         l = 4;
2735         d[3] = 0x80 | extract32(c, 0, 6);
2736         d[2] = 0x80 | extract32(c, 6, 6);
2737         d[1] = 0x80 | extract32(c, 12, 6);
2738         d[0] = 0xf0 | extract32(c, 18, 3);
2739     }
2740 
2741     if (ilen < l) {
2742         return 1;
2743     }
2744     for (i = 0; i < l; ++i) {
2745         cpu_stb_data_ra(env, addr + i, d[i], ra);
2746     }
2747 
2748     *olen = l;
2749     return -1;
2750 }
2751 
2752 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2753                         uintptr_t ra, uint32_t c, uint32_t *olen)
2754 {
2755     uint16_t d0, d1;
2756 
2757     if (c <= 0xffff) {
2758         /* one word character */
2759         if (ilen < 2) {
2760             return 1;
2761         }
2762         cpu_stw_data_ra(env, addr, c, ra);
2763         *olen = 2;
2764     } else {
2765         /* two word character */
2766         if (ilen < 4) {
2767             return 1;
2768         }
2769         d1 = 0xdc00 | extract32(c, 0, 10);
2770         d0 = 0xd800 | extract32(c, 10, 6);
2771         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2772         cpu_stw_data_ra(env, addr + 0, d0, ra);
2773         cpu_stw_data_ra(env, addr + 2, d1, ra);
2774         *olen = 4;
2775     }
2776 
2777     return -1;
2778 }
2779 
2780 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2781                         uintptr_t ra, uint32_t c, uint32_t *olen)
2782 {
2783     if (ilen < 4) {
2784         return 1;
2785     }
2786     cpu_stl_data_ra(env, addr, c, ra);
2787     *olen = 4;
2788     return -1;
2789 }
2790 
2791 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2792                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2793                                        decode_unicode_fn decode,
2794                                        encode_unicode_fn encode)
2795 {
2796     uint64_t dst = get_address(env, r1);
2797     uint64_t dlen = get_length(env, r1 + 1);
2798     uint64_t src = get_address(env, r2);
2799     uint64_t slen = get_length(env, r2 + 1);
2800     bool enh_check = m3 & 1;
2801     int cc, i;
2802 
2803     /* Lest we fail to service interrupts in a timely manner, limit the
2804        amount of work we're willing to do.  For now, let's cap at 256.  */
2805     for (i = 0; i < 256; ++i) {
2806         uint32_t c, ilen, olen;
2807 
2808         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2809         if (unlikely(cc >= 0)) {
2810             break;
2811         }
2812         cc = encode(env, dst, dlen, ra, c, &olen);
2813         if (unlikely(cc >= 0)) {
2814             break;
2815         }
2816 
2817         src += ilen;
2818         slen -= ilen;
2819         dst += olen;
2820         dlen -= olen;
2821         cc = 3;
2822     }
2823 
2824     set_address(env, r1, dst);
2825     set_length(env, r1 + 1, dlen);
2826     set_address(env, r2, src);
2827     set_length(env, r2 + 1, slen);
2828 
2829     return cc;
2830 }
2831 
2832 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2833 {
2834     return convert_unicode(env, r1, r2, m3, GETPC(),
2835                            decode_utf8, encode_utf16);
2836 }
2837 
2838 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2839 {
2840     return convert_unicode(env, r1, r2, m3, GETPC(),
2841                            decode_utf8, encode_utf32);
2842 }
2843 
2844 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2845 {
2846     return convert_unicode(env, r1, r2, m3, GETPC(),
2847                            decode_utf16, encode_utf8);
2848 }
2849 
2850 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2851 {
2852     return convert_unicode(env, r1, r2, m3, GETPC(),
2853                            decode_utf16, encode_utf32);
2854 }
2855 
2856 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2857 {
2858     return convert_unicode(env, r1, r2, m3, GETPC(),
2859                            decode_utf32, encode_utf8);
2860 }
2861 
2862 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2863 {
2864     return convert_unicode(env, r1, r2, m3, GETPC(),
2865                            decode_utf32, encode_utf16);
2866 }
2867 
2868 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2869                         uintptr_t ra)
2870 {
2871     const int mmu_idx = s390x_env_mmu_index(env, false);
2872 
2873     /* test the actual access, not just any access to the page due to LAP */
2874     while (len) {
2875         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2876         const uint64_t curlen = MIN(pagelen, len);
2877 
2878         probe_write(env, addr, curlen, mmu_idx, ra);
2879         addr = wrap_address(env, addr + curlen);
2880         len -= curlen;
2881     }
2882 }
2883 
2884 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2885 {
2886     probe_write_access(env, addr, len, GETPC());
2887 }
2888