xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 814e46594da891955a6e111e2c253137fcd43f07)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/page-protection.h"
29 #include "exec/cpu_ldst.h"
30 #include "hw/core/tcg-cpu-ops.h"
31 #include "qemu/int128.h"
32 #include "qemu/atomic128.h"
33 
34 #if !defined(CONFIG_USER_ONLY)
35 #include "hw/s390x/storage-keys.h"
36 #include "hw/boards.h"
37 #endif
38 
39 #ifdef CONFIG_USER_ONLY
40 # define user_or_likely(X)    true
41 #else
42 # define user_or_likely(X)    likely(X)
43 #endif
44 
45 /*****************************************************************************/
46 /* Softmmu support */
47 
48 /* #define DEBUG_HELPER */
49 #ifdef DEBUG_HELPER
50 #define HELPER_LOG(x...) qemu_log(x)
51 #else
52 #define HELPER_LOG(x...)
53 #endif
54 
55 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
56 {
57     uint16_t pkm = env->cregs[3] >> 16;
58 
59     if (env->psw.mask & PSW_MASK_PSTATE) {
60         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
61         return pkm & (0x8000 >> psw_key);
62     }
63     return true;
64 }
65 
66 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
67                                    uint64_t src, uint32_t len)
68 {
69     if (!len || src == dest) {
70         return false;
71     }
72     /* Take care of wrapping at the end of address space. */
73     if (unlikely(wrap_address(env, src + len - 1) < src)) {
74         return dest > src || dest <= wrap_address(env, src + len - 1);
75     }
76     return dest > src && dest <= src + len - 1;
77 }
78 
79 /* Trigger a SPECIFICATION exception if an address or a length is not
80    naturally aligned.  */
81 static inline void check_alignment(CPUS390XState *env, uint64_t v,
82                                    int wordsize, uintptr_t ra)
83 {
84     if (v % wordsize) {
85         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
86     }
87 }
88 
89 /* Load a value from memory according to its size.  */
90 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
91                                            int wordsize, uintptr_t ra)
92 {
93     switch (wordsize) {
94     case 1:
95         return cpu_ldub_data_ra(env, addr, ra);
96     case 2:
97         return cpu_lduw_data_ra(env, addr, ra);
98     default:
99         abort();
100     }
101 }
102 
103 /* Store a to memory according to its size.  */
104 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
105                                       uint64_t value, int wordsize,
106                                       uintptr_t ra)
107 {
108     switch (wordsize) {
109     case 1:
110         cpu_stb_data_ra(env, addr, value, ra);
111         break;
112     case 2:
113         cpu_stw_data_ra(env, addr, value, ra);
114         break;
115     default:
116         abort();
117     }
118 }
119 
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest address of the first (or only) part */
    target_ulong vaddr2;   /* guest address of the part on the second page */
    void *haddr1;          /* host address of part 1; NULL forces ld/st helpers */
    void *haddr2;          /* host address of part 2; NULL forces ld/st helpers */
    uint16_t size1;        /* number of bytes on the first page */
    uint16_t size2;        /* number of bytes on the second page (0 if none) */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
136 
/*
 * Probe a single-page guest access and resolve its host address (*phost),
 * raising watchpoints as needed.
 *
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        /* The exception type was recorded by the tlb_fill path. */
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
175 
176 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
177                              bool nonfault, vaddr vaddr1, int size,
178                              MMUAccessType access_type,
179                              int mmu_idx, uintptr_t ra)
180 {
181     int size1, size2, exc;
182 
183     assert(size > 0 && size <= 4096);
184 
185     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
186     size2 = size - size1;
187 
188     memset(access, 0, sizeof(*access));
189     access->vaddr1 = vaddr1;
190     access->size1 = size1;
191     access->size2 = size2;
192     access->mmu_idx = mmu_idx;
193 
194     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
195                             &access->haddr1, ra);
196     if (unlikely(exc)) {
197         return exc;
198     }
199     if (unlikely(size2)) {
200         /* The access crosses page boundaries. */
201         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
202 
203         access->vaddr2 = vaddr2;
204         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
205                                 nonfault, &access->haddr2, ra);
206         if (unlikely(exc)) {
207             return exc;
208         }
209     }
210     return 0;
211 }
212 
/*
 * Faulting variant of access_prepare_nf(): any access exception is raised
 * directly inside s390_probe_access() (nonfault=false), so a non-zero
 * return value is impossible here.
 */
static inline void access_prepare(S390Access *ret, CPUS390XState *env,
                                  vaddr vaddr, int size,
                                  MMUAccessType access_type, int mmu_idx,
                                  uintptr_t ra)
{
    int exc = access_prepare_nf(ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
}
222 
223 /* Helper to handle memset on a single page. */
224 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
225                              uint8_t byte, uint16_t size, int mmu_idx,
226                              uintptr_t ra)
227 {
228     if (user_or_likely(haddr)) {
229         memset(haddr, byte, size);
230     } else {
231         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
232         for (int i = 0; i < size; i++) {
233             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
234         }
235     }
236 }
237 
238 static void access_memset(CPUS390XState *env, S390Access *desta,
239                           uint8_t byte, uintptr_t ra)
240 {
241 
242     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
243                      desta->mmu_idx, ra);
244     if (likely(!desta->size2)) {
245         return;
246     }
247     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
248                      desta->mmu_idx, ra);
249 }
250 
251 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
252                                int offset, uintptr_t ra)
253 {
254     target_ulong vaddr = access->vaddr1;
255     void *haddr = access->haddr1;
256 
257     if (unlikely(offset >= access->size1)) {
258         offset -= access->size1;
259         vaddr = access->vaddr2;
260         haddr = access->haddr2;
261     }
262 
263     if (user_or_likely(haddr)) {
264         return ldub_p(haddr + offset);
265     } else {
266         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
267         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
268     }
269 }
270 
271 static void access_set_byte(CPUS390XState *env, S390Access *access,
272                             int offset, uint8_t byte, uintptr_t ra)
273 {
274     target_ulong vaddr = access->vaddr1;
275     void *haddr = access->haddr1;
276 
277     if (unlikely(offset >= access->size1)) {
278         offset -= access->size1;
279         vaddr = access->vaddr2;
280         haddr = access->haddr2;
281     }
282 
283     if (user_or_likely(haddr)) {
284         stb_p(haddr + offset, byte);
285     } else {
286         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
287         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
288     }
289 }
290 
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;
    int diff;

    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    /*
     * Source and destination may split at different offsets within the
     * copy; @diff is that mismatch.  Copy in up to three pieces so each
     * memmove stays within one host mapping on both sides.
     */
    diff = desta->size1 - srca->size1;
    if (likely(diff == 0)) {
        /* Both operands split at the same offset. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (diff > 0) {
        /* The destination's first part is longer than the source's. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /* The source's first part is longer than the destination's. */
        diff = -diff;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
337 
338 static int mmu_idx_from_as(uint8_t as)
339 {
340     switch (as) {
341     case AS_PRIMARY:
342         return MMU_PRIMARY_IDX;
343     case AS_SECONDARY:
344         return MMU_SECONDARY_IDX;
345     case AS_HOME:
346         return MMU_HOME_IDX;
347     default:
348         /* FIXME AS_ACCREG */
349         g_assert_not_reached();
350     }
351 }
352 
353 /* and on array */
354 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
355                              uint64_t src, uintptr_t ra)
356 {
357     const int mmu_idx = s390x_env_mmu_index(env, false);
358     S390Access srca1, srca2, desta;
359     uint32_t i;
360     uint8_t c = 0;
361 
362     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
363                __func__, l, dest, src);
364 
365     /* NC always processes one more byte than specified - maximum is 256 */
366     l++;
367 
368     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
369     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
370     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
371     for (i = 0; i < l; i++) {
372         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
373                           access_get_byte(env, &srca2, i, ra);
374 
375         c |= x;
376         access_set_byte(env, &desta, i, x, ra);
377     }
378     return c != 0;
379 }
380 
/* NC entry point: capture the return address (GETPC) for fault unwinding. */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
386 
387 /* xor on array */
388 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
389                              uint64_t src, uintptr_t ra)
390 {
391     const int mmu_idx = s390x_env_mmu_index(env, false);
392     S390Access srca1, srca2, desta;
393     uint32_t i;
394     uint8_t c = 0;
395 
396     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
397                __func__, l, dest, src);
398 
399     /* XC always processes one more byte than specified - maximum is 256 */
400     l++;
401 
402     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
403     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
404     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
405 
406     /* xor with itself is the same as memset(0) */
407     if (src == dest) {
408         access_memset(env, &desta, 0, ra);
409         return 0;
410     }
411 
412     for (i = 0; i < l; i++) {
413         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
414                           access_get_byte(env, &srca2, i, ra);
415 
416         c |= x;
417         access_set_byte(env, &desta, i, x, ra);
418     }
419     return c != 0;
420 }
421 
/* XC entry point: capture the return address (GETPC) for fault unwinding. */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
427 
428 /* or on array */
429 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
430                              uint64_t src, uintptr_t ra)
431 {
432     const int mmu_idx = s390x_env_mmu_index(env, false);
433     S390Access srca1, srca2, desta;
434     uint32_t i;
435     uint8_t c = 0;
436 
437     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
438                __func__, l, dest, src);
439 
440     /* OC always processes one more byte than specified - maximum is 256 */
441     l++;
442 
443     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
444     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
445     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
446     for (i = 0; i < l; i++) {
447         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
448                           access_get_byte(env, &srca2, i, ra);
449 
450         c |= x;
451         access_set_byte(env, &desta, i, x, ra);
452     }
453     return c != 0;
454 }
455 
/* OC entry point: capture the return address (GETPC) for fault unwinding. */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
461 
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* dest one past src: byte-wise processing propagates the first
           byte through the whole destination, i.e. a memset. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: emulate strict byte-at-a-time semantics. */
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    /* MVC does not change the condition code; return the current one. */
    return env->cc_op;
}
498 
/* MVC entry point: capture the return address (GETPC) for fault unwinding. */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
503 
504 /* move right to left */
505 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
506 {
507     const int mmu_idx = s390x_env_mmu_index(env, false);
508     const uint64_t ra = GETPC();
509     S390Access srca, desta;
510     int32_t i;
511 
512     /* MVCRL always copies one more byte than specified - maximum is 256 */
513     l &= 0xff;
514     l++;
515 
516     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
517     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
518 
519     for (i = l - 1; i >= 0; i--) {
520         uint8_t byte = access_get_byte(env, &srca, i, ra);
521         access_set_byte(env, &desta, i, byte, ra);
522     }
523 }
524 
525 /* move inverse  */
526 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
527 {
528     const int mmu_idx = s390x_env_mmu_index(env, false);
529     S390Access srca, desta;
530     uintptr_t ra = GETPC();
531     int i;
532 
533     /* MVCIN always copies one more byte than specified - maximum is 256 */
534     l++;
535 
536     src = wrap_address(env, src - l + 1);
537     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
538     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
539     for (i = 0; i < l; i++) {
540         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
541 
542         access_set_byte(env, &desta, i, x, ra);
543     }
544 }
545 
546 /* move numerics  */
547 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
548 {
549     const int mmu_idx = s390x_env_mmu_index(env, false);
550     S390Access srca1, srca2, desta;
551     uintptr_t ra = GETPC();
552     int i;
553 
554     /* MVN always copies one more byte than specified - maximum is 256 */
555     l++;
556 
557     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
558     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
559     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
560     for (i = 0; i < l; i++) {
561         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
562                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
563 
564         access_set_byte(env, &desta, i, x, ra);
565     }
566 }
567 
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;   /* high nibble of l: dest length - 1 */
    const int len_src = (l & 0xf) + 1;   /* low nibble of l: src length - 1 */
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte: keep its low nibble, take the source's low
       nibble as the new high nibble. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry the previous source byte's high nibble down. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
601 
602 /* move zones  */
603 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
604 {
605     const int mmu_idx = s390x_env_mmu_index(env, false);
606     S390Access srca1, srca2, desta;
607     uintptr_t ra = GETPC();
608     int i;
609 
610     /* MVZ always copies one more byte than specified - maximum is 256 */
611     l++;
612 
613     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
614     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
615     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
616     for (i = 0; i < l; i++) {
617         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
618                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
619 
620         access_set_byte(env, &desta, i, x, ra);
621     }
622 }
623 
624 /* compare unsigned byte arrays */
625 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
626                               uint64_t s2, uintptr_t ra)
627 {
628     uint32_t i;
629     uint32_t cc = 0;
630 
631     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
632                __func__, l, s1, s2);
633 
634     for (i = 0; i <= l; i++) {
635         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
636         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
637         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
638         if (x < y) {
639             cc = 1;
640             break;
641         } else if (x > y) {
642             cc = 2;
643             break;
644         }
645     }
646 
647     HELPER_LOG("\n");
648     return cc;
649 }
650 
/* CLC entry point: capture the return address (GETPC) for fault unwinding. */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
655 
/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    if (!mask) {
        /* Recognize access exceptions for the first byte */
        probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
    }

    /* Compare one register byte per set mask bit, high byte first. */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        /* Shift to the next mask bit and the next register byte. */
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
693 
/* Read a register as an address, wrapped for the current addressing mode. */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
698 
699 /*
700  * Store the address to the given register, zeroing out unused leftmost
701  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
702  */
703 static inline void set_address_zero(CPUS390XState *env, int reg,
704                                     uint64_t address)
705 {
706     if (env->psw.mask & PSW_MASK_64) {
707         env->regs[reg] = address;
708     } else {
709         if (!(env->psw.mask & PSW_MASK_32)) {
710             address &= 0x00ffffff;
711         } else {
712             address &= 0x7fffffff;
713         }
714         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
715     }
716 }
717 
718 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
719 {
720     if (env->psw.mask & PSW_MASK_64) {
721         /* 64-Bit mode */
722         env->regs[reg] = address;
723     } else {
724         if (!(env->psw.mask & PSW_MASK_32)) {
725             /* 24-Bit mode. According to the PoO it is implementation
726             dependent if bits 32-39 remain unchanged or are set to
727             zeros.  Choose the former so that the function can also be
728             used for TRT.  */
729             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
730         } else {
731             /* 31-Bit mode. According to the PoO it is implementation
732             dependent if bit 32 remains unchanged or is set to zero.
733             Choose the latter so that the function can also be used for
734             TRT.  */
735             address &= 0x7fffffff;
736             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
737         }
738     }
739 }
740 
741 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
742 {
743     if (!(env->psw.mask & PSW_MASK_64)) {
744         return (uint32_t)length;
745     }
746     return length;
747 }
748 
749 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
750 {
751     if (!(env->psw.mask & PSW_MASK_64)) {
752         /* 24-Bit and 31-Bit mode */
753         length &= 0x7fffffff;
754     }
755     return length;
756 }
757 
/* Read a length from a register, masked to 31 bits outside 64-bit mode. */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
762 
763 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
764 {
765     if (env->psw.mask & PSW_MASK_64) {
766         /* 64-Bit mode */
767         env->regs[reg] = length;
768     } else {
769         /* 24-Bit and 31-Bit mode */
770         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
771     }
772 }
773 
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
811 
/* search string unicode: like SRST but scans 2-byte (UTF-16) characters. */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
851 
/* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of R0 is the terminator. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}
886 
/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);   /* R0 control bit; see PoO - TODO confirm meaning */
    const bool s = extract64(r0, 10, 1);   /* R0 control bit; mutually exclusive with f */
    const bool cco = extract64(r0, 8, 1);  /* condition-code option: report instead of fault */
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    /* f and s must not both be set; bits 12-15 of r0 must be zero. */
    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            /* Source page inaccessible: report CC 2 instead of faulting. */
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            /* Destination page inaccessible: report CC 1. */
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    /* Store translation-exception details to the lowcore before injecting. */
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}
941 
/* string copy */
/*
 * MVST: copy the string at the r2 address to the r1 address until the
 * terminator byte from regs[0] has been copied.  Only processes up to
 * the nearer of the two next page boundaries per invocation.
 * Returns CC 1 when the terminator was moved, CC 3 for a
 * CPU-determined amount (the instruction is then re-executed).
 */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];    /* terminator byte */
    /* Bytes remaining to the closer of the two next page boundaries. */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Everything above the terminator byte in regs[0] must be zero. */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator copied: r1 points at it, r2 is left unchanged. */
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    /* Page boundary reached: advance both addresses and report CC 3. */
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
979 
980 /* load access registers r1 to r3 from memory at a2 */
981 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
982 {
983     uintptr_t ra = GETPC();
984     int i;
985 
986     if (a2 & 0x3) {
987         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
988     }
989 
990     for (i = r1;; i = (i + 1) % 16) {
991         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
992         a2 += 4;
993 
994         if (i == r3) {
995             break;
996         }
997     }
998 }
999 
1000 /* store access registers r1 to r3 in memory at a2 */
1001 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1002 {
1003     uintptr_t ra = GETPC();
1004     int i;
1005 
1006     if (a2 & 0x3) {
1007         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1008     }
1009 
1010     for (i = r1;; i = (i + 1) % 16) {
1011         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1012         a2 += 4;
1013 
1014         if (i == r3) {
1015             break;
1016         }
1017     }
1018 }
1019 
/* move long helper */
/*
 * One step of MVCLE/MVCLU: either move or pad at most one page worth of
 * bytes, advancing the address/length images through the pointer
 * arguments.  Returns CC 0/1/2 (derived from the current length
 * comparison) when the destination length is exhausted, or CC 3 when
 * more work remains and the caller must re-execute the instruction.
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* Bytes until the end of the current destination page. */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The CC compares the lengths as they currently stand. */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* Pad with the two-byte pattern (MVCLU). */
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
1079 
/* move long */
/*
 * MVCL: interruptible move with padding.  The 24-bit lengths live in
 * the low 24 bits of regs[r1+1]/regs[r2+1], the pad byte in the next
 * 8 bits of regs[r2+1].  Destructive overlap yields CC 3 with no data
 * moved.  State is written back to the registers after every page so
 * the instruction can be interrupted and restarted.
 */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the rest of the destination. */
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1153 
1154 /* move long extended */
1155 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1156                        uint32_t r3)
1157 {
1158     uintptr_t ra = GETPC();
1159     uint64_t destlen = get_length(env, r1 + 1);
1160     uint64_t dest = get_address(env, r1);
1161     uint64_t srclen = get_length(env, r3 + 1);
1162     uint64_t src = get_address(env, r3);
1163     uint8_t pad = a2;
1164     uint32_t cc;
1165 
1166     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1167 
1168     set_length(env, r1 + 1, destlen);
1169     set_length(env, r3 + 1, srclen);
1170     set_address(env, r1, dest);
1171     set_address(env, r3, src);
1172 
1173     return cc;
1174 }
1175 
1176 /* move long unicode */
1177 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1178                        uint32_t r3)
1179 {
1180     uintptr_t ra = GETPC();
1181     uint64_t destlen = get_length(env, r1 + 1);
1182     uint64_t dest = get_address(env, r1);
1183     uint64_t srclen = get_length(env, r3 + 1);
1184     uint64_t src = get_address(env, r3);
1185     uint16_t pad = a2;
1186     uint32_t cc;
1187 
1188     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1189 
1190     set_length(env, r1 + 1, destlen);
1191     set_length(env, r3 + 1, srclen);
1192     set_address(env, r1, dest);
1193     set_address(env, r3, src);
1194 
1195     return cc;
1196 }
1197 
/* compare logical long helper */
/*
 * Common helper for CLCL/CLCLE/CLCLU: compare two operands element by
 * element (WORDSIZE is 1 or 2 bytes), logically extending the shorter
 * operand with PAD.  At most LIMIT bytes are processed per invocation;
 * if the limit is reached with all elements equal, CC 3 tells the
 * caller to re-execute the instruction.  Addresses and lengths are
 * advanced in place up to (not including) the first mismatch.
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be a multiple of the element size. */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand reads as the padding element. */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Only advance operands that still have bytes left. */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1249 
1250 
1251 /* compare logical long */
1252 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1253 {
1254     uintptr_t ra = GETPC();
1255     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1256     uint64_t src1 = get_address(env, r1);
1257     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1258     uint64_t src3 = get_address(env, r2);
1259     uint8_t pad = env->regs[r2 + 1] >> 24;
1260     uint32_t cc;
1261 
1262     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1263 
1264     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1265     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1266     set_address(env, r1, src1);
1267     set_address(env, r2, src3);
1268 
1269     return cc;
1270 }
1271 
1272 /* compare logical long extended memcompare insn with padding */
1273 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1274                        uint32_t r3)
1275 {
1276     uintptr_t ra = GETPC();
1277     uint64_t src1len = get_length(env, r1 + 1);
1278     uint64_t src1 = get_address(env, r1);
1279     uint64_t src3len = get_length(env, r3 + 1);
1280     uint64_t src3 = get_address(env, r3);
1281     uint8_t pad = a2;
1282     uint32_t cc;
1283 
1284     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1285 
1286     set_length(env, r1 + 1, src1len);
1287     set_length(env, r3 + 1, src3len);
1288     set_address(env, r1, src1);
1289     set_address(env, r3, src3);
1290 
1291     return cc;
1292 }
1293 
1294 /* compare logical long unicode memcompare insn with padding */
1295 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1296                        uint32_t r3)
1297 {
1298     uintptr_t ra = GETPC();
1299     uint64_t src1len = get_length(env, r1 + 1);
1300     uint64_t src1 = get_address(env, r1);
1301     uint64_t src3len = get_length(env, r3 + 1);
1302     uint64_t src3 = get_address(env, r3);
1303     uint16_t pad = a2;
1304     uint32_t cc = 0;
1305 
1306     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1307 
1308     set_length(env, r1 + 1, src1len);
1309     set_length(env, r3 + 1, src3len);
1310     set_address(env, r1, src1);
1311     set_address(env, r3, src3);
1312 
1313     return cc;
1314 }
1315 
/* checksum */
/*
 * CKSM: accumulate a 32-bit checksum with end-around carry over up to
 * 8k bytes of the source.  Returns the checksum in the low half and
 * the number of bytes processed in the high half of the Int128;
 * cc_op is 0 when the whole operand was consumed, 3 otherwise.
 */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;   /* running checksum from the r1 image */

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* Handle the 1-3 trailing bytes, left-aligned within a word. */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
1361 
/*
 * PACK: convert the zoned-decimal source into packed decimal at DEST.
 * LEN encodes both operand length codes: bits 4-7 hold the destination
 * length code, bits 0-3 the source length code.  Operands are processed
 * right to left; the rightmost source byte supplies the sign.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low nibble of the result comes from the next source digit... */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* ...and the high nibble from the digit after that. */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1398 
/*
 * Common helper for PKA (ssize == 1) and PKU (ssize == 2): pack decimal
 * digits from the source into a 16-byte packed-decimal result at DEST.
 * Source digits are taken right to left, SSIZE bytes apart, two digits
 * per result byte; the rightmost result nibble is forced to the
 * positive sign code 0xc.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            /* Low nibble: next source digit, if any remain. */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble: the digit after that, if any remain. */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1432 
1433 
1434 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1435                  uint32_t srclen)
1436 {
1437     do_pkau(env, dest, src, srclen, 1, GETPC());
1438 }
1439 
1440 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1441                  uint32_t srclen)
1442 {
1443     do_pkau(env, dest, src, srclen, 2, GETPC());
1444 }
1445 
/*
 * UNPK: convert the packed-decimal source into zoned decimal at DEST.
 * LEN encodes both operand length codes: bits 4-7 hold the destination
 * length code, bits 0-3 the source length code.  Operands are processed
 * right to left, one source nibble per destination byte.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;   /* toggles between low/high source nibble */

    /* Point at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* An exhausted source reads as zero digits. */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1492 
/*
 * Common helper for UNPKA (dsize == 1) and UNPKU (dsize == 2): unpack
 * the 16-byte packed-decimal source at SRC into DESTLEN bytes of
 * digits at DEST, each stored as 0x30 + digit in a DSIZE-byte element.
 * Returns the CC derived from the sign nibble: 0 plus, 1 minus,
 * 3 invalid sign code.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd digit position: fetch the next source byte. */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even digit position: use the high nibble of the last byte. */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1543 
1544 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1545                        uint64_t src)
1546 {
1547     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1548 }
1549 
1550 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1551                        uint64_t src)
1552 {
1553     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1554 }
1555 
1556 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1557 {
1558     uintptr_t ra = GETPC();
1559     uint32_t cc = 0;
1560     int i;
1561 
1562     for (i = 0; i < destlen; i++) {
1563         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1564         /* digit */
1565         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1566 
1567         if (i == (destlen - 1)) {
1568             /* sign */
1569             cc |= (b & 0xf) < 0xa ? 1 : 0;
1570         } else {
1571             /* digit */
1572             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1573         }
1574     }
1575 
1576     return cc;
1577 }
1578 
1579 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1580                              uint64_t trans, uintptr_t ra)
1581 {
1582     uint32_t i;
1583 
1584     for (i = 0; i <= len; i++) {
1585         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1586         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1587         cpu_stb_data_ra(env, array + i, new_byte, ra);
1588     }
1589 
1590     return env->cc_op;
1591 }
1592 
1593 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1594                 uint64_t trans)
1595 {
1596     do_helper_tr(env, len, array, trans, GETPC());
1597 }
1598 
/*
 * TRE: translate bytes of the first operand in place through the table
 * at TRANS, stopping when the test byte from regs[0] is read (CC 1) or
 * the operand is exhausted (CC 0).  Work is capped at 8k bytes per
 * invocation (CC 3).  Returns the remaining length (low half) and the
 * next first-operand address (high half).
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;   /* test (stop) byte */
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Truncate the operand when not in 64-bit addressing mode. */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte ends processing before it is translated. */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1637 
1638 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1639                                      uint64_t array, uint64_t trans,
1640                                      int inc, uintptr_t ra)
1641 {
1642     int i;
1643 
1644     for (i = 0; i <= len; i++) {
1645         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1646         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1647 
1648         if (sbyte != 0) {
1649             set_address(env, 1, array + i * inc);
1650             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1651             return (i == len) ? 2 : 1;
1652         }
1653     }
1654 
1655     return 0;
1656 }
1657 
1658 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1659                                   uint64_t array, uint64_t trans,
1660                                   uintptr_t ra)
1661 {
1662     return do_helper_trt(env, len, array, trans, 1, ra);
1663 }
1664 
1665 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1666                      uint64_t trans)
1667 {
1668     return do_helper_trt(env, len, array, trans, 1, GETPC());
1669 }
1670 
1671 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1672                                    uint64_t array, uint64_t trans,
1673                                    uintptr_t ra)
1674 {
1675     return do_helper_trt(env, len, array, trans, -1, ra);
1676 }
1677 
1678 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1679                       uint64_t trans)
1680 {
1681     return do_helper_trt(env, len, array, trans, -1, GETPC());
1682 }
1683 
/* Translate one/two to one/two */
/*
 * Common helper for TROO/TROT/TRTO/TRTT (selected via the SIZES bits,
 * which pick 1- or 2-byte source and destination elements): translate
 * elements of the second operand through the table at GR1 into the
 * first operand.  Stops on the test character TST (CC 1), on exhaustion
 * of the first-operand length (CC 0), or after 8k iterations (CC 3,
 * CPU-determined completion).
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The first-operand length must be a multiple of the element size. */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* The test character stops processing before being stored. */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the updated addresses and remaining length back. */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1737 
/*
 * COMPARE AND SWAP AND STORE: perform a 4/8/16-byte compare-and-swap at
 * A1 (function code FC = 0/1/2) and, if the comparison succeeded, a
 * 1..16-byte store at A2 (storage characteristic SC = 0..4).  The swap
 * and store values come from the parameter list addressed by GR1.
 * Each operation is individually atomic, but the combination is not.
 * Raises a specification exception for invalid FC/SC combinations or
 * misaligned operands; returns the condition code otherwise.
 */
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                        uint64_t a2, bool parallel)
{
    uint32_t mem_idx = s390x_env_mmu_index(env, false);
    MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
    MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
    MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
    MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
    MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);   /* function code */
    uint32_t sc = extract32(env->regs[0], 8, 8);   /* store characteristic */
    uint64_t pl = get_address(env, 1) & -16;       /* parameter list */
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic.  */
    if (fc > 1 || sc > 3) {
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /* Sanity check the alignments.  */
    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address.  */
    probe_write(env, a2, 1 << sc, mem_idx, ra);

    /*
     * Note that the compare-and-swap is atomic, and the store is atomic,
     * but the complete operation is not.  Therefore we do not need to
     * assert serial context in order to implement this.  That said,
     * restart early if we can't support either operation that is supposed
     * to be atomic.
     */
    if (parallel) {
        uint32_t max = 2;
#ifdef CONFIG_ATOMIC64
        max = 3;
#endif
        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
            (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
            cpu_loop_exit_atomic(env_cpu(env), ra);
        }
    }

    /*
     * All loads happen before all stores.  For simplicity, load the entire
     * store value area from the parameter list.
     */
    svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
    svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);

    switch (fc) {
    case 0:
        /* 4-byte compare and swap. */
        {
            uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel) {
                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
            } else {
                ov = cpu_ldl_mmu(env, a1, oi4, ra);
                cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
            }
            cc = (ov != cv);
            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
        }
        break;

    case 1:
        /* 8-byte compare and swap. */
        {
            uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel) {
#ifdef CONFIG_ATOMIC64
                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
#else
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_mmu(env, a1, oi8, ra);
                cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    case 2:
        /* 16-byte compare and swap, comparand in the even/odd pair r3/r3+1. */
        {
            Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (!parallel) {
                ov = cpu_ld16_mmu(env, a1, oi16, ra);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }
                cpu_st16_mmu(env, a1, nv, oi16, ra);
            } else if (HAVE_CMPXCHG128) {
                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
                cc = !int128_eq(ov, cv);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded.  Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh.  */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
            break;
        case 1:
            cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
            break;
        case 2:
            cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
            break;
        case 3:
            cpu_stq_mmu(env, a2, svh, oi8, ra);
            break;
        case 4:
            cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
}
1897 
1898 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1899 {
1900     return do_csst(env, r3, a1, a2, false);
1901 }
1902 
1903 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1904                                uint64_t a2)
1905 {
1906     return do_csst(env, r3, a1, a2, true);
1907 }
1908 
1909 #if !defined(CONFIG_USER_ONLY)
/*
 * LOAD CONTROL (64-bit, LCTLG): load control registers r1 through r3
 * (wrapping from 15 back to 0) from consecutive doublewords at a2.
 */
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The second operand must be doubleword aligned.  */
    if (src & 0x7) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        /* CR9..CR11 hold the PER control fields; remember any change.  */
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    /* Re-derive PER watchpoints if PER is enabled and its controls changed. */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control register contents influence translation; flush the TLB.  */
    tlb_flush(env_cpu(env));
}
1942 
/*
 * LOAD CONTROL (32-bit, LCTL): load the low 32 bits of control registers
 * r1 through r3 (wrapping from 15 back to 0) from consecutive words at a2;
 * the high halves of the control registers are preserved.
 */
void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The second operand must be word aligned.  */
    if (src & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        /* CR9..CR11 hold the PER control fields; remember any change.  */
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        /* Replace only the low word; the high word stays intact.  */
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    /* Re-derive PER watchpoints if PER is enabled and its controls changed. */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control register contents influence translation; flush the TLB.  */
    tlb_flush(env_cpu(env));
}
1974 
1975 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1976 {
1977     uintptr_t ra = GETPC();
1978     uint64_t dest = a2;
1979     uint32_t i;
1980 
1981     if (dest & 0x7) {
1982         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1983     }
1984 
1985     for (i = r1;; i = (i + 1) % 16) {
1986         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
1987         dest += sizeof(uint64_t);
1988 
1989         if (i == r3) {
1990             break;
1991         }
1992     }
1993 }
1994 
1995 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1996 {
1997     uintptr_t ra = GETPC();
1998     uint64_t dest = a2;
1999     uint32_t i;
2000 
2001     if (dest & 0x3) {
2002         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2003     }
2004 
2005     for (i = r1;; i = (i + 1) % 16) {
2006         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2007         dest += sizeof(uint32_t);
2008 
2009         if (i == r3) {
2010             break;
2011         }
2012     }
2013 }
2014 
2015 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2016 {
2017     uintptr_t ra = GETPC();
2018     int i;
2019 
2020     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2021 
2022     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2023         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2024     }
2025 
2026     return 0;
2027 }
2028 
/*
 * TEST PROTECTION: probe whether the location at a1 may be stored into
 * or fetched from, reporting the result as a condition code (0..3)
 * instead of raising a protection exception.
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     * NOTE(review): a2 (which carries the access key operand) is
     * unused until key-controlled protection is implemented.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /* The write probe failed; env->int_pgm_code tells us why.  */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2068 
/* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* Cache the skeys device lookup across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* Operate on the absolute address; reject addresses outside memory.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* enable_skeys() presumably reports the prior state; flush stale
           translations when key handling was just switched on.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* Return the key of the containing page, or 0 if it can't be read.  */
    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
    return key;
}
2097 
/* set storage key extended */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* Cache the skeys device lookup across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    /* Operate on the absolute address; reject addresses outside memory.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* enable_skeys() presumably reports the prior state; flush stale
           translations when key handling was just switched on.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* The low bit of the key operand is masked off.  */
    key = r1 & 0xfe;
    s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2127 
/* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* Cache the skeys device lookup across invocations.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    /* Operate on the absolute address; reject addresses outside memory.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* enable_skeys() presumably reports the prior state; flush stale
           translations when key handling was just switched on.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }

    /* Capture the old R and C bits, then clear R and write back.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    /* NOTE(review): relies on SK_R and SK_C being adjacent bits with SK_C
       the lower, so the shift yields the cc encoding above -- confirm
       against storage-keys.h.  */
    return re >> 1;
}
2179 
/*
 * MOVE TO SECONDARY: copy up to 256 bytes from the primary address
 * space (a2) to the secondary address space (a1), using the access
 * key supplied in bits 4..7 of 'key'.  Returns cc 3 if the requested
 * length exceeded 256 (copy truncated), else cc 0.
 */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary-space control, and neither home nor
       access-register translation mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* The supplied key must be permitted by the PSW-key mask.  */
    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }

    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2214 
/*
 * MOVE TO PRIMARY: copy up to 256 bytes from the secondary address
 * space (a2) to the primary address space (a1), using the access key
 * supplied in bits 4..7 of 'key'.  Returns cc 3 if the requested
 * length exceeded 256 (copy truncated), else cc 0.
 */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary-space control, and neither home nor
       access-register translation mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* The supplied key must be permitted by the PSW-key mask.  */
    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }
    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2248 
/*
 * INVALIDATE DAT TABLE ENTRY: set the invalid bit in the designated
 * region/segment table entries and flush TLBs.  r1 carries the table
 * origin and designation type, r2 the starting index, entry count and
 * operation flags; m4 bit 0 requests local clearing.
 */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Reserved bits of r2 must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* The starting index lives at a level-dependent position in r2.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        /* local clearing requested: flush only this CPU's TLB */
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2298 
/* invalidate pte */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    /* m4 bit 0 set: local clearing, so flush only this CPU's TLB.  */
    if (m4 & 1) {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2339 
2340 /* flush local tlb */
2341 void HELPER(ptlb)(CPUS390XState *env)
2342 {
2343     tlb_flush(env_cpu(env));
2344 }
2345 
2346 /* flush global tlb */
2347 void HELPER(purge)(CPUS390XState *env)
2348 {
2349     tlb_flush_all_cpus_synced(env_cpu(env));
2350 }
2351 
/* load real address */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    /* In 24/31-bit addressing mode the operand must fit in 32 bits.  */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        /* Translation failed: cc 3; the result has bit 32 set, the
           exception code in the low word, and r1's high word kept.  */
        cc = 3;
        ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
    } else {
        /* Success: combine the translated frame with the byte offset.  */
        cc = 0;
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    env->cc_op = cc;
    return ret;
}
2376 #endif
2377 
2378 /* Execute instruction.  This instruction executes an insn modified with
2379    the contents of r1.  It does not change the executed instruction in memory;
2380    it does not change the program counter.
2381 
2382    Perform this by recording the modified instruction in env->ex_value.
2383    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2384 */
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn;
    uint8_t opc;

    /* EXECUTE targets must be at even addresses.  */
    if (addr & 1) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    insn = cpu_lduw_code(env, addr);
    opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* 0xd0..0xdf: SS-format storage-to-storage insns with a direct
           helper; dispatch through the table below.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS-format fields from the modified insn.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            /* Run the target insn here and advance past EXECUTE.  */
            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: raise the SVC exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
    env->ex_target = addr;
}
2459 
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes between
 * the address spaces and access keys selected by the OAC fields in r0.
 * Returns cc 3 if the requested length exceeded 4096 (copy truncated),
 * else cc 0.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;
    dest_a = val & 0x1;

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* When the K/A control bits are clear, fall back to the PSW values.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Problem state may not explicitly select the home space.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2545 
2546 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2547    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2548    value >= 0 indicates failure, and the CC value to be returned.  */
2549 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2550                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2551                                  uint32_t *ochar, uint32_t *olen);
2552 
2553 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2554    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2555    indicates failure, and the CC value to be returned.  */
2556 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2557                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2558                                  uint32_t *olen);
2559 
2560 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2561                        bool enh_check, uintptr_t ra,
2562                        uint32_t *ochar, uint32_t *olen)
2563 {
2564     uint8_t s0, s1, s2, s3;
2565     uint32_t c, l;
2566 
2567     if (ilen < 1) {
2568         return 0;
2569     }
2570     s0 = cpu_ldub_data_ra(env, addr, ra);
2571     if (s0 <= 0x7f) {
2572         /* one byte character */
2573         l = 1;
2574         c = s0;
2575     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2576         /* invalid character */
2577         return 2;
2578     } else if (s0 <= 0xdf) {
2579         /* two byte character */
2580         l = 2;
2581         if (ilen < 2) {
2582             return 0;
2583         }
2584         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2585         c = s0 & 0x1f;
2586         c = (c << 6) | (s1 & 0x3f);
2587         if (enh_check && (s1 & 0xc0) != 0x80) {
2588             return 2;
2589         }
2590     } else if (s0 <= 0xef) {
2591         /* three byte character */
2592         l = 3;
2593         if (ilen < 3) {
2594             return 0;
2595         }
2596         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2597         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2598         c = s0 & 0x0f;
2599         c = (c << 6) | (s1 & 0x3f);
2600         c = (c << 6) | (s2 & 0x3f);
2601         /* Fold the byte-by-byte range descriptions in the PoO into
2602            tests against the complete value.  It disallows encodings
2603            that could be smaller, and the UTF-16 surrogates.  */
2604         if (enh_check
2605             && ((s1 & 0xc0) != 0x80
2606                 || (s2 & 0xc0) != 0x80
2607                 || c < 0x1000
2608                 || (c >= 0xd800 && c <= 0xdfff))) {
2609             return 2;
2610         }
2611     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2612         /* four byte character */
2613         l = 4;
2614         if (ilen < 4) {
2615             return 0;
2616         }
2617         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2618         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2619         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2620         c = s0 & 0x07;
2621         c = (c << 6) | (s1 & 0x3f);
2622         c = (c << 6) | (s2 & 0x3f);
2623         c = (c << 6) | (s3 & 0x3f);
2624         /* See above.  */
2625         if (enh_check
2626             && ((s1 & 0xc0) != 0x80
2627                 || (s2 & 0xc0) != 0x80
2628                 || (s3 & 0xc0) != 0x80
2629                 || c < 0x010000
2630                 || c > 0x10ffff)) {
2631             return 2;
2632         }
2633     } else {
2634         /* invalid character */
2635         return 2;
2636     }
2637 
2638     *ochar = c;
2639     *olen = l;
2640     return -1;
2641 }
2642 
2643 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2644                         bool enh_check, uintptr_t ra,
2645                         uint32_t *ochar, uint32_t *olen)
2646 {
2647     uint16_t s0, s1;
2648     uint32_t c, l;
2649 
2650     if (ilen < 2) {
2651         return 0;
2652     }
2653     s0 = cpu_lduw_data_ra(env, addr, ra);
2654     if ((s0 & 0xfc00) != 0xd800) {
2655         /* one word character */
2656         l = 2;
2657         c = s0;
2658     } else {
2659         /* two word character */
2660         l = 4;
2661         if (ilen < 4) {
2662             return 0;
2663         }
2664         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2665         c = extract32(s0, 6, 4) + 1;
2666         c = (c << 6) | (s0 & 0x3f);
2667         c = (c << 10) | (s1 & 0x3ff);
2668         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2669             /* invalid surrogate character */
2670             return 2;
2671         }
2672     }
2673 
2674     *ochar = c;
2675     *olen = l;
2676     return -1;
2677 }
2678 
2679 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2680                         bool enh_check, uintptr_t ra,
2681                         uint32_t *ochar, uint32_t *olen)
2682 {
2683     uint32_t c;
2684 
2685     if (ilen < 4) {
2686         return 0;
2687     }
2688     c = cpu_ldl_data_ra(env, addr, ra);
2689     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2690         /* invalid unicode character */
2691         return 2;
2692     }
2693 
2694     *ochar = c;
2695     *olen = 4;
2696     return -1;
2697 }
2698 
2699 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2700                        uintptr_t ra, uint32_t c, uint32_t *olen)
2701 {
2702     uint8_t d[4];
2703     uint32_t l, i;
2704 
2705     if (c <= 0x7f) {
2706         /* one byte character */
2707         l = 1;
2708         d[0] = c;
2709     } else if (c <= 0x7ff) {
2710         /* two byte character */
2711         l = 2;
2712         d[1] = 0x80 | extract32(c, 0, 6);
2713         d[0] = 0xc0 | extract32(c, 6, 5);
2714     } else if (c <= 0xffff) {
2715         /* three byte character */
2716         l = 3;
2717         d[2] = 0x80 | extract32(c, 0, 6);
2718         d[1] = 0x80 | extract32(c, 6, 6);
2719         d[0] = 0xe0 | extract32(c, 12, 4);
2720     } else {
2721         /* four byte character */
2722         l = 4;
2723         d[3] = 0x80 | extract32(c, 0, 6);
2724         d[2] = 0x80 | extract32(c, 6, 6);
2725         d[1] = 0x80 | extract32(c, 12, 6);
2726         d[0] = 0xf0 | extract32(c, 18, 3);
2727     }
2728 
2729     if (ilen < l) {
2730         return 1;
2731     }
2732     for (i = 0; i < l; ++i) {
2733         cpu_stb_data_ra(env, addr + i, d[i], ra);
2734     }
2735 
2736     *olen = l;
2737     return -1;
2738 }
2739 
2740 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2741                         uintptr_t ra, uint32_t c, uint32_t *olen)
2742 {
2743     uint16_t d0, d1;
2744 
2745     if (c <= 0xffff) {
2746         /* one word character */
2747         if (ilen < 2) {
2748             return 1;
2749         }
2750         cpu_stw_data_ra(env, addr, c, ra);
2751         *olen = 2;
2752     } else {
2753         /* two word character */
2754         if (ilen < 4) {
2755             return 1;
2756         }
2757         d1 = 0xdc00 | extract32(c, 0, 10);
2758         d0 = 0xd800 | extract32(c, 10, 6);
2759         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2760         cpu_stw_data_ra(env, addr + 0, d0, ra);
2761         cpu_stw_data_ra(env, addr + 2, d1, ra);
2762         *olen = 4;
2763     }
2764 
2765     return -1;
2766 }
2767 
2768 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2769                         uintptr_t ra, uint32_t c, uint32_t *olen)
2770 {
2771     if (ilen < 4) {
2772         return 1;
2773     }
2774     cpu_stl_data_ra(env, addr, c, ra);
2775     *olen = 4;
2776     return -1;
2777 }
2778 
/*
 * Common driver for the CUxy conversion instructions: repeatedly decode
 * one character from the r2/r2+1 operand and encode it at the r1/r1+2
 * operand, updating both register pairs as it goes.  Returns the cc of
 * the first decode/encode failure, or cc 3 when the iteration cap is
 * hit (partial completion).
 */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* One character converted; advance both operands.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        cc = 3;
    }

    /* Write the updated addresses and lengths back to the registers.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2819 
2820 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2821 {
2822     return convert_unicode(env, r1, r2, m3, GETPC(),
2823                            decode_utf8, encode_utf16);
2824 }
2825 
2826 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2827 {
2828     return convert_unicode(env, r1, r2, m3, GETPC(),
2829                            decode_utf8, encode_utf32);
2830 }
2831 
2832 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2833 {
2834     return convert_unicode(env, r1, r2, m3, GETPC(),
2835                            decode_utf16, encode_utf8);
2836 }
2837 
2838 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2839 {
2840     return convert_unicode(env, r1, r2, m3, GETPC(),
2841                            decode_utf16, encode_utf32);
2842 }
2843 
2844 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2845 {
2846     return convert_unicode(env, r1, r2, m3, GETPC(),
2847                            decode_utf32, encode_utf8);
2848 }
2849 
2850 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2851 {
2852     return convert_unicode(env, r1, r2, m3, GETPC(),
2853                            decode_utf32, encode_utf16);
2854 }
2855 
2856 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2857                         uintptr_t ra)
2858 {
2859     const int mmu_idx = s390x_env_mmu_index(env, false);
2860 
2861     /* test the actual access, not just any access to the page due to LAP */
2862     while (len) {
2863         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2864         const uint64_t curlen = MIN(pagelen, len);
2865 
2866         probe_write(env, addr, curlen, mmu_idx, ra);
2867         addr = wrap_address(env, addr + curlen);
2868         len -= curlen;
2869     }
2870 }
2871 
2872 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2873 {
2874     probe_write_access(env, addr, len, GETPC());
2875 }
2876