xref: /openbmc/qemu/target/s390x/tcg/mem_helper.c (revision 48805df9c22a0700fba4b3b548fafaa21726ca68)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/cpu_ldst.h"
29 #include "qemu/int128.h"
30 #include "qemu/atomic128.h"
31 #include "trace.h"
32 
33 #if !defined(CONFIG_USER_ONLY)
34 #include "hw/s390x/storage-keys.h"
35 #include "hw/boards.h"
36 #endif
37 
/*
 * user_or_likely(X): in CONFIG_USER_ONLY builds this is the constant true
 * (X is not evaluated); in all other builds it is likely(X).
 */
#if defined(CONFIG_USER_ONLY)
# define user_or_likely(X)    true
#else
# define user_or_likely(X)    likely(X)
#endif
43 
44 /*****************************************************************************/
45 /* Softmmu support */
46 
/*
 * Debug logging for the helpers in this file.  When DEBUG_HELPER is defined
 * (e.g. by uncommenting the line below), HELPER_LOG() forwards its arguments
 * to qemu_log(); otherwise it expands to nothing.
 */
/* #define DEBUG_HELPER */
#if defined(DEBUG_HELPER)
#define HELPER_LOG(...) qemu_log(__VA_ARGS__)
#else
#define HELPER_LOG(...)
#endif
53 
54 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
55 {
56     uint16_t pkm = env->cregs[3] >> 16;
57 
58     if (env->psw.mask & PSW_MASK_PSTATE) {
59         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
60         return pkm & (0x8000 >> psw_key);
61     }
62     return true;
63 }
64 
65 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
66                                    uint64_t src, uint32_t len)
67 {
68     if (!len || src == dest) {
69         return false;
70     }
71     /* Take care of wrapping at the end of address space. */
72     if (unlikely(wrap_address(env, src + len - 1) < src)) {
73         return dest > src || dest <= wrap_address(env, src + len - 1);
74     }
75     return dest > src && dest <= src + len - 1;
76 }
77 
78 /* Trigger a SPECIFICATION exception if an address or a length is not
79    naturally aligned.  */
80 static inline void check_alignment(CPUS390XState *env, uint64_t v,
81                                    int wordsize, uintptr_t ra)
82 {
83     if (v % wordsize) {
84         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
85     }
86 }
87 
88 /* Load a value from memory according to its size.  */
89 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
90                                            int wordsize, uintptr_t ra)
91 {
92     switch (wordsize) {
93     case 1:
94         return cpu_ldub_data_ra(env, addr, ra);
95     case 2:
96         return cpu_lduw_data_ra(env, addr, ra);
97     default:
98         abort();
99     }
100 }
101 
102 /* Store a to memory according to its size.  */
103 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
104                                       uint64_t value, int wordsize,
105                                       uintptr_t ra)
106 {
107     switch (wordsize) {
108     case 1:
109         cpu_stb_data_ra(env, addr, value, ra);
110         break;
111     case 2:
112         cpu_stw_data_ra(env, addr, value, ra);
113         break;
114     default:
115         abort();
116     }
117 }
118 
119 /* An access covers at most 4096 bytes and therefore at most two pages. */
120 typedef struct S390Access {
121     target_ulong vaddr1;
122     target_ulong vaddr2;
123     void *haddr1;
124     void *haddr2;
125     uint16_t size1;
126     uint16_t size2;
127     /*
128      * If we can't access the host page directly, we'll have to do I/O access
129      * via ld/st helpers. These are internal details, so we store the
130      * mmu idx to do the access here instead of passing it around in the
131      * helpers.
132      */
133     int mmu_idx;
134 } S390Access;
135 
136 /*
137  * With nonfault=1, return the PGM_ exception that would have been injected
138  * into the guest; return 0 if no exception was detected.
139  *
140  * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec.
141  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
142  */
143 static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
144                                     int size, MMUAccessType access_type,
145                                     int mmu_idx, bool nonfault,
146                                     void **phost, uintptr_t ra)
147 {
148     int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
149                                    nonfault, phost, ra);
150 
151     if (unlikely(flags & TLB_INVALID_MASK)) {
152 #ifdef CONFIG_USER_ONLY
153         /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
154         env->__excp_addr = addr & TARGET_PAGE_MASK;
155         return (page_get_flags(addr) & PAGE_VALID
156                 ? PGM_PROTECTION : PGM_ADDRESSING);
157 #else
158         return env->tlb_fill_exc;
159 #endif
160     }
161 
162 #ifndef CONFIG_USER_ONLY
163     if (unlikely(flags & TLB_WATCHPOINT)) {
164         /* S390 does not presently use transaction attributes. */
165         cpu_check_watchpoint(env_cpu(env), addr, size,
166                              MEMTXATTRS_UNSPECIFIED,
167                              (access_type == MMU_DATA_STORE
168                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
169     }
170 #endif
171 
172     return 0;
173 }
174 
175 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
176                              bool nonfault, vaddr vaddr1, int size,
177                              MMUAccessType access_type,
178                              int mmu_idx, uintptr_t ra)
179 {
180     int size1, size2, exc;
181 
182     assert(size > 0 && size <= 4096);
183 
184     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
185     size2 = size - size1;
186 
187     memset(access, 0, sizeof(*access));
188     access->vaddr1 = vaddr1;
189     access->size1 = size1;
190     access->size2 = size2;
191     access->mmu_idx = mmu_idx;
192 
193     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
194                             &access->haddr1, ra);
195     if (unlikely(exc)) {
196         return exc;
197     }
198     if (unlikely(size2)) {
199         /* The access crosses page boundaries. */
200         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
201 
202         access->vaddr2 = vaddr2;
203         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
204                                 nonfault, &access->haddr2, ra);
205         if (unlikely(exc)) {
206             return exc;
207         }
208     }
209     return 0;
210 }
211 
212 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
213                                   vaddr vaddr, int size,
214                                   MMUAccessType access_type, int mmu_idx,
215                                   uintptr_t ra)
216 {
217     int exc = access_prepare_nf(ret, env, false, vaddr, size,
218                                 access_type, mmu_idx, ra);
219     assert(!exc);
220 }
221 
222 /* Helper to handle memset on a single page. */
223 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
224                              uint8_t byte, uint16_t size, int mmu_idx,
225                              uintptr_t ra)
226 {
227 #ifdef CONFIG_USER_ONLY
228     memset(haddr, byte, size);
229 #else
230     if (likely(haddr)) {
231         memset(haddr, byte, size);
232     } else {
233         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
234         for (int i = 0; i < size; i++) {
235             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
236         }
237     }
238 #endif
239 }
240 
241 static void access_memset(CPUS390XState *env, S390Access *desta,
242                           uint8_t byte, uintptr_t ra)
243 {
244 
245     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
246                      desta->mmu_idx, ra);
247     if (likely(!desta->size2)) {
248         return;
249     }
250     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
251                      desta->mmu_idx, ra);
252 }
253 
254 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
255                                int offset, uintptr_t ra)
256 {
257     target_ulong vaddr = access->vaddr1;
258     void *haddr = access->haddr1;
259 
260     if (unlikely(offset >= access->size1)) {
261         offset -= access->size1;
262         vaddr = access->vaddr2;
263         haddr = access->haddr2;
264     }
265 
266     if (user_or_likely(haddr)) {
267         return ldub_p(haddr + offset);
268     } else {
269         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
270         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
271     }
272 }
273 
274 static void access_set_byte(CPUS390XState *env, S390Access *access,
275                             int offset, uint8_t byte, uintptr_t ra)
276 {
277     target_ulong vaddr = access->vaddr1;
278     void *haddr = access->haddr1;
279 
280     if (unlikely(offset >= access->size1)) {
281         offset -= access->size1;
282         vaddr = access->vaddr2;
283         haddr = access->haddr2;
284     }
285 
286     if (user_or_likely(haddr)) {
287         stb_p(haddr + offset, byte);
288     } else {
289         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
290         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
291     }
292 }
293 
294 /*
295  * Move data with the same semantics as memmove() in case ranges don't overlap
296  * or src > dest. Undefined behavior on destructive overlaps.
297  */
298 static void access_memmove(CPUS390XState *env, S390Access *desta,
299                            S390Access *srca, uintptr_t ra)
300 {
301     int len = desta->size1 + desta->size2;
302     int diff;
303 
304     assert(len == srca->size1 + srca->size2);
305 
306     /* Fallback to slow access in case we don't have access to all host pages */
307     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
308                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
309         int i;
310 
311         for (i = 0; i < len; i++) {
312             uint8_t byte = access_get_byte(env, srca, i, ra);
313 
314             access_set_byte(env, desta, i, byte, ra);
315         }
316         return;
317     }
318 
319     diff = desta->size1 - srca->size1;
320     if (likely(diff == 0)) {
321         memmove(desta->haddr1, srca->haddr1, srca->size1);
322         if (unlikely(srca->size2)) {
323             memmove(desta->haddr2, srca->haddr2, srca->size2);
324         }
325     } else if (diff > 0) {
326         memmove(desta->haddr1, srca->haddr1, srca->size1);
327         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
328         if (likely(desta->size2)) {
329             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
330         }
331     } else {
332         diff = -diff;
333         memmove(desta->haddr1, srca->haddr1, desta->size1);
334         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
335         if (likely(srca->size2)) {
336             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
337         }
338     }
339 }
340 
341 static int mmu_idx_from_as(uint8_t as)
342 {
343     switch (as) {
344     case AS_PRIMARY:
345         return MMU_PRIMARY_IDX;
346     case AS_SECONDARY:
347         return MMU_SECONDARY_IDX;
348     case AS_HOME:
349         return MMU_HOME_IDX;
350     default:
351         /* FIXME AS_ACCREG */
352         g_assert_not_reached();
353     }
354 }
355 
356 /* and on array */
357 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
358                              uint64_t src, uintptr_t ra)
359 {
360     const int mmu_idx = cpu_mmu_index(env, false);
361     S390Access srca1, srca2, desta;
362     uint32_t i;
363     uint8_t c = 0;
364 
365     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
366                __func__, l, dest, src);
367 
368     /* NC always processes one more byte than specified - maximum is 256 */
369     l++;
370 
371     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
372     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
373     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
374     for (i = 0; i < l; i++) {
375         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
376                           access_get_byte(env, &srca2, i, ra);
377 
378         c |= x;
379         access_set_byte(env, &desta, i, x, ra);
380     }
381     return c != 0;
382 }
383 
384 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
385                     uint64_t src)
386 {
387     return do_helper_nc(env, l, dest, src, GETPC());
388 }
389 
390 /* xor on array */
391 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
392                              uint64_t src, uintptr_t ra)
393 {
394     const int mmu_idx = cpu_mmu_index(env, false);
395     S390Access srca1, srca2, desta;
396     uint32_t i;
397     uint8_t c = 0;
398 
399     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
400                __func__, l, dest, src);
401 
402     /* XC always processes one more byte than specified - maximum is 256 */
403     l++;
404 
405     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
406     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
407     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
408 
409     /* xor with itself is the same as memset(0) */
410     if (src == dest) {
411         access_memset(env, &desta, 0, ra);
412         return 0;
413     }
414 
415     for (i = 0; i < l; i++) {
416         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
417                           access_get_byte(env, &srca2, i, ra);
418 
419         c |= x;
420         access_set_byte(env, &desta, i, x, ra);
421     }
422     return c != 0;
423 }
424 
425 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
426                     uint64_t src)
427 {
428     return do_helper_xc(env, l, dest, src, GETPC());
429 }
430 
431 /* or on array */
432 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
433                              uint64_t src, uintptr_t ra)
434 {
435     const int mmu_idx = cpu_mmu_index(env, false);
436     S390Access srca1, srca2, desta;
437     uint32_t i;
438     uint8_t c = 0;
439 
440     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
441                __func__, l, dest, src);
442 
443     /* OC always processes one more byte than specified - maximum is 256 */
444     l++;
445 
446     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
447     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
448     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
449     for (i = 0; i < l; i++) {
450         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
451                           access_get_byte(env, &srca2, i, ra);
452 
453         c |= x;
454         access_set_byte(env, &desta, i, x, ra);
455     }
456     return c != 0;
457 }
458 
459 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
460                     uint64_t src)
461 {
462     return do_helper_oc(env, l, dest, src, GETPC());
463 }
464 
465 /* memmove */
466 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
467                               uint64_t src, uintptr_t ra)
468 {
469     const int mmu_idx = cpu_mmu_index(env, false);
470     S390Access srca, desta;
471     uint32_t i;
472 
473     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
474                __func__, l, dest, src);
475 
476     /* MVC always copies one more byte than specified - maximum is 256 */
477     l++;
478 
479     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
480     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
481 
482     /*
483      * "When the operands overlap, the result is obtained as if the operands
484      * were processed one byte at a time". Only non-destructive overlaps
485      * behave like memmove().
486      */
487     if (dest == src + 1) {
488         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
489     } else if (!is_destructive_overlap(env, dest, src, l)) {
490         access_memmove(env, &desta, &srca, ra);
491     } else {
492         for (i = 0; i < l; i++) {
493             uint8_t byte = access_get_byte(env, &srca, i, ra);
494 
495             access_set_byte(env, &desta, i, byte, ra);
496         }
497     }
498 
499     return env->cc_op;
500 }
501 
502 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
503 {
504     do_helper_mvc(env, l, dest, src, GETPC());
505 }
506 
507 /* move right to left */
508 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
509 {
510     const int mmu_idx = cpu_mmu_index(env, false);
511     const uint64_t ra = GETPC();
512     S390Access srca, desta;
513     int32_t i;
514 
515     /* MVCRL always copies one more byte than specified - maximum is 256 */
516     l++;
517 
518     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
519     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
520 
521     for (i = l - 1; i >= 0; i--) {
522         uint8_t byte = access_get_byte(env, &srca, i, ra);
523         access_set_byte(env, &desta, i, byte, ra);
524     }
525 }
526 
527 /* move inverse  */
528 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
529 {
530     const int mmu_idx = cpu_mmu_index(env, false);
531     S390Access srca, desta;
532     uintptr_t ra = GETPC();
533     int i;
534 
535     /* MVCIN always copies one more byte than specified - maximum is 256 */
536     l++;
537 
538     src = wrap_address(env, src - l + 1);
539     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
540     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
541     for (i = 0; i < l; i++) {
542         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
543 
544         access_set_byte(env, &desta, i, x, ra);
545     }
546 }
547 
548 /* move numerics  */
549 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
550 {
551     const int mmu_idx = cpu_mmu_index(env, false);
552     S390Access srca1, srca2, desta;
553     uintptr_t ra = GETPC();
554     int i;
555 
556     /* MVN always copies one more byte than specified - maximum is 256 */
557     l++;
558 
559     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
560     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
561     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
562     for (i = 0; i < l; i++) {
563         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
564                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
565 
566         access_set_byte(env, &desta, i, x, ra);
567     }
568 }
569 
570 /* move with offset  */
571 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
572 {
573     const int mmu_idx = cpu_mmu_index(env, false);
574     /* MVO always processes one more byte than specified - maximum is 16 */
575     const int len_dest = (l >> 4) + 1;
576     const int len_src = (l & 0xf) + 1;
577     uintptr_t ra = GETPC();
578     uint8_t byte_dest, byte_src;
579     S390Access srca, desta;
580     int i, j;
581 
582     access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
583     access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
584 
585     /* Handle rightmost byte */
586     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
587     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
588     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
589     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
590 
591     /* Process remaining bytes from right to left */
592     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
593         byte_dest = byte_src >> 4;
594         if (j >= 0) {
595             byte_src = access_get_byte(env, &srca, j, ra);
596         } else {
597             byte_src = 0;
598         }
599         byte_dest |= byte_src << 4;
600         access_set_byte(env, &desta, i, byte_dest, ra);
601     }
602 }
603 
604 /* move zones  */
605 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
606 {
607     const int mmu_idx = cpu_mmu_index(env, false);
608     S390Access srca1, srca2, desta;
609     uintptr_t ra = GETPC();
610     int i;
611 
612     /* MVZ always copies one more byte than specified - maximum is 256 */
613     l++;
614 
615     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
616     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
617     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
618     for (i = 0; i < l; i++) {
619         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
620                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
621 
622         access_set_byte(env, &desta, i, x, ra);
623     }
624 }
625 
626 /* compare unsigned byte arrays */
627 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
628                               uint64_t s2, uintptr_t ra)
629 {
630     uint32_t i;
631     uint32_t cc = 0;
632 
633     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
634                __func__, l, s1, s2);
635 
636     for (i = 0; i <= l; i++) {
637         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
638         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
639         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
640         if (x < y) {
641             cc = 1;
642             break;
643         } else if (x > y) {
644             cc = 2;
645             break;
646         }
647     }
648 
649     HELPER_LOG("\n");
650     return cc;
651 }
652 
653 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
654 {
655     return do_helper_clc(env, l, s1, s2, GETPC());
656 }
657 
658 /* compare logical under mask */
659 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
660                      uint64_t addr)
661 {
662     uintptr_t ra = GETPC();
663     uint32_t cc = 0;
664 
665     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
666                mask, addr);
667 
668     while (mask) {
669         if (mask & 8) {
670             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
671             uint8_t r = extract32(r1, 24, 8);
672             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
673                        addr);
674             if (r < d) {
675                 cc = 1;
676                 break;
677             } else if (r > d) {
678                 cc = 2;
679                 break;
680             }
681             addr++;
682         }
683         mask = (mask << 1) & 0xf;
684         r1 <<= 8;
685     }
686 
687     HELPER_LOG("\n");
688     return cc;
689 }
690 
691 static inline uint64_t get_address(CPUS390XState *env, int reg)
692 {
693     return wrap_address(env, env->regs[reg]);
694 }
695 
696 /*
697  * Store the address to the given register, zeroing out unused leftmost
698  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
699  */
700 static inline void set_address_zero(CPUS390XState *env, int reg,
701                                     uint64_t address)
702 {
703     if (env->psw.mask & PSW_MASK_64) {
704         env->regs[reg] = address;
705     } else {
706         if (!(env->psw.mask & PSW_MASK_32)) {
707             address &= 0x00ffffff;
708         } else {
709             address &= 0x7fffffff;
710         }
711         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
712     }
713 }
714 
715 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
716 {
717     if (env->psw.mask & PSW_MASK_64) {
718         /* 64-Bit mode */
719         env->regs[reg] = address;
720     } else {
721         if (!(env->psw.mask & PSW_MASK_32)) {
722             /* 24-Bit mode. According to the PoO it is implementation
723             dependent if bits 32-39 remain unchanged or are set to
724             zeros.  Choose the former so that the function can also be
725             used for TRT.  */
726             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
727         } else {
728             /* 31-Bit mode. According to the PoO it is implementation
729             dependent if bit 32 remains unchanged or is set to zero.
730             Choose the latter so that the function can also be used for
731             TRT.  */
732             address &= 0x7fffffff;
733             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
734         }
735     }
736 }
737 
738 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
739 {
740     if (!(env->psw.mask & PSW_MASK_64)) {
741         return (uint32_t)length;
742     }
743     return length;
744 }
745 
746 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
747 {
748     if (!(env->psw.mask & PSW_MASK_64)) {
749         /* 24-Bit and 31-Bit mode */
750         length &= 0x7fffffff;
751     }
752     return length;
753 }
754 
755 static inline uint64_t get_length(CPUS390XState *env, int reg)
756 {
757     return wrap_length31(env, env->regs[reg]);
758 }
759 
760 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
761 {
762     if (env->psw.mask & PSW_MASK_64) {
763         /* 64-Bit mode */
764         env->regs[reg] = length;
765     } else {
766         /* 24-Bit and 31-Bit mode */
767         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
768     }
769 }
770 
771 /* search string (c is byte to search, r2 is string, r1 end of string) */
772 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
773 {
774     uintptr_t ra = GETPC();
775     uint64_t end, str;
776     uint32_t len;
777     uint8_t v, c = env->regs[0];
778 
779     /* Bits 32-55 must contain all 0.  */
780     if (env->regs[0] & 0xffffff00u) {
781         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
782     }
783 
784     str = get_address(env, r2);
785     end = get_address(env, r1);
786 
787     /* Lest we fail to service interrupts in a timely manner, limit the
788        amount of work we're willing to do.  For now, let's cap at 8k.  */
789     for (len = 0; len < 0x2000; ++len) {
790         if (str + len == end) {
791             /* Character not found.  R1 & R2 are unmodified.  */
792             env->cc_op = 2;
793             return;
794         }
795         v = cpu_ldub_data_ra(env, str + len, ra);
796         if (v == c) {
797             /* Character found.  Set R1 to the location; R2 is unmodified.  */
798             env->cc_op = 1;
799             set_address(env, r1, str + len);
800             return;
801         }
802     }
803 
804     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
805     env->cc_op = 3;
806     set_address(env, r2, str + len);
807 }
808 
809 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
810 {
811     uintptr_t ra = GETPC();
812     uint32_t len;
813     uint16_t v, c = env->regs[0];
814     uint64_t end, str, adj_end;
815 
816     /* Bits 32-47 of R0 must be zero.  */
817     if (env->regs[0] & 0xffff0000u) {
818         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
819     }
820 
821     str = get_address(env, r2);
822     end = get_address(env, r1);
823 
824     /* If the LSB of the two addresses differ, use one extra byte.  */
825     adj_end = end + ((str ^ end) & 1);
826 
827     /* Lest we fail to service interrupts in a timely manner, limit the
828        amount of work we're willing to do.  For now, let's cap at 8k.  */
829     for (len = 0; len < 0x2000; len += 2) {
830         if (str + len == adj_end) {
831             /* End of input found.  */
832             env->cc_op = 2;
833             return;
834         }
835         v = cpu_lduw_data_ra(env, str + len, ra);
836         if (v == c) {
837             /* Character found.  Set R1 to the location; R2 is unmodified.  */
838             env->cc_op = 1;
839             set_address(env, r1, str + len);
840             return;
841         }
842     }
843 
844     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
845     env->cc_op = 3;
846     set_address(env, r2, str + len);
847 }
848 
849 /* unsigned string compare (c is string terminator) */
850 Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
851 {
852     uintptr_t ra = GETPC();
853     uint32_t len;
854 
855     c = c & 0xff;
856     s1 = wrap_address(env, s1);
857     s2 = wrap_address(env, s2);
858 
859     /* Lest we fail to service interrupts in a timely manner, limit the
860        amount of work we're willing to do.  For now, let's cap at 8k.  */
861     for (len = 0; len < 0x2000; ++len) {
862         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
863         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
864         if (v1 == v2) {
865             if (v1 == c) {
866                 /* Equal.  CC=0, and don't advance the registers.  */
867                 env->cc_op = 0;
868                 return int128_make128(s2, s1);
869             }
870         } else {
871             /* Unequal.  CC={1,2}, and advance the registers.  Note that
872                the terminator need not be zero, but the string that contains
873                the terminator is by definition "low".  */
874             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
875             return int128_make128(s2 + len, s1 + len);
876         }
877     }
878 
879     /* CPU-determined bytes equal; advance the registers.  */
880     env->cc_op = 3;
881     return int128_make128(s2 + len, s1 + len);
882 }
883 
884 /* move page */
885 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
886 {
887     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
888     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
889     const int mmu_idx = cpu_mmu_index(env, false);
890     const bool f = extract64(r0, 11, 1);
891     const bool s = extract64(r0, 10, 1);
892     const bool cco = extract64(r0, 8, 1);
893     uintptr_t ra = GETPC();
894     S390Access srca, desta;
895     int exc;
896 
897     if ((f && s) || extract64(r0, 12, 4)) {
898         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
899     }
900 
901     /*
902      * We always manually handle exceptions such that we can properly store
903      * r1/r2 to the lowcore on page-translation exceptions.
904      *
905      * TODO: Access key handling
906      */
907     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
908                             MMU_DATA_LOAD, mmu_idx, ra);
909     if (exc) {
910         if (cco) {
911             return 2;
912         }
913         goto inject_exc;
914     }
915     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
916                             MMU_DATA_STORE, mmu_idx, ra);
917     if (exc) {
918         if (cco && exc != PGM_PROTECTION) {
919             return 1;
920         }
921         goto inject_exc;
922     }
923     access_memmove(env, &desta, &srca, ra);
924     return 0; /* data moved */
925 inject_exc:
926 #if !defined(CONFIG_USER_ONLY)
927     if (exc != PGM_ADDRESSING) {
928         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
929                  env->tlb_fill_tec);
930     }
931     if (exc == PGM_PAGE_TRANS) {
932         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
933                  r1 << 4 | r2);
934     }
935 #endif
936     tcg_s390_program_interrupt(env, exc, ra);
937 }
938 
939 /* string copy */
940 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
941 {
942     const int mmu_idx = cpu_mmu_index(env, false);
943     const uint64_t d = get_address(env, r1);
944     const uint64_t s = get_address(env, r2);
945     const uint8_t c = env->regs[0];
946     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
947     S390Access srca, desta;
948     uintptr_t ra = GETPC();
949     int i;
950 
951     if (env->regs[0] & 0xffffff00ull) {
952         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
953     }
954 
955     /*
956      * Our access should not exceed single pages, as we must not report access
957      * exceptions exceeding the actually copied range (which we don't know at
958      * this point). We might over-indicate watchpoints within the pages
959      * (if we ever care, we have to limit processing to a single byte).
960      */
961     access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
962     access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
963     for (i = 0; i < len; i++) {
964         const uint8_t v = access_get_byte(env, &srca, i, ra);
965 
966         access_set_byte(env, &desta, i, v, ra);
967         if (v == c) {
968             set_address_zero(env, r1, d + i);
969             return 1;
970         }
971     }
972     set_address_zero(env, r1, d + len);
973     set_address_zero(env, r2, s + len);
974     return 3;
975 }
976 
977 /* load access registers r1 to r3 from memory at a2 */
978 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
979 {
980     uintptr_t ra = GETPC();
981     int i;
982 
983     if (a2 & 0x3) {
984         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
985     }
986 
987     for (i = r1;; i = (i + 1) % 16) {
988         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
989         a2 += 4;
990 
991         if (i == r3) {
992             break;
993         }
994     }
995 }
996 
997 /* store access registers r1 to r3 in memory at a2 */
998 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
999 {
1000     uintptr_t ra = GETPC();
1001     int i;
1002 
1003     if (a2 & 0x3) {
1004         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1005     }
1006 
1007     for (i = r1;; i = (i + 1) % 16) {
1008         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1009         a2 += 4;
1010 
1011         if (i == r3) {
1012             break;
1013         }
1014     }
1015 }
1016 
1017 /* move long helper */
1018 static inline uint32_t do_mvcl(CPUS390XState *env,
1019                                uint64_t *dest, uint64_t *destlen,
1020                                uint64_t *src, uint64_t *srclen,
1021                                uint16_t pad, int wordsize, uintptr_t ra)
1022 {
1023     const int mmu_idx = cpu_mmu_index(env, false);
1024     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1025     S390Access srca, desta;
1026     int i, cc;
1027 
1028     if (*destlen == *srclen) {
1029         cc = 0;
1030     } else if (*destlen < *srclen) {
1031         cc = 1;
1032     } else {
1033         cc = 2;
1034     }
1035 
1036     if (!*destlen) {
1037         return cc;
1038     }
1039 
1040     /*
1041      * Only perform one type of type of operation (move/pad) at a time.
1042      * Stay within single pages.
1043      */
1044     if (*srclen) {
1045         /* Copy the src array */
1046         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1047         *destlen -= len;
1048         *srclen -= len;
1049         access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1050         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1051         access_memmove(env, &desta, &srca, ra);
1052         *src = wrap_address(env, *src + len);
1053         *dest = wrap_address(env, *dest + len);
1054     } else if (wordsize == 1) {
1055         /* Pad the remaining area */
1056         *destlen -= len;
1057         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1058         access_memset(env, &desta, pad, ra);
1059         *dest = wrap_address(env, *dest + len);
1060     } else {
1061         access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1062 
1063         /* The remaining length selects the padding byte. */
1064         for (i = 0; i < len; (*destlen)--, i++) {
1065             if (*destlen & 1) {
1066                 access_set_byte(env, &desta, i, pad, ra);
1067             } else {
1068                 access_set_byte(env, &desta, i, pad >> 8, ra);
1069             }
1070         }
1071         *dest = wrap_address(env, *dest + len);
1072     }
1073 
1074     return *destlen ? 3 : cc;
1075 }
1076 
1077 /* move long */
1078 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1079 {
1080     const int mmu_idx = cpu_mmu_index(env, false);
1081     uintptr_t ra = GETPC();
1082     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1083     uint64_t dest = get_address(env, r1);
1084     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1085     uint64_t src = get_address(env, r2);
1086     uint8_t pad = env->regs[r2 + 1] >> 24;
1087     CPUState *cs = env_cpu(env);
1088     S390Access srca, desta;
1089     uint32_t cc, cur_len;
1090 
1091     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1092         cc = 3;
1093     } else if (srclen == destlen) {
1094         cc = 0;
1095     } else if (destlen < srclen) {
1096         cc = 1;
1097     } else {
1098         cc = 2;
1099     }
1100 
1101     /* We might have to zero-out some bits even if there was no action. */
1102     if (unlikely(!destlen || cc == 3)) {
1103         set_address_zero(env, r2, src);
1104         set_address_zero(env, r1, dest);
1105         return cc;
1106     } else if (!srclen) {
1107         set_address_zero(env, r2, src);
1108     }
1109 
1110     /*
1111      * Only perform one type of type of operation (move/pad) in one step.
1112      * Stay within single pages.
1113      */
1114     while (destlen) {
1115         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1116         if (!srclen) {
1117             access_prepare(&desta, env, dest, cur_len,
1118                            MMU_DATA_STORE, mmu_idx, ra);
1119             access_memset(env, &desta, pad, ra);
1120         } else {
1121             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1122 
1123             access_prepare(&srca, env, src, cur_len,
1124                            MMU_DATA_LOAD, mmu_idx, ra);
1125             access_prepare(&desta, env, dest, cur_len,
1126                            MMU_DATA_STORE, mmu_idx, ra);
1127             access_memmove(env, &desta, &srca, ra);
1128             src = wrap_address(env, src + cur_len);
1129             srclen -= cur_len;
1130             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1131             set_address_zero(env, r2, src);
1132         }
1133         dest = wrap_address(env, dest + cur_len);
1134         destlen -= cur_len;
1135         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1136         set_address_zero(env, r1, dest);
1137 
1138         /*
1139          * MVCL is interruptible. Return to the main loop if requested after
1140          * writing back all state to registers. If no interrupt will get
1141          * injected, we'll end up back in this handler and continue processing
1142          * the remaining parts.
1143          */
1144         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1145             cpu_loop_exit_restore(cs, ra);
1146         }
1147     }
1148     return cc;
1149 }
1150 
1151 /* move long extended */
1152 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1153                        uint32_t r3)
1154 {
1155     uintptr_t ra = GETPC();
1156     uint64_t destlen = get_length(env, r1 + 1);
1157     uint64_t dest = get_address(env, r1);
1158     uint64_t srclen = get_length(env, r3 + 1);
1159     uint64_t src = get_address(env, r3);
1160     uint8_t pad = a2;
1161     uint32_t cc;
1162 
1163     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1164 
1165     set_length(env, r1 + 1, destlen);
1166     set_length(env, r3 + 1, srclen);
1167     set_address(env, r1, dest);
1168     set_address(env, r3, src);
1169 
1170     return cc;
1171 }
1172 
1173 /* move long unicode */
1174 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1175                        uint32_t r3)
1176 {
1177     uintptr_t ra = GETPC();
1178     uint64_t destlen = get_length(env, r1 + 1);
1179     uint64_t dest = get_address(env, r1);
1180     uint64_t srclen = get_length(env, r3 + 1);
1181     uint64_t src = get_address(env, r3);
1182     uint16_t pad = a2;
1183     uint32_t cc;
1184 
1185     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1186 
1187     set_length(env, r1 + 1, destlen);
1188     set_length(env, r3 + 1, srclen);
1189     set_address(env, r1, dest);
1190     set_address(env, r3, src);
1191 
1192     return cc;
1193 }
1194 
1195 /* compare logical long helper */
1196 static inline uint32_t do_clcl(CPUS390XState *env,
1197                                uint64_t *src1, uint64_t *src1len,
1198                                uint64_t *src3, uint64_t *src3len,
1199                                uint16_t pad, uint64_t limit,
1200                                int wordsize, uintptr_t ra)
1201 {
1202     uint64_t len = MAX(*src1len, *src3len);
1203     uint32_t cc = 0;
1204 
1205     check_alignment(env, *src1len | *src3len, wordsize, ra);
1206 
1207     if (!len) {
1208         return cc;
1209     }
1210 
1211     /* Lest we fail to service interrupts in a timely manner, limit the
1212        amount of work we're willing to do.  */
1213     if (len > limit) {
1214         len = limit;
1215         cc = 3;
1216     }
1217 
1218     for (; len; len -= wordsize) {
1219         uint16_t v1 = pad;
1220         uint16_t v3 = pad;
1221 
1222         if (*src1len) {
1223             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1224         }
1225         if (*src3len) {
1226             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1227         }
1228 
1229         if (v1 != v3) {
1230             cc = (v1 < v3) ? 1 : 2;
1231             break;
1232         }
1233 
1234         if (*src1len) {
1235             *src1 += wordsize;
1236             *src1len -= wordsize;
1237         }
1238         if (*src3len) {
1239             *src3 += wordsize;
1240             *src3len -= wordsize;
1241         }
1242     }
1243 
1244     return cc;
1245 }
1246 
1247 
1248 /* compare logical long */
1249 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1250 {
1251     uintptr_t ra = GETPC();
1252     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1253     uint64_t src1 = get_address(env, r1);
1254     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1255     uint64_t src3 = get_address(env, r2);
1256     uint8_t pad = env->regs[r2 + 1] >> 24;
1257     uint32_t cc;
1258 
1259     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1260 
1261     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1262     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1263     set_address(env, r1, src1);
1264     set_address(env, r2, src3);
1265 
1266     return cc;
1267 }
1268 
1269 /* compare logical long extended memcompare insn with padding */
1270 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1271                        uint32_t r3)
1272 {
1273     uintptr_t ra = GETPC();
1274     uint64_t src1len = get_length(env, r1 + 1);
1275     uint64_t src1 = get_address(env, r1);
1276     uint64_t src3len = get_length(env, r3 + 1);
1277     uint64_t src3 = get_address(env, r3);
1278     uint8_t pad = a2;
1279     uint32_t cc;
1280 
1281     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1282 
1283     set_length(env, r1 + 1, src1len);
1284     set_length(env, r3 + 1, src3len);
1285     set_address(env, r1, src1);
1286     set_address(env, r3, src3);
1287 
1288     return cc;
1289 }
1290 
1291 /* compare logical long unicode memcompare insn with padding */
1292 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1293                        uint32_t r3)
1294 {
1295     uintptr_t ra = GETPC();
1296     uint64_t src1len = get_length(env, r1 + 1);
1297     uint64_t src1 = get_address(env, r1);
1298     uint64_t src3len = get_length(env, r3 + 1);
1299     uint64_t src3 = get_address(env, r3);
1300     uint16_t pad = a2;
1301     uint32_t cc = 0;
1302 
1303     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1304 
1305     set_length(env, r1 + 1, src1len);
1306     set_length(env, r3 + 1, src3len);
1307     set_address(env, r1, src1);
1308     set_address(env, r3, src3);
1309 
1310     return cc;
1311 }
1312 
1313 /* checksum */
1314 Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1315                     uint64_t src, uint64_t src_len)
1316 {
1317     uintptr_t ra = GETPC();
1318     uint64_t max_len, len;
1319     uint64_t cksm = (uint32_t)r1;
1320 
1321     /* Lest we fail to service interrupts in a timely manner, limit the
1322        amount of work we're willing to do.  For now, let's cap at 8k.  */
1323     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1324 
1325     /* Process full words as available.  */
1326     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1327         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1328     }
1329 
1330     switch (max_len - len) {
1331     case 1:
1332         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1333         len += 1;
1334         break;
1335     case 2:
1336         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1337         len += 2;
1338         break;
1339     case 3:
1340         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1341         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1342         len += 3;
1343         break;
1344     }
1345 
1346     /* Fold the carry from the checksum.  Note that we can see carry-out
1347        during folding more than once (but probably not more than twice).  */
1348     while (cksm > 0xffffffffull) {
1349         cksm = (uint32_t)cksm + (cksm >> 32);
1350     }
1351 
1352     /* Indicate whether or not we've processed everything.  */
1353     env->cc_op = (len == src_len ? 0 : 3);
1354 
1355     /* Return both cksm and processed length.  */
1356     return int128_make128(cksm, len);
1357 }
1358 
1359 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1360 {
1361     uintptr_t ra = GETPC();
1362     int len_dest = len >> 4;
1363     int len_src = len & 0xf;
1364     uint8_t b;
1365 
1366     dest += len_dest;
1367     src += len_src;
1368 
1369     /* last byte is special, it only flips the nibbles */
1370     b = cpu_ldub_data_ra(env, src, ra);
1371     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1372     src--;
1373     len_src--;
1374 
1375     /* now pack every value */
1376     while (len_dest > 0) {
1377         b = 0;
1378 
1379         if (len_src >= 0) {
1380             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1381             src--;
1382             len_src--;
1383         }
1384         if (len_src >= 0) {
1385             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1386             src--;
1387             len_src--;
1388         }
1389 
1390         len_dest--;
1391         dest--;
1392         cpu_stb_data_ra(env, dest, b, ra);
1393     }
1394 }
1395 
1396 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1397                            uint32_t srclen, int ssize, uintptr_t ra)
1398 {
1399     int i;
1400     /* The destination operand is always 16 bytes long.  */
1401     const int destlen = 16;
1402 
1403     /* The operands are processed from right to left.  */
1404     src += srclen - 1;
1405     dest += destlen - 1;
1406 
1407     for (i = 0; i < destlen; i++) {
1408         uint8_t b = 0;
1409 
1410         /* Start with a positive sign */
1411         if (i == 0) {
1412             b = 0xc;
1413         } else if (srclen > ssize) {
1414             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1415             src -= ssize;
1416             srclen -= ssize;
1417         }
1418 
1419         if (srclen > ssize) {
1420             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1421             src -= ssize;
1422             srclen -= ssize;
1423         }
1424 
1425         cpu_stb_data_ra(env, dest, b, ra);
1426         dest--;
1427     }
1428 }
1429 
1430 
1431 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1432                  uint32_t srclen)
1433 {
1434     do_pkau(env, dest, src, srclen, 1, GETPC());
1435 }
1436 
1437 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1438                  uint32_t srclen)
1439 {
1440     do_pkau(env, dest, src, srclen, 2, GETPC());
1441 }
1442 
1443 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1444                   uint64_t src)
1445 {
1446     uintptr_t ra = GETPC();
1447     int len_dest = len >> 4;
1448     int len_src = len & 0xf;
1449     uint8_t b;
1450     int second_nibble = 0;
1451 
1452     dest += len_dest;
1453     src += len_src;
1454 
1455     /* last byte is special, it only flips the nibbles */
1456     b = cpu_ldub_data_ra(env, src, ra);
1457     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1458     src--;
1459     len_src--;
1460 
1461     /* now pad every nibble with 0xf0 */
1462 
1463     while (len_dest > 0) {
1464         uint8_t cur_byte = 0;
1465 
1466         if (len_src > 0) {
1467             cur_byte = cpu_ldub_data_ra(env, src, ra);
1468         }
1469 
1470         len_dest--;
1471         dest--;
1472 
1473         /* only advance one nibble at a time */
1474         if (second_nibble) {
1475             cur_byte >>= 4;
1476             len_src--;
1477             src--;
1478         }
1479         second_nibble = !second_nibble;
1480 
1481         /* digit */
1482         cur_byte = (cur_byte & 0xf);
1483         /* zone bits */
1484         cur_byte |= 0xf0;
1485 
1486         cpu_stb_data_ra(env, dest, cur_byte, ra);
1487     }
1488 }
1489 
1490 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1491                                  uint32_t destlen, int dsize, uint64_t src,
1492                                  uintptr_t ra)
1493 {
1494     int i;
1495     uint32_t cc;
1496     uint8_t b;
1497     /* The source operand is always 16 bytes long.  */
1498     const int srclen = 16;
1499 
1500     /* The operands are processed from right to left.  */
1501     src += srclen - 1;
1502     dest += destlen - dsize;
1503 
1504     /* Check for the sign.  */
1505     b = cpu_ldub_data_ra(env, src, ra);
1506     src--;
1507     switch (b & 0xf) {
1508     case 0xa:
1509     case 0xc:
1510     case 0xe ... 0xf:
1511         cc = 0;  /* plus */
1512         break;
1513     case 0xb:
1514     case 0xd:
1515         cc = 1;  /* minus */
1516         break;
1517     default:
1518     case 0x0 ... 0x9:
1519         cc = 3;  /* invalid */
1520         break;
1521     }
1522 
1523     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1524     for (i = 0; i < destlen; i += dsize) {
1525         if (i == (31 * dsize)) {
1526             /* If length is 32/64 bytes, the leftmost byte is 0. */
1527             b = 0;
1528         } else if (i % (2 * dsize)) {
1529             b = cpu_ldub_data_ra(env, src, ra);
1530             src--;
1531         } else {
1532             b >>= 4;
1533         }
1534         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1535         dest -= dsize;
1536     }
1537 
1538     return cc;
1539 }
1540 
1541 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1542                        uint64_t src)
1543 {
1544     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1545 }
1546 
1547 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1548                        uint64_t src)
1549 {
1550     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1551 }
1552 
1553 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1554 {
1555     uintptr_t ra = GETPC();
1556     uint32_t cc = 0;
1557     int i;
1558 
1559     for (i = 0; i < destlen; i++) {
1560         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1561         /* digit */
1562         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1563 
1564         if (i == (destlen - 1)) {
1565             /* sign */
1566             cc |= (b & 0xf) < 0xa ? 1 : 0;
1567         } else {
1568             /* digit */
1569             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1570         }
1571     }
1572 
1573     return cc;
1574 }
1575 
1576 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1577                              uint64_t trans, uintptr_t ra)
1578 {
1579     uint32_t i;
1580 
1581     for (i = 0; i <= len; i++) {
1582         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1583         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1584         cpu_stb_data_ra(env, array + i, new_byte, ra);
1585     }
1586 
1587     return env->cc_op;
1588 }
1589 
1590 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1591                 uint64_t trans)
1592 {
1593     do_helper_tr(env, len, array, trans, GETPC());
1594 }
1595 
1596 Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
1597                    uint64_t len, uint64_t trans)
1598 {
1599     uintptr_t ra = GETPC();
1600     uint8_t end = env->regs[0] & 0xff;
1601     uint64_t l = len;
1602     uint64_t i;
1603     uint32_t cc = 0;
1604 
1605     if (!(env->psw.mask & PSW_MASK_64)) {
1606         array &= 0x7fffffff;
1607         l = (uint32_t)l;
1608     }
1609 
1610     /* Lest we fail to service interrupts in a timely manner, limit the
1611        amount of work we're willing to do.  For now, let's cap at 8k.  */
1612     if (l > 0x2000) {
1613         l = 0x2000;
1614         cc = 3;
1615     }
1616 
1617     for (i = 0; i < l; i++) {
1618         uint8_t byte, new_byte;
1619 
1620         byte = cpu_ldub_data_ra(env, array + i, ra);
1621 
1622         if (byte == end) {
1623             cc = 1;
1624             break;
1625         }
1626 
1627         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1628         cpu_stb_data_ra(env, array + i, new_byte, ra);
1629     }
1630 
1631     env->cc_op = cc;
1632     return int128_make128(len - i, array + i);
1633 }
1634 
1635 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1636                                      uint64_t array, uint64_t trans,
1637                                      int inc, uintptr_t ra)
1638 {
1639     int i;
1640 
1641     for (i = 0; i <= len; i++) {
1642         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1643         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1644 
1645         if (sbyte != 0) {
1646             set_address(env, 1, array + i * inc);
1647             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1648             return (i == len) ? 2 : 1;
1649         }
1650     }
1651 
1652     return 0;
1653 }
1654 
1655 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1656                                   uint64_t array, uint64_t trans,
1657                                   uintptr_t ra)
1658 {
1659     return do_helper_trt(env, len, array, trans, 1, ra);
1660 }
1661 
1662 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1663                      uint64_t trans)
1664 {
1665     return do_helper_trt(env, len, array, trans, 1, GETPC());
1666 }
1667 
1668 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1669                                    uint64_t array, uint64_t trans,
1670                                    uintptr_t ra)
1671 {
1672     return do_helper_trt(env, len, array, trans, -1, ra);
1673 }
1674 
1675 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1676                       uint64_t trans)
1677 {
1678     return do_helper_trt(env, len, array, trans, -1, GETPC());
1679 }
1680 
1681 /* Translate one/two to one/two */
1682 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1683                       uint32_t tst, uint32_t sizes)
1684 {
1685     uintptr_t ra = GETPC();
1686     int dsize = (sizes & 1) ? 1 : 2;
1687     int ssize = (sizes & 2) ? 1 : 2;
1688     uint64_t tbl = get_address(env, 1);
1689     uint64_t dst = get_address(env, r1);
1690     uint64_t len = get_length(env, r1 + 1);
1691     uint64_t src = get_address(env, r2);
1692     uint32_t cc = 3;
1693     int i;
1694 
1695     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1696        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1697        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1698     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1699         tbl &= -4096;
1700     } else {
1701         tbl &= -8;
1702     }
1703 
1704     check_alignment(env, len, ssize, ra);
1705 
1706     /* Lest we fail to service interrupts in a timely manner, */
1707     /* limit the amount of work we're willing to do.   */
1708     for (i = 0; i < 0x2000; i++) {
1709         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1710         uint64_t tble = tbl + (sval * dsize);
1711         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1712         if (dval == tst) {
1713             cc = 1;
1714             break;
1715         }
1716         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1717 
1718         len -= ssize;
1719         src += ssize;
1720         dst += dsize;
1721 
1722         if (len == 0) {
1723             cc = 0;
1724             break;
1725         }
1726     }
1727 
1728     set_address(env, r1, dst);
1729     set_length(env, r1 + 1, len);
1730     set_address(env, r2, src);
1731 
1732     return cc;
1733 }
1734 
1735 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1736                         uint64_t a2, bool parallel)
1737 {
1738     uint32_t mem_idx = cpu_mmu_index(env, false);
1739     uintptr_t ra = GETPC();
1740     uint32_t fc = extract32(env->regs[0], 0, 8);
1741     uint32_t sc = extract32(env->regs[0], 8, 8);
1742     uint64_t pl = get_address(env, 1) & -16;
1743     uint64_t svh, svl;
1744     uint32_t cc;
1745 
1746     /* Sanity check the function code and storage characteristic.  */
1747     if (fc > 1 || sc > 3) {
1748         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1749             goto spec_exception;
1750         }
1751         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1752             goto spec_exception;
1753         }
1754     }
1755 
1756     /* Sanity check the alignments.  */
1757     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1758         goto spec_exception;
1759     }
1760 
1761     /* Sanity check writability of the store address.  */
1762     probe_write(env, a2, 1 << sc, mem_idx, ra);
1763 
1764     /*
1765      * Note that the compare-and-swap is atomic, and the store is atomic,
1766      * but the complete operation is not.  Therefore we do not need to
1767      * assert serial context in order to implement this.  That said,
1768      * restart early if we can't support either operation that is supposed
1769      * to be atomic.
1770      */
1771     if (parallel) {
1772         uint32_t max = 2;
1773 #ifdef CONFIG_ATOMIC64
1774         max = 3;
1775 #endif
1776         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1777             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1778             cpu_loop_exit_atomic(env_cpu(env), ra);
1779         }
1780     }
1781 
1782     /* All loads happen before all stores.  For simplicity, load the entire
1783        store value area from the parameter list.  */
1784     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1785     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1786 
1787     switch (fc) {
1788     case 0:
1789         {
1790             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1791             uint32_t cv = env->regs[r3];
1792             uint32_t ov;
1793 
1794             if (parallel) {
1795 #ifdef CONFIG_USER_ONLY
1796                 uint32_t *haddr = g2h(env_cpu(env), a1);
1797                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1798 #else
1799                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1800                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1801 #endif
1802             } else {
1803                 ov = cpu_ldl_data_ra(env, a1, ra);
1804                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1805             }
1806             cc = (ov != cv);
1807             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1808         }
1809         break;
1810 
1811     case 1:
1812         {
1813             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1814             uint64_t cv = env->regs[r3];
1815             uint64_t ov;
1816 
1817             if (parallel) {
1818 #ifdef CONFIG_ATOMIC64
1819                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1820                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1821 #else
1822                 /* Note that we asserted !parallel above.  */
1823                 g_assert_not_reached();
1824 #endif
1825             } else {
1826                 ov = cpu_ldq_data_ra(env, a1, ra);
1827                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1828             }
1829             cc = (ov != cv);
1830             env->regs[r3] = ov;
1831         }
1832         break;
1833 
1834     case 2:
1835         {
1836             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1837             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1838             Int128 nv = int128_make128(nvl, nvh);
1839             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1840             Int128 ov;
1841 
1842             if (!parallel) {
1843                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1844                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1845 
1846                 ov = int128_make128(ol, oh);
1847                 cc = !int128_eq(ov, cv);
1848                 if (cc) {
1849                     nv = ov;
1850                 }
1851 
1852                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1853                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1854             } else if (HAVE_CMPXCHG128) {
1855                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1856                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1857                 cc = !int128_eq(ov, cv);
1858             } else {
1859                 /* Note that we asserted !parallel above.  */
1860                 g_assert_not_reached();
1861             }
1862 
1863             env->regs[r3 + 0] = int128_gethi(ov);
1864             env->regs[r3 + 1] = int128_getlo(ov);
1865         }
1866         break;
1867 
1868     default:
1869         g_assert_not_reached();
1870     }
1871 
1872     /* Store only if the comparison succeeded.  Note that above we use a pair
1873        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1874        from the most-significant bits of svh.  */
1875     if (cc == 0) {
1876         switch (sc) {
1877         case 0:
1878             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1879             break;
1880         case 1:
1881             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1882             break;
1883         case 2:
1884             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1885             break;
1886         case 3:
1887             cpu_stq_data_ra(env, a2, svh, ra);
1888             break;
1889         case 4:
1890             if (!parallel) {
1891                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1892                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1893             } else if (HAVE_ATOMIC128) {
1894                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1895                 Int128 sv = int128_make128(svl, svh);
1896                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1897             } else {
1898                 /* Note that we asserted !parallel above.  */
1899                 g_assert_not_reached();
1900             }
1901             break;
1902         default:
1903             g_assert_not_reached();
1904         }
1905     }
1906 
1907     return cc;
1908 
1909  spec_exception:
1910     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1911 }
1912 
1913 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1914 {
1915     return do_csst(env, r3, a1, a2, false);
1916 }
1917 
1918 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1919                                uint64_t a2)
1920 {
1921     return do_csst(env, r3, a1, a2, true);
1922 }
1923 
1924 #if !defined(CONFIG_USER_ONLY)
1925 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1926 {
1927     uintptr_t ra = GETPC();
1928     bool PERchanged = false;
1929     uint64_t src = a2;
1930     uint32_t i;
1931 
1932     if (src & 0x7) {
1933         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1934     }
1935 
1936     for (i = r1;; i = (i + 1) % 16) {
1937         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1938         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1939             PERchanged = true;
1940         }
1941         env->cregs[i] = val;
1942         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1943                    i, src, val);
1944         src += sizeof(uint64_t);
1945 
1946         if (i == r3) {
1947             break;
1948         }
1949     }
1950 
1951     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1952         s390_cpu_recompute_watchpoints(env_cpu(env));
1953     }
1954 
1955     tlb_flush(env_cpu(env));
1956 }
1957 
1958 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1959 {
1960     uintptr_t ra = GETPC();
1961     bool PERchanged = false;
1962     uint64_t src = a2;
1963     uint32_t i;
1964 
1965     if (src & 0x3) {
1966         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1967     }
1968 
1969     for (i = r1;; i = (i + 1) % 16) {
1970         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1971         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1972             PERchanged = true;
1973         }
1974         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1975         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1976         src += sizeof(uint32_t);
1977 
1978         if (i == r3) {
1979             break;
1980         }
1981     }
1982 
1983     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1984         s390_cpu_recompute_watchpoints(env_cpu(env));
1985     }
1986 
1987     tlb_flush(env_cpu(env));
1988 }
1989 
1990 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1991 {
1992     uintptr_t ra = GETPC();
1993     uint64_t dest = a2;
1994     uint32_t i;
1995 
1996     if (dest & 0x7) {
1997         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1998     }
1999 
2000     for (i = r1;; i = (i + 1) % 16) {
2001         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2002         dest += sizeof(uint64_t);
2003 
2004         if (i == r3) {
2005             break;
2006         }
2007     }
2008 }
2009 
2010 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2011 {
2012     uintptr_t ra = GETPC();
2013     uint64_t dest = a2;
2014     uint32_t i;
2015 
2016     if (dest & 0x3) {
2017         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2018     }
2019 
2020     for (i = r1;; i = (i + 1) % 16) {
2021         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2022         dest += sizeof(uint32_t);
2023 
2024         if (i == r3) {
2025             break;
2026         }
2027     }
2028 }
2029 
2030 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2031 {
2032     uintptr_t ra = GETPC();
2033     int i;
2034 
2035     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2036 
2037     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2038         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2039     }
2040 
2041     return 0;
2042 }
2043 
2044 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2045 {
2046     S390CPU *cpu = env_archcpu(env);
2047     CPUState *cs = env_cpu(env);
2048 
2049     /*
2050      * TODO: we currently don't handle all access protection types
2051      * (including access-list and key-controlled) as well as AR mode.
2052      */
2053     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2054         /* Fetching permitted; storing permitted */
2055         return 0;
2056     }
2057 
2058     if (env->int_pgm_code == PGM_PROTECTION) {
2059         /* retry if reading is possible */
2060         cs->exception_index = -1;
2061         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2062             /* Fetching permitted; storing not permitted */
2063             return 1;
2064         }
2065     }
2066 
2067     switch (env->int_pgm_code) {
2068     case PGM_PROTECTION:
2069         /* Fetching not permitted; storing not permitted */
2070         cs->exception_index = -1;
2071         return 2;
2072     case PGM_ADDRESSING:
2073     case PGM_TRANS_SPEC:
2074         /* exceptions forwarded to the guest */
2075         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2076         return 0;
2077     }
2078 
2079     /* Translation not available */
2080     cs->exception_index = -1;
2081     return 3;
2082 }
2083 
2084 /* insert storage key extended */
2085 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2086 {
2087     static S390SKeysState *ss;
2088     static S390SKeysClass *skeyclass;
2089     uint64_t addr = wrap_address(env, r2);
2090     uint8_t key;
2091     int rc;
2092 
2093     addr = mmu_real2abs(env, addr);
2094     if (!mmu_absolute_addr_valid(addr, false)) {
2095         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2096     }
2097 
2098     if (unlikely(!ss)) {
2099         ss = s390_get_skeys_device();
2100         skeyclass = S390_SKEYS_GET_CLASS(ss);
2101         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2102             tlb_flush_all_cpus_synced(env_cpu(env));
2103         }
2104     }
2105 
2106     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2107     if (rc) {
2108         trace_get_skeys_nonzero(rc);
2109         return 0;
2110     }
2111     return key;
2112 }
2113 
2114 /* set storage key extended */
2115 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2116 {
2117     static S390SKeysState *ss;
2118     static S390SKeysClass *skeyclass;
2119     uint64_t addr = wrap_address(env, r2);
2120     uint8_t key;
2121     int rc;
2122 
2123     addr = mmu_real2abs(env, addr);
2124     if (!mmu_absolute_addr_valid(addr, false)) {
2125         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2126     }
2127 
2128     if (unlikely(!ss)) {
2129         ss = s390_get_skeys_device();
2130         skeyclass = S390_SKEYS_GET_CLASS(ss);
2131         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2132             tlb_flush_all_cpus_synced(env_cpu(env));
2133         }
2134     }
2135 
2136     key = r1 & 0xfe;
2137     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2138     if (rc) {
2139         trace_set_skeys_nonzero(rc);
2140     }
2141    /*
2142     * As we can only flush by virtual address and not all the entries
2143     * that point to a physical address we have to flush the whole TLB.
2144     */
2145     tlb_flush_all_cpus_synced(env_cpu(env));
2146 }
2147 
2148 /* reset reference bit extended */
2149 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2150 {
2151     uint64_t addr = wrap_address(env, r2);
2152     static S390SKeysState *ss;
2153     static S390SKeysClass *skeyclass;
2154     uint8_t re, key;
2155     int rc;
2156 
2157     addr = mmu_real2abs(env, addr);
2158     if (!mmu_absolute_addr_valid(addr, false)) {
2159         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2160     }
2161 
2162     if (unlikely(!ss)) {
2163         ss = s390_get_skeys_device();
2164         skeyclass = S390_SKEYS_GET_CLASS(ss);
2165         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2166             tlb_flush_all_cpus_synced(env_cpu(env));
2167         }
2168     }
2169 
2170     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2171     if (rc) {
2172         trace_get_skeys_nonzero(rc);
2173         return 0;
2174     }
2175 
2176     re = key & (SK_R | SK_C);
2177     key &= ~SK_R;
2178 
2179     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2180     if (rc) {
2181         trace_set_skeys_nonzero(rc);
2182         return 0;
2183     }
2184    /*
2185     * As we can only flush by virtual address and not all the entries
2186     * that point to a physical address we have to flush the whole TLB.
2187     */
2188     tlb_flush_all_cpus_synced(env_cpu(env));
2189 
2190     /*
2191      * cc
2192      *
2193      * 0  Reference bit zero; change bit zero
2194      * 1  Reference bit zero; change bit one
2195      * 2  Reference bit one; change bit zero
2196      * 3  Reference bit one; change bit one
2197      */
2198 
2199     return re >> 1;
2200 }
2201 
2202 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2203                       uint64_t key)
2204 {
2205     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2206     S390Access srca, desta;
2207     uintptr_t ra = GETPC();
2208     int cc = 0;
2209 
2210     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2211                __func__, l, a1, a2);
2212 
2213     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2214         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2215         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2216     }
2217 
2218     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2219         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2220     }
2221 
2222     l = wrap_length32(env, l);
2223     if (l > 256) {
2224         /* max 256 */
2225         l = 256;
2226         cc = 3;
2227     } else if (!l) {
2228         return cc;
2229     }
2230 
2231     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2232     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2233     access_memmove(env, &desta, &srca, ra);
2234     return cc;
2235 }
2236 
2237 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2238                       uint64_t key)
2239 {
2240     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2241     S390Access srca, desta;
2242     uintptr_t ra = GETPC();
2243     int cc = 0;
2244 
2245     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2246                __func__, l, a1, a2);
2247 
2248     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2249         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2250         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2251     }
2252 
2253     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2254         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2255     }
2256 
2257     l = wrap_length32(env, l);
2258     if (l > 256) {
2259         /* max 256 */
2260         l = 256;
2261         cc = 3;
2262     } else if (!l) {
2263         return cc;
2264     }
2265     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2266     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2267     access_memmove(env, &desta, &srca, ra);
2268     return cc;
2269 }
2270 
2271 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2272 {
2273     CPUState *cs = env_cpu(env);
2274     const uintptr_t ra = GETPC();
2275     uint64_t table, entry, raddr;
2276     uint16_t entries, i, index = 0;
2277 
2278     if (r2 & 0xff000) {
2279         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2280     }
2281 
2282     if (!(r2 & 0x800)) {
2283         /* invalidation-and-clearing operation */
2284         table = r1 & ASCE_ORIGIN;
2285         entries = (r2 & 0x7ff) + 1;
2286 
2287         switch (r1 & ASCE_TYPE_MASK) {
2288         case ASCE_TYPE_REGION1:
2289             index = (r2 >> 53) & 0x7ff;
2290             break;
2291         case ASCE_TYPE_REGION2:
2292             index = (r2 >> 42) & 0x7ff;
2293             break;
2294         case ASCE_TYPE_REGION3:
2295             index = (r2 >> 31) & 0x7ff;
2296             break;
2297         case ASCE_TYPE_SEGMENT:
2298             index = (r2 >> 20) & 0x7ff;
2299             break;
2300         }
2301         for (i = 0; i < entries; i++) {
2302             /* addresses are not wrapped in 24/31bit mode but table index is */
2303             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2304             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2305             if (!(entry & REGION_ENTRY_I)) {
2306                 /* we are allowed to not store if already invalid */
2307                 entry |= REGION_ENTRY_I;
2308                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2309             }
2310         }
2311     }
2312 
2313     /* We simply flush the complete tlb, therefore we can ignore r3. */
2314     if (m4 & 1) {
2315         tlb_flush(cs);
2316     } else {
2317         tlb_flush_all_cpus_synced(cs);
2318     }
2319 }
2320 
2321 /* invalidate pte */
2322 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2323                   uint32_t m4)
2324 {
2325     CPUState *cs = env_cpu(env);
2326     const uintptr_t ra = GETPC();
2327     uint64_t page = vaddr & TARGET_PAGE_MASK;
2328     uint64_t pte_addr, pte;
2329 
2330     /* Compute the page table entry address */
2331     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2332     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2333 
2334     /* Mark the page table entry as invalid */
2335     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2336     pte |= PAGE_ENTRY_I;
2337     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2338 
2339     /* XXX we exploit the fact that Linux passes the exact virtual
2340        address here - it's not obliged to! */
2341     if (m4 & 1) {
2342         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2343             tlb_flush_page(cs, page);
2344             /* XXX 31-bit hack */
2345             tlb_flush_page(cs, page ^ 0x80000000);
2346         } else {
2347             /* looks like we don't have a valid virtual address */
2348             tlb_flush(cs);
2349         }
2350     } else {
2351         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2352             tlb_flush_page_all_cpus_synced(cs, page);
2353             /* XXX 31-bit hack */
2354             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2355         } else {
2356             /* looks like we don't have a valid virtual address */
2357             tlb_flush_all_cpus_synced(cs);
2358         }
2359     }
2360 }
2361 
2362 /* flush local tlb */
2363 void HELPER(ptlb)(CPUS390XState *env)
2364 {
2365     tlb_flush(env_cpu(env));
2366 }
2367 
2368 /* flush global tlb */
2369 void HELPER(purge)(CPUS390XState *env)
2370 {
2371     tlb_flush_all_cpus_synced(env_cpu(env));
2372 }
2373 
2374 /* load real address */
2375 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2376 {
2377     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2378     uint64_t ret, tec;
2379     int flags, exc, cc;
2380 
2381     /* XXX incomplete - has more corner cases */
2382     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2383         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2384     }
2385 
2386     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2387     if (exc) {
2388         cc = 3;
2389         ret = exc | 0x80000000;
2390     } else {
2391         cc = 0;
2392         ret |= addr & ~TARGET_PAGE_MASK;
2393     }
2394 
2395     env->cc_op = cc;
2396     return ret;
2397 }
2398 #endif
2399 
2400 /* load pair from quadword */
2401 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2402 {
2403     uintptr_t ra = GETPC();
2404     uint64_t hi, lo;
2405 
2406     check_alignment(env, addr, 16, ra);
2407     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2408     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2409 
2410     env->retxl = lo;
2411     return hi;
2412 }
2413 
2414 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2415 {
2416     uintptr_t ra = GETPC();
2417     uint64_t hi, lo;
2418     int mem_idx;
2419     MemOpIdx oi;
2420     Int128 v;
2421 
2422     assert(HAVE_ATOMIC128);
2423 
2424     mem_idx = cpu_mmu_index(env, false);
2425     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2426     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2427     hi = int128_gethi(v);
2428     lo = int128_getlo(v);
2429 
2430     env->retxl = lo;
2431     return hi;
2432 }
2433 
2434 /* store pair to quadword */
2435 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2436                   uint64_t low, uint64_t high)
2437 {
2438     uintptr_t ra = GETPC();
2439 
2440     check_alignment(env, addr, 16, ra);
2441     cpu_stq_data_ra(env, addr + 0, high, ra);
2442     cpu_stq_data_ra(env, addr + 8, low, ra);
2443 }
2444 
2445 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2446                            uint64_t low, uint64_t high)
2447 {
2448     uintptr_t ra = GETPC();
2449     int mem_idx;
2450     MemOpIdx oi;
2451     Int128 v;
2452 
2453     assert(HAVE_ATOMIC128);
2454 
2455     mem_idx = cpu_mmu_index(env, false);
2456     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2457     v = int128_make128(low, high);
2458     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2459 }
2460 
2461 /* Execute instruction.  This instruction executes an insn modified with
2462    the contents of r1.  It does not change the executed instruction in memory;
2463    it does not change the program counter.
2464 
2465    Perform this by recording the modified instruction in env->ex_value.
2466    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2467 */
2468 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2469 {
2470     uint64_t insn;
2471     uint8_t opc;
2472 
2473     /* EXECUTE targets must be at even addresses.  */
2474     if (addr & 1) {
2475         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
2476     }
2477 
2478     insn = cpu_lduw_code(env, addr);
2479     opc = insn >> 8;
2480 
2481     /* Or in the contents of R1[56:63].  */
2482     insn |= r1 & 0xff;
2483 
2484     /* Load the rest of the instruction.  */
2485     insn <<= 48;
2486     switch (get_ilen(opc)) {
2487     case 2:
2488         break;
2489     case 4:
2490         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2491         break;
2492     case 6:
2493         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2494         break;
2495     default:
2496         g_assert_not_reached();
2497     }
2498 
2499     /* The very most common cases can be sped up by avoiding a new TB.  */
2500     if ((opc & 0xf0) == 0xd0) {
2501         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2502                                       uint64_t, uintptr_t);
2503         static const dx_helper dx[16] = {
2504             [0x0] = do_helper_trt_bkwd,
2505             [0x2] = do_helper_mvc,
2506             [0x4] = do_helper_nc,
2507             [0x5] = do_helper_clc,
2508             [0x6] = do_helper_oc,
2509             [0x7] = do_helper_xc,
2510             [0xc] = do_helper_tr,
2511             [0xd] = do_helper_trt_fwd,
2512         };
2513         dx_helper helper = dx[opc & 0xf];
2514 
2515         if (helper) {
2516             uint32_t l = extract64(insn, 48, 8);
2517             uint32_t b1 = extract64(insn, 44, 4);
2518             uint32_t d1 = extract64(insn, 32, 12);
2519             uint32_t b2 = extract64(insn, 28, 4);
2520             uint32_t d2 = extract64(insn, 16, 12);
2521             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2522             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2523 
2524             env->cc_op = helper(env, l, a1, a2, 0);
2525             env->psw.addr += ilen;
2526             return;
2527         }
2528     } else if (opc == 0x0a) {
2529         env->int_svc_code = extract64(insn, 48, 8);
2530         env->int_svc_ilen = ilen;
2531         helper_exception(env, EXCP_SVC);
2532         g_assert_not_reached();
2533     }
2534 
2535     /* Record the insn we want to execute as well as the ilen to use
2536        during the execution of the target insn.  This will also ensure
2537        that ex_value is non-zero, which flags that we are in a state
2538        that requires such execution.  */
2539     env->ex_value = insn | ilen;
2540     env->ex_target = addr;
2541 }
2542 
2543 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2544                        uint64_t len)
2545 {
2546     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2547     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2548     const uint64_t r0 = env->regs[0];
2549     const uintptr_t ra = GETPC();
2550     uint8_t dest_key, dest_as, dest_k, dest_a;
2551     uint8_t src_key, src_as, src_k, src_a;
2552     uint64_t val;
2553     int cc = 0;
2554 
2555     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2556                __func__, dest, src, len);
2557 
2558     if (!(env->psw.mask & PSW_MASK_DAT)) {
2559         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2560     }
2561 
2562     /* OAC (operand access control) for the first operand -> dest */
2563     val = (r0 & 0xffff0000ULL) >> 16;
2564     dest_key = (val >> 12) & 0xf;
2565     dest_as = (val >> 6) & 0x3;
2566     dest_k = (val >> 1) & 0x1;
2567     dest_a = val & 0x1;
2568 
2569     /* OAC (operand access control) for the second operand -> src */
2570     val = (r0 & 0x0000ffffULL);
2571     src_key = (val >> 12) & 0xf;
2572     src_as = (val >> 6) & 0x3;
2573     src_k = (val >> 1) & 0x1;
2574     src_a = val & 0x1;
2575 
2576     if (!dest_k) {
2577         dest_key = psw_key;
2578     }
2579     if (!src_k) {
2580         src_key = psw_key;
2581     }
2582     if (!dest_a) {
2583         dest_as = psw_as;
2584     }
2585     if (!src_a) {
2586         src_as = psw_as;
2587     }
2588 
2589     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2590         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2591     }
2592     if (!(env->cregs[0] & CR0_SECONDARY) &&
2593         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2594         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2595     }
2596     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2597         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2598     }
2599 
2600     len = wrap_length32(env, len);
2601     if (len > 4096) {
2602         cc = 3;
2603         len = 4096;
2604     }
2605 
2606     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2607     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2608         (env->psw.mask & PSW_MASK_PSTATE)) {
2609         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2610                       __func__);
2611         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2612     }
2613 
2614     /* FIXME: Access using correct keys and AR-mode */
2615     if (len) {
2616         S390Access srca, desta;
2617 
2618         access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
2619                        mmu_idx_from_as(src_as), ra);
2620         access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
2621                        mmu_idx_from_as(dest_as), ra);
2622 
2623         access_memmove(env, &desta, &srca, ra);
2624     }
2625 
2626     return cc;
2627 }
2628 
/* Decode one Unicode character from the ILEN bytes available at ADDR.
   A return value < 0 indicates success, storing the UTF-32 result into
   OCHAR and the number of input bytes consumed into OLEN.  A return
   value >= 0 indicates failure, and is the CC value to be returned.
   ENH_CHECK selects the stricter well-formedness checks in the decoders
   that implement them.  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);

/* Encode the Unicode character C into the ILEN bytes of room at ADDR.
   A return value < 0 indicates success, storing the bytes at ADDR and
   the number of bytes written into OLEN.  A return value >= 0 indicates
   failure, and is the CC value to be returned.  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2642 
2643 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2644                        bool enh_check, uintptr_t ra,
2645                        uint32_t *ochar, uint32_t *olen)
2646 {
2647     uint8_t s0, s1, s2, s3;
2648     uint32_t c, l;
2649 
2650     if (ilen < 1) {
2651         return 0;
2652     }
2653     s0 = cpu_ldub_data_ra(env, addr, ra);
2654     if (s0 <= 0x7f) {
2655         /* one byte character */
2656         l = 1;
2657         c = s0;
2658     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2659         /* invalid character */
2660         return 2;
2661     } else if (s0 <= 0xdf) {
2662         /* two byte character */
2663         l = 2;
2664         if (ilen < 2) {
2665             return 0;
2666         }
2667         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2668         c = s0 & 0x1f;
2669         c = (c << 6) | (s1 & 0x3f);
2670         if (enh_check && (s1 & 0xc0) != 0x80) {
2671             return 2;
2672         }
2673     } else if (s0 <= 0xef) {
2674         /* three byte character */
2675         l = 3;
2676         if (ilen < 3) {
2677             return 0;
2678         }
2679         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2680         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2681         c = s0 & 0x0f;
2682         c = (c << 6) | (s1 & 0x3f);
2683         c = (c << 6) | (s2 & 0x3f);
2684         /* Fold the byte-by-byte range descriptions in the PoO into
2685            tests against the complete value.  It disallows encodings
2686            that could be smaller, and the UTF-16 surrogates.  */
2687         if (enh_check
2688             && ((s1 & 0xc0) != 0x80
2689                 || (s2 & 0xc0) != 0x80
2690                 || c < 0x1000
2691                 || (c >= 0xd800 && c <= 0xdfff))) {
2692             return 2;
2693         }
2694     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2695         /* four byte character */
2696         l = 4;
2697         if (ilen < 4) {
2698             return 0;
2699         }
2700         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2701         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2702         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2703         c = s0 & 0x07;
2704         c = (c << 6) | (s1 & 0x3f);
2705         c = (c << 6) | (s2 & 0x3f);
2706         c = (c << 6) | (s3 & 0x3f);
2707         /* See above.  */
2708         if (enh_check
2709             && ((s1 & 0xc0) != 0x80
2710                 || (s2 & 0xc0) != 0x80
2711                 || (s3 & 0xc0) != 0x80
2712                 || c < 0x010000
2713                 || c > 0x10ffff)) {
2714             return 2;
2715         }
2716     } else {
2717         /* invalid character */
2718         return 2;
2719     }
2720 
2721     *ochar = c;
2722     *olen = l;
2723     return -1;
2724 }
2725 
2726 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2727                         bool enh_check, uintptr_t ra,
2728                         uint32_t *ochar, uint32_t *olen)
2729 {
2730     uint16_t s0, s1;
2731     uint32_t c, l;
2732 
2733     if (ilen < 2) {
2734         return 0;
2735     }
2736     s0 = cpu_lduw_data_ra(env, addr, ra);
2737     if ((s0 & 0xfc00) != 0xd800) {
2738         /* one word character */
2739         l = 2;
2740         c = s0;
2741     } else {
2742         /* two word character */
2743         l = 4;
2744         if (ilen < 4) {
2745             return 0;
2746         }
2747         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2748         c = extract32(s0, 6, 4) + 1;
2749         c = (c << 6) | (s0 & 0x3f);
2750         c = (c << 10) | (s1 & 0x3ff);
2751         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2752             /* invalid surrogate character */
2753             return 2;
2754         }
2755     }
2756 
2757     *ochar = c;
2758     *olen = l;
2759     return -1;
2760 }
2761 
2762 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2763                         bool enh_check, uintptr_t ra,
2764                         uint32_t *ochar, uint32_t *olen)
2765 {
2766     uint32_t c;
2767 
2768     if (ilen < 4) {
2769         return 0;
2770     }
2771     c = cpu_ldl_data_ra(env, addr, ra);
2772     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2773         /* invalid unicode character */
2774         return 2;
2775     }
2776 
2777     *ochar = c;
2778     *olen = 4;
2779     return -1;
2780 }
2781 
2782 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2783                        uintptr_t ra, uint32_t c, uint32_t *olen)
2784 {
2785     uint8_t d[4];
2786     uint32_t l, i;
2787 
2788     if (c <= 0x7f) {
2789         /* one byte character */
2790         l = 1;
2791         d[0] = c;
2792     } else if (c <= 0x7ff) {
2793         /* two byte character */
2794         l = 2;
2795         d[1] = 0x80 | extract32(c, 0, 6);
2796         d[0] = 0xc0 | extract32(c, 6, 5);
2797     } else if (c <= 0xffff) {
2798         /* three byte character */
2799         l = 3;
2800         d[2] = 0x80 | extract32(c, 0, 6);
2801         d[1] = 0x80 | extract32(c, 6, 6);
2802         d[0] = 0xe0 | extract32(c, 12, 4);
2803     } else {
2804         /* four byte character */
2805         l = 4;
2806         d[3] = 0x80 | extract32(c, 0, 6);
2807         d[2] = 0x80 | extract32(c, 6, 6);
2808         d[1] = 0x80 | extract32(c, 12, 6);
2809         d[0] = 0xf0 | extract32(c, 18, 3);
2810     }
2811 
2812     if (ilen < l) {
2813         return 1;
2814     }
2815     for (i = 0; i < l; ++i) {
2816         cpu_stb_data_ra(env, addr + i, d[i], ra);
2817     }
2818 
2819     *olen = l;
2820     return -1;
2821 }
2822 
2823 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2824                         uintptr_t ra, uint32_t c, uint32_t *olen)
2825 {
2826     uint16_t d0, d1;
2827 
2828     if (c <= 0xffff) {
2829         /* one word character */
2830         if (ilen < 2) {
2831             return 1;
2832         }
2833         cpu_stw_data_ra(env, addr, c, ra);
2834         *olen = 2;
2835     } else {
2836         /* two word character */
2837         if (ilen < 4) {
2838             return 1;
2839         }
2840         d1 = 0xdc00 | extract32(c, 0, 10);
2841         d0 = 0xd800 | extract32(c, 10, 6);
2842         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2843         cpu_stw_data_ra(env, addr + 0, d0, ra);
2844         cpu_stw_data_ra(env, addr + 2, d1, ra);
2845         *olen = 4;
2846     }
2847 
2848     return -1;
2849 }
2850 
2851 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2852                         uintptr_t ra, uint32_t c, uint32_t *olen)
2853 {
2854     if (ilen < 4) {
2855         return 1;
2856     }
2857     cpu_stl_data_ra(env, addr, c, ra);
2858     *olen = 4;
2859     return -1;
2860 }
2861 
2862 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2863                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2864                                        decode_unicode_fn decode,
2865                                        encode_unicode_fn encode)
2866 {
2867     uint64_t dst = get_address(env, r1);
2868     uint64_t dlen = get_length(env, r1 + 1);
2869     uint64_t src = get_address(env, r2);
2870     uint64_t slen = get_length(env, r2 + 1);
2871     bool enh_check = m3 & 1;
2872     int cc, i;
2873 
2874     /* Lest we fail to service interrupts in a timely manner, limit the
2875        amount of work we're willing to do.  For now, let's cap at 256.  */
2876     for (i = 0; i < 256; ++i) {
2877         uint32_t c, ilen, olen;
2878 
2879         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2880         if (unlikely(cc >= 0)) {
2881             break;
2882         }
2883         cc = encode(env, dst, dlen, ra, c, &olen);
2884         if (unlikely(cc >= 0)) {
2885             break;
2886         }
2887 
2888         src += ilen;
2889         slen -= ilen;
2890         dst += olen;
2891         dlen -= olen;
2892         cc = 3;
2893     }
2894 
2895     set_address(env, r1, dst);
2896     set_length(env, r1 + 1, dlen);
2897     set_address(env, r2, src);
2898     set_length(env, r2 + 1, slen);
2899 
2900     return cc;
2901 }
2902 
2903 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2904 {
2905     return convert_unicode(env, r1, r2, m3, GETPC(),
2906                            decode_utf8, encode_utf16);
2907 }
2908 
2909 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2910 {
2911     return convert_unicode(env, r1, r2, m3, GETPC(),
2912                            decode_utf8, encode_utf32);
2913 }
2914 
2915 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2916 {
2917     return convert_unicode(env, r1, r2, m3, GETPC(),
2918                            decode_utf16, encode_utf8);
2919 }
2920 
2921 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2922 {
2923     return convert_unicode(env, r1, r2, m3, GETPC(),
2924                            decode_utf16, encode_utf32);
2925 }
2926 
2927 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2928 {
2929     return convert_unicode(env, r1, r2, m3, GETPC(),
2930                            decode_utf32, encode_utf8);
2931 }
2932 
2933 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2934 {
2935     return convert_unicode(env, r1, r2, m3, GETPC(),
2936                            decode_utf32, encode_utf16);
2937 }
2938 
2939 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2940                         uintptr_t ra)
2941 {
2942     /* test the actual access, not just any access to the page due to LAP */
2943     while (len) {
2944         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2945         const uint64_t curlen = MIN(pagelen, len);
2946 
2947         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2948         addr = wrap_address(env, addr + curlen);
2949         len -= curlen;
2950     }
2951 }
2952 
2953 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2954 {
2955     probe_write_access(env, addr, len, GETPC());
2956 }
2957