xref: /openbmc/qemu/target/arm/tcg/translate.c (revision ee3b34cd)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "translate.h"
24 #include "translate-a32.h"
25 #include "qemu/log.h"
26 #include "disas/disas.h"
27 #include "arm_ldst.h"
28 #include "semihosting/semihost.h"
29 #include "cpregs.h"
30 #include "exec/helper-proto.h"
31 
32 #define HELPER_H "helper.h"
33 #include "exec/helper-info.c.inc"
34 #undef  HELPER_H
35 
36 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
37 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
38 /* currently all emulated v5 cores are also v5TE, so don't bother */
39 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
40 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
41 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
42 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
43 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
44 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
45 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
46 
47 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
48 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
49 /* These are TCG globals which alias CPUARMState fields */
50 static TCGv_i32 cpu_R[16];
51 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
52 TCGv_i64 cpu_exclusive_addr;
53 TCGv_i64 cpu_exclusive_val;
54 
55 static const char * const regnames[] =
56     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
57       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
58 
59 
60 /* initialize TCG globals.  */
61 void arm_translate_init(void)
62 {
63     int i;
64 
65     for (i = 0; i < 16; i++) {
66         cpu_R[i] = tcg_global_mem_new_i32(tcg_env,
67                                           offsetof(CPUARMState, regs[i]),
68                                           regnames[i]);
69     }
70     cpu_CF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, CF), "CF");
71     cpu_NF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, NF), "NF");
72     cpu_VF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, VF), "VF");
73     cpu_ZF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, ZF), "ZF");
74 
75     cpu_exclusive_addr = tcg_global_mem_new_i64(tcg_env,
76         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
77     cpu_exclusive_val = tcg_global_mem_new_i64(tcg_env,
78         offsetof(CPUARMState, exclusive_val), "exclusive_val");
79 
80     a64_translate_init();
81 }
82 
83 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
84 {
85     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
86     switch (cmode) {
87     case 0: case 1:
88         /* no-op */
89         break;
90     case 2: case 3:
91         imm <<= 8;
92         break;
93     case 4: case 5:
94         imm <<= 16;
95         break;
96     case 6: case 7:
97         imm <<= 24;
98         break;
99     case 8: case 9:
100         imm |= imm << 16;
101         break;
102     case 10: case 11:
103         imm = (imm << 8) | (imm << 24);
104         break;
105     case 12:
106         imm = (imm << 8) | 0xff;
107         break;
108     case 13:
109         imm = (imm << 16) | 0xffff;
110         break;
111     case 14:
112         if (op) {
113             /*
114              * This and cmode == 15 op == 1 are the only cases where
115              * the top and bottom 32 bits of the encoded constant differ.
116              */
117             uint64_t imm64 = 0;
118             int n;
119 
120             for (n = 0; n < 8; n++) {
121                 if (imm & (1 << n)) {
122                     imm64 |= (0xffULL << (n * 8));
123                 }
124             }
125             return imm64;
126         }
127         imm |= (imm << 8) | (imm << 16) | (imm << 24);
128         break;
129     case 15:
130         if (op) {
131             /* Reserved encoding for AArch32; valid for AArch64 */
132             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
133             if (imm & 0x80) {
134                 imm64 |= 0x8000000000000000ULL;
135             }
136             if (imm & 0x40) {
137                 imm64 |= 0x3fc0000000000000ULL;
138             } else {
139                 imm64 |= 0x4000000000000000ULL;
140             }
141             return imm64;
142         }
143         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
144             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
145         break;
146     }
147     if (op) {
148         imm = ~imm;
149     }
150     return dup_const(MO_32, imm);
151 }
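
/*
 * Worked examples (informal; the values follow directly from the cases
 * above and from dup_const replicating the 32-bit result):
 *   asimd_imm_const(0x12, 12, 0) == 0x000012ff000012ffULL
 *   asimd_imm_const(0xa5, 14, 1) == 0xff00ff0000ff00ffULL  (one 0xff byte
 *                                                           per set bit of imm)
 *   asimd_imm_const(0x70, 15, 0) == 0x3f8000003f800000ULL  (FP32 1.0 in each half)
 */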
152 
153 /* Generate a label used for skipping this instruction */
154 void arm_gen_condlabel(DisasContext *s)
155 {
156     if (!s->condjmp) {
157         s->condlabel = gen_disas_label(s);
158         s->condjmp = 1;
159     }
160 }
161 
162 /* Flags for the disas_set_da_iss info argument:
163  * lower bits hold the Rt register number, higher bits are flags.
164  */
165 typedef enum ISSInfo {
166     ISSNone = 0,
167     ISSRegMask = 0x1f,
168     ISSInvalid = (1 << 5),
169     ISSIsAcqRel = (1 << 6),
170     ISSIsWrite = (1 << 7),
171     ISSIs16Bit = (1 << 8),
172 } ISSInfo;
173 
174 /*
175  * Store var into env + offset to a member with size bytes.
176  * Free var after use.
177  */
178 void store_cpu_offset(TCGv_i32 var, int offset, int size)
179 {
180     switch (size) {
181     case 1:
182         tcg_gen_st8_i32(var, tcg_env, offset);
183         break;
184     case 4:
185         tcg_gen_st_i32(var, tcg_env, offset);
186         break;
187     default:
188         g_assert_not_reached();
189     }
190 }
191 
192 /* Save the syndrome information for a Data Abort */
193 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
194 {
195     uint32_t syn;
196     int sas = memop & MO_SIZE;
197     bool sse = memop & MO_SIGN;
198     bool is_acqrel = issinfo & ISSIsAcqRel;
199     bool is_write = issinfo & ISSIsWrite;
200     bool is_16bit = issinfo & ISSIs16Bit;
201     int srt = issinfo & ISSRegMask;
202 
203     if (issinfo & ISSInvalid) {
204         /* Some callsites want to conditionally provide ISS info,
205          * eg "only if this was not a writeback"
206          */
207         return;
208     }
209 
210     if (srt == 15) {
211         /* For AArch32, insns where the src/dest is R15 never generate
212          * ISS information. Catching that here saves checking at all
213          * the call sites.
214          */
215         return;
216     }
217 
218     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
219                                   0, 0, 0, is_write, 0, is_16bit);
220     disas_set_insn_syndrome(s, syn);
221 }
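
/*
 * Rough shape of the syndrome produced above (hedged: this summarises the
 * architectural data-abort ISS encoding rather than quoting this file):
 * SAS encodes the access size from the MemOp, SSE the sign-extension flag,
 * SRT the register number, AR the acquire/release property, WnR the write
 * flag, and is_16bit selects the 16-bit instruction-length (IL) form.
 */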
222 
223 static inline int get_a32_user_mem_index(DisasContext *s)
224 {
225     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
226      * insns:
227      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
228      *  otherwise, access as if at PL0.
229      */
230     switch (s->mmu_idx) {
231     case ARMMMUIdx_E3:
232     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
233     case ARMMMUIdx_E10_0:
234     case ARMMMUIdx_E10_1:
235     case ARMMMUIdx_E10_1_PAN:
236         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
237     case ARMMMUIdx_MUser:
238     case ARMMMUIdx_MPriv:
239         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
240     case ARMMMUIdx_MUserNegPri:
241     case ARMMMUIdx_MPrivNegPri:
242         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
243     case ARMMMUIdx_MSUser:
244     case ARMMMUIdx_MSPriv:
245         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
246     case ARMMMUIdx_MSUserNegPri:
247     case ARMMMUIdx_MSPrivNegPri:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
249     default:
250         g_assert_not_reached();
251     }
252 }
253 
254 /* The pc_curr difference for an architectural jump. */
255 static target_long jmp_diff(DisasContext *s, target_long diff)
256 {
257     return diff + (s->thumb ? 4 : 8);
258 }
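
/*
 * Example: for a branch at pc_curr with immediate imm, the architectural
 * target is pc_curr + imm + 8 in ARM state and pc_curr + imm + 4 in Thumb
 * state, because reads of the PC see the instruction address plus the
 * pipeline offset; callers pass the raw immediate and jmp_diff() folds
 * that offset in.
 */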
259 
260 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
261 {
262     assert(s->pc_save != -1);
263     if (tb_cflags(s->base.tb) & CF_PCREL) {
264         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
265     } else {
266         tcg_gen_movi_i32(var, s->pc_curr + diff);
267     }
268 }
269 
270 /* Set a variable to the value of a CPU register.  */
271 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
272 {
273     if (reg == 15) {
274         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
275     } else {
276         tcg_gen_mov_i32(var, cpu_R[reg]);
277     }
278 }
279 
280 /*
281  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
282  * This is used for load/store for which use of PC implies (literal),
283  * or ADD that implies ADR.
284  */
285 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
286 {
287     TCGv_i32 tmp = tcg_temp_new_i32();
288 
289     if (reg == 15) {
290         /*
291          * This address is computed from an aligned PC:
292          * subtract off the low bits.
293          */
294         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
295     } else {
296         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
297     }
298     return tmp;
299 }
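
/*
 * Example: a literal load such as "LDR Rt, [PC, #imm]" goes through
 * add_reg_for_lit(s, 15, imm), which produces Align(PC, 4) + imm where
 * PC is the usual read value (instruction address plus the pipeline
 * offset), matching the (literal) addressing form.
 */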
300 
301 /* Set a CPU register.  The source must be a temporary and will be
302    marked as dead.  */
303 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
304 {
305     if (reg == 15) {
306         /* In Thumb mode, we must ignore bit 0.
307          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
308          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
309          * We choose to ignore [1:0] in ARM mode for all architecture versions.
310          */
311         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
312         s->base.is_jmp = DISAS_JUMP;
313         s->pc_save = -1;
314     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
315         /* For M-profile SP bits [1:0] are always zero */
316         tcg_gen_andi_i32(var, var, ~3);
317     }
318     tcg_gen_mov_i32(cpu_R[reg], var);
319 }
320 
321 /*
322  * Variant of store_reg which applies v8M stack-limit checks before updating
323  * SP. If the check fails this will result in an exception being taken.
324  * We disable the stack checks for CONFIG_USER_ONLY because we have
325  * no idea what the stack limits should be in that case.
326  * If stack checking is not being done this just acts like store_reg().
327  */
328 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
329 {
330 #ifndef CONFIG_USER_ONLY
331     if (s->v8m_stackcheck) {
332         gen_helper_v8m_stackcheck(tcg_env, var);
333     }
334 #endif
335     store_reg(s, 13, var);
336 }
337 
338 /* Value extensions.  */
339 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
340 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
341 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
342 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
343 
344 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
345 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
346 
347 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
348 {
349     gen_helper_cpsr_write(tcg_env, var, tcg_constant_i32(mask));
350 }
351 
352 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
353 {
354     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
355 
356     if (new_el) {
357         if (m_profile) {
358             gen_helper_rebuild_hflags_m32_newel(tcg_env);
359         } else {
360             gen_helper_rebuild_hflags_a32_newel(tcg_env);
361         }
362     } else {
363         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
364         if (m_profile) {
365             gen_helper_rebuild_hflags_m32(tcg_env, tcg_el);
366         } else {
367             gen_helper_rebuild_hflags_a32(tcg_env, tcg_el);
368         }
369     }
370 }
371 
372 static void gen_exception_internal(int excp)
373 {
374     assert(excp_is_internal(excp));
375     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
376 }
377 
378 static void gen_singlestep_exception(DisasContext *s)
379 {
380     /* We just completed step of an insn. Move from Active-not-pending
381      * to Active-pending, and then also take the swstep exception.
382      * This corresponds to making the (IMPDEF) choice to prioritize
383      * swstep exceptions over asynchronous exceptions taken to an exception
384      * level where debug is disabled. This choice has the advantage that
385      * we do not need to maintain internal state corresponding to the
386      * ISV/EX syndrome bits between completion of the step and generation
387      * of the exception, and our syndrome information is always correct.
388      */
389     gen_ss_advance(s);
390     gen_swstep_exception(s, 1, s->is_ldex);
391     s->base.is_jmp = DISAS_NORETURN;
392 }
393 
394 void clear_eci_state(DisasContext *s)
395 {
396     /*
397      * Clear any ECI/ICI state: used when a load multiple/store
398      * multiple insn executes.
399      */
400     if (s->eci) {
401         store_cpu_field_constant(0, condexec_bits);
402         s->eci = 0;
403     }
404 }
405 
406 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
407 {
408     TCGv_i32 tmp1 = tcg_temp_new_i32();
409     TCGv_i32 tmp2 = tcg_temp_new_i32();
410     tcg_gen_ext16s_i32(tmp1, a);
411     tcg_gen_ext16s_i32(tmp2, b);
412     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
413     tcg_gen_sari_i32(a, a, 16);
414     tcg_gen_sari_i32(b, b, 16);
415     tcg_gen_mul_i32(b, b, a);
416     tcg_gen_mov_i32(a, tmp1);
417 }
418 
419 /* Byteswap each halfword.  */
420 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
421 {
422     TCGv_i32 tmp = tcg_temp_new_i32();
423     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
424     tcg_gen_shri_i32(tmp, var, 8);
425     tcg_gen_and_i32(tmp, tmp, mask);
426     tcg_gen_and_i32(var, var, mask);
427     tcg_gen_shli_i32(var, var, 8);
428     tcg_gen_or_i32(dest, var, tmp);
429 }
430 
431 /* Byteswap low halfword and sign extend.  */
432 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
433 {
434     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
435 }
436 
437 /* Dual 16-bit add.  Result placed in dest; t0 and t1 are clobbered.
438     tmp = (t0 ^ t1) & 0x8000;
439     t0 &= ~0x8000;
440     t1 &= ~0x8000;
441     dest = (t0 + t1) ^ tmp;
442  */
443 
444 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
445 {
446     TCGv_i32 tmp = tcg_temp_new_i32();
447     tcg_gen_xor_i32(tmp, t0, t1);
448     tcg_gen_andi_i32(tmp, tmp, 0x8000);
449     tcg_gen_andi_i32(t0, t0, ~0x8000);
450     tcg_gen_andi_i32(t1, t1, ~0x8000);
451     tcg_gen_add_i32(t0, t0, t1);
452     tcg_gen_xor_i32(dest, t0, tmp);
453 }
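
/*
 * Worked example of the carry-suppression trick above: with
 * t0 == t1 == 0x00008000, tmp is 0, both operands become 0 after the
 * masking, and the result is 0x00000000.  The low halfwords have added
 * modulo 2^16 without the carry leaking into the upper halfword, which
 * is exactly what a dual 16-bit add requires.
 */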
454 
455 /* Set N and Z flags from var.  */
456 static inline void gen_logic_CC(TCGv_i32 var)
457 {
458     tcg_gen_mov_i32(cpu_NF, var);
459     tcg_gen_mov_i32(cpu_ZF, var);
460 }
461 
462 /* dest = T0 + T1 + CF. */
463 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
464 {
465     tcg_gen_add_i32(dest, t0, t1);
466     tcg_gen_add_i32(dest, dest, cpu_CF);
467 }
468 
469 /* dest = T0 - T1 + CF - 1.  */
470 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
471 {
472     tcg_gen_sub_i32(dest, t0, t1);
473     tcg_gen_add_i32(dest, dest, cpu_CF);
474     tcg_gen_subi_i32(dest, dest, 1);
475 }
476 
477 /* dest = T0 + T1. Compute C, N, V and Z flags */
478 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
479 {
480     TCGv_i32 tmp = tcg_temp_new_i32();
481     tcg_gen_movi_i32(tmp, 0);
482     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
483     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
484     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
485     tcg_gen_xor_i32(tmp, t0, t1);
486     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
487     tcg_gen_mov_i32(dest, cpu_NF);
488 }
489 
490 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
491 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
492 {
493     TCGv_i32 tmp = tcg_temp_new_i32();
494     if (TCG_TARGET_HAS_add2_i32) {
495         tcg_gen_movi_i32(tmp, 0);
496         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
497         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
498     } else {
499         TCGv_i64 q0 = tcg_temp_new_i64();
500         TCGv_i64 q1 = tcg_temp_new_i64();
501         tcg_gen_extu_i32_i64(q0, t0);
502         tcg_gen_extu_i32_i64(q1, t1);
503         tcg_gen_add_i64(q0, q0, q1);
504         tcg_gen_extu_i32_i64(q1, cpu_CF);
505         tcg_gen_add_i64(q0, q0, q1);
506         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
507     }
508     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
509     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
510     tcg_gen_xor_i32(tmp, t0, t1);
511     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
512     tcg_gen_mov_i32(dest, cpu_NF);
513 }
514 
515 /* dest = T0 - T1. Compute C, N, V and Z flags */
516 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
517 {
518     TCGv_i32 tmp;
519     tcg_gen_sub_i32(cpu_NF, t0, t1);
520     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
521     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
522     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
523     tmp = tcg_temp_new_i32();
524     tcg_gen_xor_i32(tmp, t0, t1);
525     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
526     tcg_gen_mov_i32(dest, cpu_NF);
527 }
528 
529 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
530 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
531 {
532     TCGv_i32 tmp = tcg_temp_new_i32();
533     tcg_gen_not_i32(tmp, t1);
534     gen_adc_CC(dest, t0, tmp);
535 }
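
/*
 * This relies on the identity (for 32-bit values, mod 2^32):
 *   t0 - t1 - (1 - CF) == t0 + ~t1 + CF
 * so flag-setting SBC can reuse gen_adc_CC() on the complemented
 * operand, and the carry out computed there is ARM's "no borrow" C flag.
 */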
536 
537 #define GEN_SHIFT(name)                                               \
538 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
539 {                                                                     \
540     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
541     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
542     TCGv_i32 zero = tcg_constant_i32(0);                              \
543     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
544     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
545     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
546     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
547 }
548 GEN_SHIFT(shl)
549 GEN_SHIFT(shr)
550 #undef GEN_SHIFT
551 
552 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
553 {
554     TCGv_i32 tmp1 = tcg_temp_new_i32();
555 
556     tcg_gen_andi_i32(tmp1, t1, 0xff);
557     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
558     tcg_gen_sar_i32(dest, t0, tmp1);
559 }
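
/*
 * Example: clamping the shift count to 31 gives the architectural result
 * for ASR by 32 or more, i.e. a register full of copies of the sign bit;
 * gen_sar(d, t0, t1) with t0 == 0x80000000 and t1 == 0xff leaves
 * d == 0xffffffff.
 */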
560 
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565 
566 /* Shift by immediate.  Includes special handling for shift == 0.  */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568                                     int shift, int flags)
569 {
570     switch (shiftop) {
571     case 0: /* LSL */
572         if (shift != 0) {
573             if (flags)
574                 shifter_out_im(var, 32 - shift);
575             tcg_gen_shli_i32(var, var, shift);
576         }
577         break;
578     case 1: /* LSR */
579         if (shift == 0) {
580             if (flags) {
581                 tcg_gen_shri_i32(cpu_CF, var, 31);
582             }
583             tcg_gen_movi_i32(var, 0);
584         } else {
585             if (flags)
586                 shifter_out_im(var, shift - 1);
587             tcg_gen_shri_i32(var, var, shift);
588         }
589         break;
590     case 2: /* ASR */
591         if (shift == 0)
592             shift = 32;
593         if (flags)
594             shifter_out_im(var, shift - 1);
595         if (shift == 32)
596           shift = 31;
597         tcg_gen_sari_i32(var, var, shift);
598         break;
599     case 3: /* ROR/RRX */
600         if (shift != 0) {
601             if (flags)
602                 shifter_out_im(var, shift - 1);
603             tcg_gen_rotri_i32(var, var, shift); break;
604         } else {
605             TCGv_i32 tmp = tcg_temp_new_i32();
606             tcg_gen_shli_i32(tmp, cpu_CF, 31);
607             if (flags)
608                 shifter_out_im(var, 0);
609             tcg_gen_shri_i32(var, var, 1);
610             tcg_gen_or_i32(var, var, tmp);
611         }
612     }
613 }
614 
615 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616                                      TCGv_i32 shift, int flags)
617 {
618     if (flags) {
619         switch (shiftop) {
620         case 0: gen_helper_shl_cc(var, tcg_env, var, shift); break;
621         case 1: gen_helper_shr_cc(var, tcg_env, var, shift); break;
622         case 2: gen_helper_sar_cc(var, tcg_env, var, shift); break;
623         case 3: gen_helper_ror_cc(var, tcg_env, var, shift); break;
624         }
625     } else {
626         switch (shiftop) {
627         case 0:
628             gen_shl(var, var, shift);
629             break;
630         case 1:
631             gen_shr(var, var, shift);
632             break;
633         case 2:
634             gen_sar(var, var, shift);
635             break;
636         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
637                 tcg_gen_rotr_i32(var, var, shift); break;
638         }
639     }
640 }
641 
642 /*
643  * Generate a conditional based on ARM condition code cc.
644  * This is common between ARM and AArch64 targets.
645  */
646 void arm_test_cc(DisasCompare *cmp, int cc)
647 {
648     TCGv_i32 value;
649     TCGCond cond;
650 
651     switch (cc) {
652     case 0: /* eq: Z */
653     case 1: /* ne: !Z */
654         cond = TCG_COND_EQ;
655         value = cpu_ZF;
656         break;
657 
658     case 2: /* cs: C */
659     case 3: /* cc: !C */
660         cond = TCG_COND_NE;
661         value = cpu_CF;
662         break;
663 
664     case 4: /* mi: N */
665     case 5: /* pl: !N */
666         cond = TCG_COND_LT;
667         value = cpu_NF;
668         break;
669 
670     case 6: /* vs: V */
671     case 7: /* vc: !V */
672         cond = TCG_COND_LT;
673         value = cpu_VF;
674         break;
675 
676     case 8: /* hi: C && !Z */
677     case 9: /* ls: !C || Z -> !(C && !Z) */
678         cond = TCG_COND_NE;
679         value = tcg_temp_new_i32();
680         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
681            ZF is non-zero for !Z; so AND the two subexpressions.  */
682         tcg_gen_neg_i32(value, cpu_CF);
683         tcg_gen_and_i32(value, value, cpu_ZF);
684         break;
685 
686     case 10: /* ge: N == V -> N ^ V == 0 */
687     case 11: /* lt: N != V -> N ^ V != 0 */
688         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
689         cond = TCG_COND_GE;
690         value = tcg_temp_new_i32();
691         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
692         break;
693 
694     case 12: /* gt: !Z && N == V */
695     case 13: /* le: Z || N != V */
696         cond = TCG_COND_NE;
697         value = tcg_temp_new_i32();
698         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
699          * the sign bit then AND with ZF to yield the result.  */
700         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
701         tcg_gen_sari_i32(value, value, 31);
702         tcg_gen_andc_i32(value, cpu_ZF, value);
703         break;
704 
705     case 14: /* always */
706     case 15: /* always */
707         /* Use the ALWAYS condition, which will fold early.
708          * It doesn't matter what we use for the value.  */
709         cond = TCG_COND_ALWAYS;
710         value = cpu_ZF;
711         goto no_invert;
712 
713     default:
714         fprintf(stderr, "Bad condition code 0x%x\n", cc);
715         abort();
716     }
717 
718     if (cc & 1) {
719         cond = tcg_invert_cond(cond);
720     }
721 
722  no_invert:
723     cmp->cond = cond;
724     cmp->value = value;
725 }
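
/*
 * Flag representation assumed by the comparisons above: NF and VF hold
 * N and V in bit 31, ZF is zero exactly when Z is set, and CF holds C
 * as 0 or 1.  For example "hi" is tested as (-CF & ZF) != 0, since -CF
 * is all-ones only when C is set and a non-zero ZF encodes !Z.
 */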
726 
727 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
728 {
729     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
730 }
731 
732 void arm_gen_test_cc(int cc, TCGLabel *label)
733 {
734     DisasCompare cmp;
735     arm_test_cc(&cmp, cc);
736     arm_jump_cc(&cmp, label);
737 }
738 
739 void gen_set_condexec(DisasContext *s)
740 {
741     if (s->condexec_mask) {
742         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
743 
744         store_cpu_field_constant(val, condexec_bits);
745     }
746 }
747 
748 void gen_update_pc(DisasContext *s, target_long diff)
749 {
750     gen_pc_plus_diff(s, cpu_R[15], diff);
751     s->pc_save = s->pc_curr + diff;
752 }
753 
754 /* Set PC and Thumb state from var.  var is marked as dead.  */
755 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
756 {
757     s->base.is_jmp = DISAS_JUMP;
758     tcg_gen_andi_i32(cpu_R[15], var, ~1);
759     tcg_gen_andi_i32(var, var, 1);
760     store_cpu_field(var, thumb);
761     s->pc_save = -1;
762 }
763 
764 /*
765  * Set PC and Thumb state from var. var is marked as dead.
766  * For M-profile CPUs, include logic to detect exception-return
767  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
768  * and BX reg, and no others, and happens only for code in Handler mode.
769  * The Security Extension also requires us to check for the FNC_RETURN
770  * which signals a function return from non-secure state; this can happen
771  * in both Handler and Thread mode.
772  * To avoid having to do multiple comparisons in inline generated code,
773  * we make the check we do here loose, so it will match for EXC_RETURN
774  * in Thread mode. For system emulation do_v7m_exception_exit() checks
775  * for these spurious cases and returns without doing anything (giving
776  * the same behaviour as for a branch to a non-magic address).
777  *
778  * In linux-user mode it is unclear what the right behaviour for an
779  * attempted FNC_RETURN should be, because in real hardware this will go
780  * directly to Secure code (ie not the Linux kernel) which will then treat
781  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
782  * attempt behave the way it would on a CPU without the security extension,
783  * which is to say "like a normal branch". That means we can simply treat
784  * all branches as normal with no magic address behaviour.
785  */
786 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
787 {
788     /* Generate the same code here as for a simple bx, but flag via
789      * s->base.is_jmp that we need to do the rest of the work later.
790      */
791     gen_bx(s, var);
792 #ifndef CONFIG_USER_ONLY
793     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
794         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
795         s->base.is_jmp = DISAS_BX_EXCRET;
796     }
797 #endif
798 }
799 
800 static inline void gen_bx_excret_final_code(DisasContext *s)
801 {
802     /* Generate the code to finish possible exception return and end the TB */
803     DisasLabel excret_label = gen_disas_label(s);
804     uint32_t min_magic;
805 
806     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
807         /* Covers FNC_RETURN and EXC_RETURN magic */
808         min_magic = FNC_RETURN_MIN_MAGIC;
809     } else {
810         /* EXC_RETURN magic only */
811         min_magic = EXC_RETURN_MIN_MAGIC;
812     }
813 
814     /* Is the new PC value in the magic range indicating exception return? */
815     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
816     /* No: end the TB as we would for a DISAS_JMP */
817     if (s->ss_active) {
818         gen_singlestep_exception(s);
819     } else {
820         tcg_gen_exit_tb(NULL, 0);
821     }
822     set_disas_label(s, excret_label);
823     /* Yes: this is an exception return.
824      * At this point in runtime env->regs[15] and env->thumb will hold
825      * the exception-return magic number, which do_v7m_exception_exit()
826      * will read. Nothing else will be able to see those values because
827      * the cpu-exec main loop guarantees that we will always go straight
828      * from raising the exception to the exception-handling code.
829      *
830      * gen_ss_advance(s) does nothing on M profile currently but
831      * calling it is conceptually the right thing as we have executed
832      * this instruction (compare SWI, HVC, SMC handling).
833      */
834     gen_ss_advance(s);
835     gen_exception_internal(EXCP_EXCEPTION_EXIT);
836 }
837 
838 static inline void gen_bxns(DisasContext *s, int rm)
839 {
840     TCGv_i32 var = load_reg(s, rm);
841 
842     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
843      * we need to sync state before calling it, but:
844      *  - we don't need to do gen_update_pc() because the bxns helper will
845      *    always set the PC itself
846      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
847      *    unless it's outside an IT block or the last insn in an IT block,
848      *    so we know that condexec == 0 (already set at the top of the TB)
849      *    is correct in the non-UNPREDICTABLE cases, and we can choose
850      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
851      */
852     gen_helper_v7m_bxns(tcg_env, var);
853     s->base.is_jmp = DISAS_EXIT;
854 }
855 
856 static inline void gen_blxns(DisasContext *s, int rm)
857 {
858     TCGv_i32 var = load_reg(s, rm);
859 
860     /* We don't need to sync condexec state, for the same reason as bxns.
861      * We do however need to set the PC, because the blxns helper reads it.
862      * The blxns helper may throw an exception.
863      */
864     gen_update_pc(s, curr_insn_len(s));
865     gen_helper_v7m_blxns(tcg_env, var);
866     s->base.is_jmp = DISAS_EXIT;
867 }
868 
869 /* Variant of store_reg which uses branch&exchange logic when storing
870    to r15 in ARM architecture v7 and above. The source must be a temporary
871    and will be marked as dead. */
872 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
873 {
874     if (reg == 15 && ENABLE_ARCH_7) {
875         gen_bx(s, var);
876     } else {
877         store_reg(s, reg, var);
878     }
879 }
880 
881 /* Variant of store_reg which uses branch&exchange logic when storing
882  * to r15 in ARM architecture v5T and above. This is used for storing
883  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
884  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
885 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
886 {
887     if (reg == 15 && ENABLE_ARCH_5) {
888         gen_bx_excret(s, var);
889     } else {
890         store_reg(s, reg, var);
891     }
892 }
893 
894 #ifdef CONFIG_USER_ONLY
895 #define IS_USER_ONLY 1
896 #else
897 #define IS_USER_ONLY 0
898 #endif
899 
900 MemOp pow2_align(unsigned i)
901 {
902     static const MemOp mop_align[] = {
903         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
904         /*
905          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
906          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
907          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
908          */
909         MO_ALIGN_16
910     };
911     g_assert(i < ARRAY_SIZE(mop_align));
912     return mop_align[i];
913 }
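
/*
 * Usage sketch: the argument is log2 of the required alignment in bytes,
 * so pow2_align(0) == 0 (no alignment check), pow2_align(2) == MO_ALIGN_4,
 * and a 32-byte request (i == 5) is capped at MO_ALIGN_16 as the FIXME
 * above explains.
 */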
914 
915 /*
916  * Abstractions of "generate code to do a guest load/store for
917  * AArch32", where a vaddr is always 32 bits (and is zero
918  * extended if we're a 64-bit core) and data is also
919  * 32 bits unless specifically doing a 64-bit access.
920  * These functions work like tcg_gen_qemu_{ld,st}* except
921  * that the address argument is TCGv_i32 rather than TCGv.
922  */
923 
924 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926     TCGv addr = tcg_temp_new();
927     tcg_gen_extu_i32_tl(addr, a32);
928 
929     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
930     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932     }
933     return addr;
934 }
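
/*
 * Worked example of the BE32 address munging above: with SCTLR.B set,
 * a byte access XORs the address with 3 and a halfword access XORs it
 * with 2, so a byte load from 0x1001 actually accesses 0x1002, while
 * word and larger accesses are left unchanged here.
 */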
935 
936 /*
937  * Internal routines are used for NEON cases where the endianness
938  * and/or alignment has already been taken into account and manipulated.
939  */
940 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
941                               TCGv_i32 a32, int index, MemOp opc)
942 {
943     TCGv addr = gen_aa32_addr(s, a32, opc);
944     tcg_gen_qemu_ld_i32(val, addr, index, opc);
945 }
946 
947 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
948                               TCGv_i32 a32, int index, MemOp opc)
949 {
950     TCGv addr = gen_aa32_addr(s, a32, opc);
951     tcg_gen_qemu_st_i32(val, addr, index, opc);
952 }
953 
954 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
955                               TCGv_i32 a32, int index, MemOp opc)
956 {
957     TCGv addr = gen_aa32_addr(s, a32, opc);
958 
959     tcg_gen_qemu_ld_i64(val, addr, index, opc);
960 
961     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
962     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
963         tcg_gen_rotri_i64(val, val, 32);
964     }
965 }
966 
967 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
968                               TCGv_i32 a32, int index, MemOp opc)
969 {
970     TCGv addr = gen_aa32_addr(s, a32, opc);
971 
972     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
973     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
974         TCGv_i64 tmp = tcg_temp_new_i64();
975         tcg_gen_rotri_i64(tmp, val, 32);
976         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
977     } else {
978         tcg_gen_qemu_st_i64(val, addr, index, opc);
979     }
980 }
981 
982 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
983                      int index, MemOp opc)
984 {
985     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
986 }
987 
988 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
989                      int index, MemOp opc)
990 {
991     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
992 }
993 
994 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
995                      int index, MemOp opc)
996 {
997     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
998 }
999 
1000 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1001                      int index, MemOp opc)
1002 {
1003     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1004 }
1005 
1006 #define DO_GEN_LD(SUFF, OPC)                                            \
1007     static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1008                                          TCGv_i32 a32, int index)       \
1009     {                                                                   \
1010         gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1011     }
1012 
1013 #define DO_GEN_ST(SUFF, OPC)                                            \
1014     static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1015                                          TCGv_i32 a32, int index)       \
1016     {                                                                   \
1017         gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1018     }
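
/*
 * Example expansion (the invocation shown is an assumption for
 * illustration, not quoted from this file): DO_GEN_LD(16u, MO_UW)
 * defines
 *     static inline void gen_aa32_ld16u(DisasContext *s, TCGv_i32 val,
 *                                       TCGv_i32 a32, int index)
 *     { gen_aa32_ld_i32(s, val, a32, index, MO_UW); }
 * which is the shape of the gen_aa32_ld... and gen_aa32_st... helpers
 * used by the iwMMXt code below.
 */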
1019 
1020 static inline void gen_hvc(DisasContext *s, int imm16)
1021 {
1022     /* The pre HVC helper handles cases when HVC gets trapped
1023      * as an undefined insn by runtime configuration (i.e. before
1024      * the insn really executes).
1025      */
1026     gen_update_pc(s, 0);
1027     gen_helper_pre_hvc(tcg_env);
1028     /* Otherwise we will treat this as a real exception which
1029      * happens after execution of the insn. (The distinction matters
1030      * for the PC value reported to the exception handler and also
1031      * for single stepping.)
1032      */
1033     s->svc_imm = imm16;
1034     gen_update_pc(s, curr_insn_len(s));
1035     s->base.is_jmp = DISAS_HVC;
1036 }
1037 
1038 static inline void gen_smc(DisasContext *s)
1039 {
1040     /* As with HVC, we may take an exception either before or after
1041      * the insn executes.
1042      */
1043     gen_update_pc(s, 0);
1044     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa32_smc()));
1045     gen_update_pc(s, curr_insn_len(s));
1046     s->base.is_jmp = DISAS_SMC;
1047 }
1048 
1049 static void gen_exception_internal_insn(DisasContext *s, int excp)
1050 {
1051     gen_set_condexec(s);
1052     gen_update_pc(s, 0);
1053     gen_exception_internal(excp);
1054     s->base.is_jmp = DISAS_NORETURN;
1055 }
1056 
1057 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1058 {
1059     gen_helper_exception_with_syndrome_el(tcg_env, tcg_constant_i32(excp),
1060                                           tcg_constant_i32(syndrome), tcg_el);
1061 }
1062 
1063 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1064 {
1065     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1066 }
1067 
1068 static void gen_exception(int excp, uint32_t syndrome)
1069 {
1070     gen_helper_exception_with_syndrome(tcg_env, tcg_constant_i32(excp),
1071                                        tcg_constant_i32(syndrome));
1072 }
1073 
1074 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1075                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1076 {
1077     if (s->aarch64) {
1078         gen_a64_update_pc(s, pc_diff);
1079     } else {
1080         gen_set_condexec(s);
1081         gen_update_pc(s, pc_diff);
1082     }
1083     gen_exception_el_v(excp, syn, tcg_el);
1084     s->base.is_jmp = DISAS_NORETURN;
1085 }
1086 
1087 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1088                            uint32_t syn, uint32_t target_el)
1089 {
1090     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1091                             tcg_constant_i32(target_el));
1092 }
1093 
1094 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1095                         int excp, uint32_t syn)
1096 {
1097     if (s->aarch64) {
1098         gen_a64_update_pc(s, pc_diff);
1099     } else {
1100         gen_set_condexec(s);
1101         gen_update_pc(s, pc_diff);
1102     }
1103     gen_exception(excp, syn);
1104     s->base.is_jmp = DISAS_NORETURN;
1105 }
1106 
1107 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1108 {
1109     gen_set_condexec(s);
1110     gen_update_pc(s, 0);
1111     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syn));
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 void unallocated_encoding(DisasContext *s)
1116 {
1117     /* Unallocated and reserved encodings are uncategorized */
1118     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1119 }
1120 
1121 /* Force a TB lookup after an instruction that changes the CPU state.  */
1122 void gen_lookup_tb(DisasContext *s)
1123 {
1124     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1125     s->base.is_jmp = DISAS_EXIT;
1126 }
1127 
1128 static inline void gen_hlt(DisasContext *s, int imm)
1129 {
1130     /* HLT. This has two purposes.
1131      * Architecturally, it is an external halting debug instruction.
1132      * Since QEMU doesn't implement external debug, we treat this as
1133      * the architecture requires when halting debug is disabled: it will UNDEF.
1134      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1135      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1136      * must trigger semihosting even for ARMv7 and earlier, where
1137      * HLT was an undefined encoding.
1138      * In system mode, we don't allow userspace access to
1139      * semihosting, to provide some semblance of security
1140      * (and for consistency with our 32-bit semihosting).
1141      */
1142     if (semihosting_enabled(s->current_el == 0) &&
1143         (imm == (s->thumb ? 0x3c : 0xf000))) {
1144         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1145         return;
1146     }
1147 
1148     unallocated_encoding(s);
1149 }
1150 
1151 /*
1152  * Return the offset of a "full" NEON Dreg.
1153  */
1154 long neon_full_reg_offset(unsigned reg)
1155 {
1156     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1157 }
1158 
1159 /*
1160  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1161  * where 0 is the least significant end of the register.
1162  */
1163 long neon_element_offset(int reg, int element, MemOp memop)
1164 {
1165     int element_size = 1 << (memop & MO_SIZE);
1166     int ofs = element * element_size;
1167 #if HOST_BIG_ENDIAN
1168     /*
1169      * Calculate the offset assuming fully little-endian,
1170      * then XOR to account for the order of the 8-byte units.
1171      */
1172     if (element_size < 8) {
1173         ofs ^= 8 - element_size;
1174     }
1175 #endif
1176     return neon_full_reg_offset(reg) + ofs;
1177 }
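
/*
 * Example of the host-endianness fixup above: on a big-endian host the
 * offset is first computed as if the layout were fully little-endian and
 * then XORed within its 8-byte unit, so element 0 of an MO_8 access maps
 * to byte offset 7 (0 ^ (8 - 1)) while an MO_64 access needs no
 * adjustment; on little-endian hosts the offset is used as is.
 */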
1178 
1179 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1180 long vfp_reg_offset(bool dp, unsigned reg)
1181 {
1182     if (dp) {
1183         return neon_element_offset(reg, 0, MO_64);
1184     } else {
1185         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1186     }
1187 }
1188 
1189 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1190 {
1191     long off = neon_element_offset(reg, ele, memop);
1192 
1193     switch (memop) {
1194     case MO_SB:
1195         tcg_gen_ld8s_i32(dest, tcg_env, off);
1196         break;
1197     case MO_UB:
1198         tcg_gen_ld8u_i32(dest, tcg_env, off);
1199         break;
1200     case MO_SW:
1201         tcg_gen_ld16s_i32(dest, tcg_env, off);
1202         break;
1203     case MO_UW:
1204         tcg_gen_ld16u_i32(dest, tcg_env, off);
1205         break;
1206     case MO_UL:
1207     case MO_SL:
1208         tcg_gen_ld_i32(dest, tcg_env, off);
1209         break;
1210     default:
1211         g_assert_not_reached();
1212     }
1213 }
1214 
1215 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1216 {
1217     long off = neon_element_offset(reg, ele, memop);
1218 
1219     switch (memop) {
1220     case MO_SL:
1221         tcg_gen_ld32s_i64(dest, tcg_env, off);
1222         break;
1223     case MO_UL:
1224         tcg_gen_ld32u_i64(dest, tcg_env, off);
1225         break;
1226     case MO_UQ:
1227         tcg_gen_ld_i64(dest, tcg_env, off);
1228         break;
1229     default:
1230         g_assert_not_reached();
1231     }
1232 }
1233 
1234 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1235 {
1236     long off = neon_element_offset(reg, ele, memop);
1237 
1238     switch (memop) {
1239     case MO_8:
1240         tcg_gen_st8_i32(src, tcg_env, off);
1241         break;
1242     case MO_16:
1243         tcg_gen_st16_i32(src, tcg_env, off);
1244         break;
1245     case MO_32:
1246         tcg_gen_st_i32(src, tcg_env, off);
1247         break;
1248     default:
1249         g_assert_not_reached();
1250     }
1251 }
1252 
1253 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1254 {
1255     long off = neon_element_offset(reg, ele, memop);
1256 
1257     switch (memop) {
1258     case MO_32:
1259         tcg_gen_st32_i64(src, tcg_env, off);
1260         break;
1261     case MO_64:
1262         tcg_gen_st_i64(src, tcg_env, off);
1263         break;
1264     default:
1265         g_assert_not_reached();
1266     }
1267 }
1268 
1269 #define ARM_CP_RW_BIT   (1 << 20)
1270 
1271 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1272 {
1273     tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1274 }
1275 
1276 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1277 {
1278     tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1279 }
1280 
1281 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1282 {
1283     TCGv_i32 var = tcg_temp_new_i32();
1284     tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285     return var;
1286 }
1287 
1288 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1289 {
1290     tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1291 }
1292 
1293 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1294 {
1295     iwmmxt_store_reg(cpu_M0, rn);
1296 }
1297 
1298 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1299 {
1300     iwmmxt_load_reg(cpu_M0, rn);
1301 }
1302 
1303 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1304 {
1305     iwmmxt_load_reg(cpu_V1, rn);
1306     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1307 }
1308 
1309 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1310 {
1311     iwmmxt_load_reg(cpu_V1, rn);
1312     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1313 }
1314 
1315 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1316 {
1317     iwmmxt_load_reg(cpu_V1, rn);
1318     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1319 }
1320 
1321 #define IWMMXT_OP(name) \
1322 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1323 { \
1324     iwmmxt_load_reg(cpu_V1, rn); \
1325     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1326 }
1327 
1328 #define IWMMXT_OP_ENV(name) \
1329 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1330 { \
1331     iwmmxt_load_reg(cpu_V1, rn); \
1332     gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0, cpu_V1); \
1333 }
1334 
1335 #define IWMMXT_OP_ENV_SIZE(name) \
1336 IWMMXT_OP_ENV(name##b) \
1337 IWMMXT_OP_ENV(name##w) \
1338 IWMMXT_OP_ENV(name##l)
1339 
1340 #define IWMMXT_OP_ENV1(name) \
1341 static inline void gen_op_iwmmxt_##name##_M0(void) \
1342 { \
1343     gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0); \
1344 }
1345 
1346 IWMMXT_OP(maddsq)
1347 IWMMXT_OP(madduq)
1348 IWMMXT_OP(sadb)
1349 IWMMXT_OP(sadw)
1350 IWMMXT_OP(mulslw)
1351 IWMMXT_OP(mulshw)
1352 IWMMXT_OP(mululw)
1353 IWMMXT_OP(muluhw)
1354 IWMMXT_OP(macsw)
1355 IWMMXT_OP(macuw)
1356 
1357 IWMMXT_OP_ENV_SIZE(unpackl)
1358 IWMMXT_OP_ENV_SIZE(unpackh)
1359 
1360 IWMMXT_OP_ENV1(unpacklub)
1361 IWMMXT_OP_ENV1(unpackluw)
1362 IWMMXT_OP_ENV1(unpacklul)
1363 IWMMXT_OP_ENV1(unpackhub)
1364 IWMMXT_OP_ENV1(unpackhuw)
1365 IWMMXT_OP_ENV1(unpackhul)
1366 IWMMXT_OP_ENV1(unpacklsb)
1367 IWMMXT_OP_ENV1(unpacklsw)
1368 IWMMXT_OP_ENV1(unpacklsl)
1369 IWMMXT_OP_ENV1(unpackhsb)
1370 IWMMXT_OP_ENV1(unpackhsw)
1371 IWMMXT_OP_ENV1(unpackhsl)
1372 
1373 IWMMXT_OP_ENV_SIZE(cmpeq)
1374 IWMMXT_OP_ENV_SIZE(cmpgtu)
1375 IWMMXT_OP_ENV_SIZE(cmpgts)
1376 
1377 IWMMXT_OP_ENV_SIZE(mins)
1378 IWMMXT_OP_ENV_SIZE(minu)
1379 IWMMXT_OP_ENV_SIZE(maxs)
1380 IWMMXT_OP_ENV_SIZE(maxu)
1381 
1382 IWMMXT_OP_ENV_SIZE(subn)
1383 IWMMXT_OP_ENV_SIZE(addn)
1384 IWMMXT_OP_ENV_SIZE(subu)
1385 IWMMXT_OP_ENV_SIZE(addu)
1386 IWMMXT_OP_ENV_SIZE(subs)
1387 IWMMXT_OP_ENV_SIZE(adds)
1388 
1389 IWMMXT_OP_ENV(avgb0)
1390 IWMMXT_OP_ENV(avgb1)
1391 IWMMXT_OP_ENV(avgw0)
1392 IWMMXT_OP_ENV(avgw1)
1393 
1394 IWMMXT_OP_ENV(packuw)
1395 IWMMXT_OP_ENV(packul)
1396 IWMMXT_OP_ENV(packuq)
1397 IWMMXT_OP_ENV(packsw)
1398 IWMMXT_OP_ENV(packsl)
1399 IWMMXT_OP_ENV(packsq)
1400 
1401 static void gen_op_iwmmxt_set_mup(void)
1402 {
1403     TCGv_i32 tmp;
1404     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1405     tcg_gen_ori_i32(tmp, tmp, 2);
1406     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1407 }
1408 
1409 static void gen_op_iwmmxt_set_cup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 1);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_setpsr_nz(void)
1418 {
1419     TCGv_i32 tmp = tcg_temp_new_i32();
1420     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1421     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1422 }
1423 
1424 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1425 {
1426     iwmmxt_load_reg(cpu_V1, rn);
1427     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1428     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1429 }
1430 
1431 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1432                                      TCGv_i32 dest)
1433 {
1434     int rd;
1435     uint32_t offset;
1436     TCGv_i32 tmp;
1437 
1438     rd = (insn >> 16) & 0xf;
1439     tmp = load_reg(s, rd);
1440 
1441     offset = (insn & 0xff) << ((insn >> 7) & 2);
1442     if (insn & (1 << 24)) {
1443         /* Pre indexed */
1444         if (insn & (1 << 23))
1445             tcg_gen_addi_i32(tmp, tmp, offset);
1446         else
1447             tcg_gen_addi_i32(tmp, tmp, -offset);
1448         tcg_gen_mov_i32(dest, tmp);
1449         if (insn & (1 << 21)) {
1450             store_reg(s, rd, tmp);
1451         }
1452     } else if (insn & (1 << 21)) {
1453         /* Post indexed */
1454         tcg_gen_mov_i32(dest, tmp);
1455         if (insn & (1 << 23))
1456             tcg_gen_addi_i32(tmp, tmp, offset);
1457         else
1458             tcg_gen_addi_i32(tmp, tmp, -offset);
1459         store_reg(s, rd, tmp);
1460     } else if (!(insn & (1 << 23)))
1461         return 1;
1462     return 0;
1463 }
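
/*
 * Offset decode example (follows from the expression above): the 8-bit
 * immediate is scaled by 4 when insn bit 8 is set, so imm8 == 0x10 gives
 * an offset of 0x40 for the word/doubleword forms and 0x10 otherwise;
 * bits 24, 23 and 21 then select pre/post indexing, add versus subtract,
 * and base-register writeback.
 */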
1464 
1465 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1466 {
1467     int rd = (insn >> 0) & 0xf;
1468     TCGv_i32 tmp;
1469 
1470     if (insn & (1 << 8)) {
1471         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1472             return 1;
1473         } else {
1474             tmp = iwmmxt_load_creg(rd);
1475         }
1476     } else {
1477         tmp = tcg_temp_new_i32();
1478         iwmmxt_load_reg(cpu_V0, rd);
1479         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1480     }
1481     tcg_gen_andi_i32(tmp, tmp, mask);
1482     tcg_gen_mov_i32(dest, tmp);
1483     return 0;
1484 }
1485 
1486 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1487    (i.e. an undefined instruction).  */
1488 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1489 {
1490     int rd, wrd;
1491     int rdhi, rdlo, rd0, rd1, i;
1492     TCGv_i32 addr;
1493     TCGv_i32 tmp, tmp2, tmp3;
1494 
1495     if ((insn & 0x0e000e00) == 0x0c000000) {
1496         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1497             wrd = insn & 0xf;
1498             rdlo = (insn >> 12) & 0xf;
1499             rdhi = (insn >> 16) & 0xf;
1500             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1501                 iwmmxt_load_reg(cpu_V0, wrd);
1502                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1503                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1504             } else {                                    /* TMCRR */
1505                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1506                 iwmmxt_store_reg(cpu_V0, wrd);
1507                 gen_op_iwmmxt_set_mup();
1508             }
1509             return 0;
1510         }
1511 
1512         wrd = (insn >> 12) & 0xf;
1513         addr = tcg_temp_new_i32();
1514         if (gen_iwmmxt_address(s, insn, addr)) {
1515             return 1;
1516         }
1517         if (insn & ARM_CP_RW_BIT) {
1518             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1519                 tmp = tcg_temp_new_i32();
1520                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1521                 iwmmxt_store_creg(wrd, tmp);
1522             } else {
1523                 i = 1;
1524                 if (insn & (1 << 8)) {
1525                     if (insn & (1 << 22)) {             /* WLDRD */
1526                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1527                         i = 0;
1528                     } else {                            /* WLDRW wRd */
1529                         tmp = tcg_temp_new_i32();
1530                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1531                     }
1532                 } else {
1533                     tmp = tcg_temp_new_i32();
1534                     if (insn & (1 << 22)) {             /* WLDRH */
1535                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1536                     } else {                            /* WLDRB */
1537                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1538                     }
1539                 }
1540                 if (i) {
1541                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1542                 }
1543                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1544             }
1545         } else {
1546             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1547                 tmp = iwmmxt_load_creg(wrd);
1548                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1549             } else {
1550                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1551                 tmp = tcg_temp_new_i32();
1552                 if (insn & (1 << 8)) {
1553                     if (insn & (1 << 22)) {             /* WSTRD */
1554                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1555                     } else {                            /* WSTRW wRd */
1556                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1557                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1558                     }
1559                 } else {
1560                     if (insn & (1 << 22)) {             /* WSTRH */
1561                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1562                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1563                     } else {                            /* WSTRB */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 }
1568             }
1569         }
1570         return 0;
1571     }
1572 
1573     if ((insn & 0x0f000000) != 0x0e000000)
1574         return 1;
1575 
1576     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1577     case 0x000:                                                 /* WOR */
1578         wrd = (insn >> 12) & 0xf;
1579         rd0 = (insn >> 0) & 0xf;
1580         rd1 = (insn >> 16) & 0xf;
1581         gen_op_iwmmxt_movq_M0_wRn(rd0);
1582         gen_op_iwmmxt_orq_M0_wRn(rd1);
1583         gen_op_iwmmxt_setpsr_nz();
1584         gen_op_iwmmxt_movq_wRn_M0(wrd);
1585         gen_op_iwmmxt_set_mup();
1586         gen_op_iwmmxt_set_cup();
1587         break;
1588     case 0x011:                                                 /* TMCR */
1589         if (insn & 0xf)
1590             return 1;
1591         rd = (insn >> 12) & 0xf;
1592         wrd = (insn >> 16) & 0xf;
1593         switch (wrd) {
1594         case ARM_IWMMXT_wCID:
1595         case ARM_IWMMXT_wCASF:
1596             break;
1597         case ARM_IWMMXT_wCon:
1598             gen_op_iwmmxt_set_cup();
1599             /* Fall through.  */
1600         case ARM_IWMMXT_wCSSF:
1601             tmp = iwmmxt_load_creg(wrd);
1602             tmp2 = load_reg(s, rd);
1603             tcg_gen_andc_i32(tmp, tmp, tmp2);
1604             iwmmxt_store_creg(wrd, tmp);
1605             break;
1606         case ARM_IWMMXT_wCGR0:
1607         case ARM_IWMMXT_wCGR1:
1608         case ARM_IWMMXT_wCGR2:
1609         case ARM_IWMMXT_wCGR3:
1610             gen_op_iwmmxt_set_cup();
1611             tmp = load_reg(s, rd);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         default:
1615             return 1;
1616         }
1617         break;
1618     case 0x100:                                                 /* WXOR */
1619         wrd = (insn >> 12) & 0xf;
1620         rd0 = (insn >> 0) & 0xf;
1621         rd1 = (insn >> 16) & 0xf;
1622         gen_op_iwmmxt_movq_M0_wRn(rd0);
1623         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1624         gen_op_iwmmxt_setpsr_nz();
1625         gen_op_iwmmxt_movq_wRn_M0(wrd);
1626         gen_op_iwmmxt_set_mup();
1627         gen_op_iwmmxt_set_cup();
1628         break;
1629     case 0x111:                                                 /* TMRC */
1630         if (insn & 0xf)
1631             return 1;
1632         rd = (insn >> 12) & 0xf;
1633         wrd = (insn >> 16) & 0xf;
1634         tmp = iwmmxt_load_creg(wrd);
1635         store_reg(s, rd, tmp);
1636         break;
1637     case 0x300:                                                 /* WANDN */
1638         wrd = (insn >> 12) & 0xf;
1639         rd0 = (insn >> 0) & 0xf;
1640         rd1 = (insn >> 16) & 0xf;
1641         gen_op_iwmmxt_movq_M0_wRn(rd0);
1642         tcg_gen_not_i64(cpu_M0, cpu_M0);
1643         gen_op_iwmmxt_andq_M0_wRn(rd1);
1644         gen_op_iwmmxt_setpsr_nz();
1645         gen_op_iwmmxt_movq_wRn_M0(wrd);
1646         gen_op_iwmmxt_set_mup();
1647         gen_op_iwmmxt_set_cup();
1648         break;
1649     case 0x200:                                                 /* WAND */
1650         wrd = (insn >> 12) & 0xf;
1651         rd0 = (insn >> 0) & 0xf;
1652         rd1 = (insn >> 16) & 0xf;
1653         gen_op_iwmmxt_movq_M0_wRn(rd0);
1654         gen_op_iwmmxt_andq_M0_wRn(rd1);
1655         gen_op_iwmmxt_setpsr_nz();
1656         gen_op_iwmmxt_movq_wRn_M0(wrd);
1657         gen_op_iwmmxt_set_mup();
1658         gen_op_iwmmxt_set_cup();
1659         break;
1660     case 0x810: case 0xa10:                             /* WMADD */
1661         wrd = (insn >> 12) & 0xf;
1662         rd0 = (insn >> 0) & 0xf;
1663         rd1 = (insn >> 16) & 0xf;
1664         gen_op_iwmmxt_movq_M0_wRn(rd0);
1665         if (insn & (1 << 21))
1666             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1667         else
1668             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1669         gen_op_iwmmxt_movq_wRn_M0(wrd);
1670         gen_op_iwmmxt_set_mup();
1671         break;
1672     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1673         wrd = (insn >> 12) & 0xf;
1674         rd0 = (insn >> 16) & 0xf;
1675         rd1 = (insn >> 0) & 0xf;
1676         gen_op_iwmmxt_movq_M0_wRn(rd0);
1677         switch ((insn >> 22) & 3) {
1678         case 0:
1679             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1680             break;
1681         case 1:
1682             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1683             break;
1684         case 2:
1685             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1686             break;
1687         case 3:
1688             return 1;
1689         }
1690         gen_op_iwmmxt_movq_wRn_M0(wrd);
1691         gen_op_iwmmxt_set_mup();
1692         gen_op_iwmmxt_set_cup();
1693         break;
1694     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1695         wrd = (insn >> 12) & 0xf;
1696         rd0 = (insn >> 16) & 0xf;
1697         rd1 = (insn >> 0) & 0xf;
1698         gen_op_iwmmxt_movq_M0_wRn(rd0);
1699         switch ((insn >> 22) & 3) {
1700         case 0:
1701             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1702             break;
1703         case 1:
1704             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1705             break;
1706         case 2:
1707             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1708             break;
1709         case 3:
1710             return 1;
1711         }
1712         gen_op_iwmmxt_movq_wRn_M0(wrd);
1713         gen_op_iwmmxt_set_mup();
1714         gen_op_iwmmxt_set_cup();
1715         break;
1716     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1717         wrd = (insn >> 12) & 0xf;
1718         rd0 = (insn >> 16) & 0xf;
1719         rd1 = (insn >> 0) & 0xf;
1720         gen_op_iwmmxt_movq_M0_wRn(rd0);
1721         if (insn & (1 << 22))
1722             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1723         else
1724             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1725         if (!(insn & (1 << 20)))
1726             gen_op_iwmmxt_addl_M0_wRn(wrd);
1727         gen_op_iwmmxt_movq_wRn_M0(wrd);
1728         gen_op_iwmmxt_set_mup();
1729         break;
1730     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1731         wrd = (insn >> 12) & 0xf;
1732         rd0 = (insn >> 16) & 0xf;
1733         rd1 = (insn >> 0) & 0xf;
1734         gen_op_iwmmxt_movq_M0_wRn(rd0);
1735         if (insn & (1 << 21)) {
1736             if (insn & (1 << 20))
1737                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1738             else
1739                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1740         } else {
1741             if (insn & (1 << 20))
1742                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1743             else
1744                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1745         }
1746         gen_op_iwmmxt_movq_wRn_M0(wrd);
1747         gen_op_iwmmxt_set_mup();
1748         break;
1749     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1750         wrd = (insn >> 12) & 0xf;
1751         rd0 = (insn >> 16) & 0xf;
1752         rd1 = (insn >> 0) & 0xf;
1753         gen_op_iwmmxt_movq_M0_wRn(rd0);
1754         if (insn & (1 << 21))
1755             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1756         else
1757             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1758         if (!(insn & (1 << 20))) {
1759             iwmmxt_load_reg(cpu_V1, wrd);
1760             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1761         }
1762         gen_op_iwmmxt_movq_wRn_M0(wrd);
1763         gen_op_iwmmxt_set_mup();
1764         break;
1765     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1766         wrd = (insn >> 12) & 0xf;
1767         rd0 = (insn >> 16) & 0xf;
1768         rd1 = (insn >> 0) & 0xf;
1769         gen_op_iwmmxt_movq_M0_wRn(rd0);
1770         switch ((insn >> 22) & 3) {
1771         case 0:
1772             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1773             break;
1774         case 1:
1775             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1776             break;
1777         case 2:
1778             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1779             break;
1780         case 3:
1781             return 1;
1782         }
1783         gen_op_iwmmxt_movq_wRn_M0(wrd);
1784         gen_op_iwmmxt_set_mup();
1785         gen_op_iwmmxt_set_cup();
1786         break;
1787     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1788         wrd = (insn >> 12) & 0xf;
1789         rd0 = (insn >> 16) & 0xf;
1790         rd1 = (insn >> 0) & 0xf;
1791         gen_op_iwmmxt_movq_M0_wRn(rd0);
1792         if (insn & (1 << 22)) {
1793             if (insn & (1 << 20))
1794                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1795             else
1796                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1797         } else {
1798             if (insn & (1 << 20))
1799                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1800             else
1801                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1802         }
1803         gen_op_iwmmxt_movq_wRn_M0(wrd);
1804         gen_op_iwmmxt_set_mup();
1805         gen_op_iwmmxt_set_cup();
1806         break;
1807     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1808         wrd = (insn >> 12) & 0xf;
1809         rd0 = (insn >> 16) & 0xf;
1810         rd1 = (insn >> 0) & 0xf;
1811         gen_op_iwmmxt_movq_M0_wRn(rd0);
1812         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1813         tcg_gen_andi_i32(tmp, tmp, 7);
1814         iwmmxt_load_reg(cpu_V1, rd1);
1815         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1816         gen_op_iwmmxt_movq_wRn_M0(wrd);
1817         gen_op_iwmmxt_set_mup();
1818         break;
1819     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1820         if (((insn >> 6) & 3) == 3)
1821             return 1;
1822         rd = (insn >> 12) & 0xf;
1823         wrd = (insn >> 16) & 0xf;
1824         tmp = load_reg(s, rd);
1825         gen_op_iwmmxt_movq_M0_wRn(wrd);
1826         switch ((insn >> 6) & 3) {
1827         case 0:
1828             tmp2 = tcg_constant_i32(0xff);
1829             tmp3 = tcg_constant_i32((insn & 7) << 3);
1830             break;
1831         case 1:
1832             tmp2 = tcg_constant_i32(0xffff);
1833             tmp3 = tcg_constant_i32((insn & 3) << 4);
1834             break;
1835         case 2:
1836             tmp2 = tcg_constant_i32(0xffffffff);
1837             tmp3 = tcg_constant_i32((insn & 1) << 5);
1838             break;
1839         default:
1840             g_assert_not_reached();
1841         }
1842         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1843         gen_op_iwmmxt_movq_wRn_M0(wrd);
1844         gen_op_iwmmxt_set_mup();
1845         break;
1846     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1847         rd = (insn >> 12) & 0xf;
1848         wrd = (insn >> 16) & 0xf;
1849         if (rd == 15 || ((insn >> 22) & 3) == 3)
1850             return 1;
1851         gen_op_iwmmxt_movq_M0_wRn(wrd);
1852         tmp = tcg_temp_new_i32();
1853         switch ((insn >> 22) & 3) {
1854         case 0:
1855             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1856             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1857             if (insn & 8) {
1858                 tcg_gen_ext8s_i32(tmp, tmp);
1859             } else {
1860                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1861             }
1862             break;
1863         case 1:
1864             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1865             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1866             if (insn & 8) {
1867                 tcg_gen_ext16s_i32(tmp, tmp);
1868             } else {
1869                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1870             }
1871             break;
1872         case 2:
1873             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1874             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1875             break;
1876         }
1877         store_reg(s, rd, tmp);
1878         break;
1879     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1880         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1881             return 1;
1882         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1883         switch ((insn >> 22) & 3) {
1884         case 0:
1885             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1886             break;
1887         case 1:
1888             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1889             break;
1890         case 2:
1891             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1892             break;
1893         }
1894         tcg_gen_shli_i32(tmp, tmp, 28);
1895         gen_set_nzcv(tmp);
1896         break;
1897     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1898         if (((insn >> 6) & 3) == 3)
1899             return 1;
1900         rd = (insn >> 12) & 0xf;
1901         wrd = (insn >> 16) & 0xf;
1902         tmp = load_reg(s, rd);
1903         switch ((insn >> 6) & 3) {
1904         case 0:
1905             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1906             break;
1907         case 1:
1908             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1909             break;
1910         case 2:
1911             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1912             break;
1913         }
1914         gen_op_iwmmxt_movq_wRn_M0(wrd);
1915         gen_op_iwmmxt_set_mup();
1916         break;
1917     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1918         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1919             return 1;
1920         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1921         tmp2 = tcg_temp_new_i32();
1922         tcg_gen_mov_i32(tmp2, tmp);
1923         switch ((insn >> 22) & 3) {
1924         case 0:
1925             for (i = 0; i < 7; i ++) {
1926                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1927                 tcg_gen_and_i32(tmp, tmp, tmp2);
1928             }
1929             break;
1930         case 1:
1931             for (i = 0; i < 3; i ++) {
1932                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1933                 tcg_gen_and_i32(tmp, tmp, tmp2);
1934             }
1935             break;
1936         case 2:
1937             tcg_gen_shli_i32(tmp2, tmp2, 16);
1938             tcg_gen_and_i32(tmp, tmp, tmp2);
1939             break;
1940         }
1941         gen_set_nzcv(tmp);
1942         break;
1943     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1944         wrd = (insn >> 12) & 0xf;
1945         rd0 = (insn >> 16) & 0xf;
1946         gen_op_iwmmxt_movq_M0_wRn(rd0);
1947         switch ((insn >> 22) & 3) {
1948         case 0:
1949             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1950             break;
1951         case 1:
1952             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1953             break;
1954         case 2:
1955             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1956             break;
1957         case 3:
1958             return 1;
1959         }
1960         gen_op_iwmmxt_movq_wRn_M0(wrd);
1961         gen_op_iwmmxt_set_mup();
1962         break;
1963     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1964         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1965             return 1;
1966         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1967         tmp2 = tcg_temp_new_i32();
1968         tcg_gen_mov_i32(tmp2, tmp);
1969         switch ((insn >> 22) & 3) {
1970         case 0:
1971             for (i = 0; i < 7; i ++) {
1972                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1973                 tcg_gen_or_i32(tmp, tmp, tmp2);
1974             }
1975             break;
1976         case 1:
1977             for (i = 0; i < 3; i ++) {
1978                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1979                 tcg_gen_or_i32(tmp, tmp, tmp2);
1980             }
1981             break;
1982         case 2:
1983             tcg_gen_shli_i32(tmp2, tmp2, 16);
1984             tcg_gen_or_i32(tmp, tmp, tmp2);
1985             break;
1986         }
1987         gen_set_nzcv(tmp);
1988         break;
1989     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1990         rd = (insn >> 12) & 0xf;
1991         rd0 = (insn >> 16) & 0xf;
1992         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1993             return 1;
1994         gen_op_iwmmxt_movq_M0_wRn(rd0);
1995         tmp = tcg_temp_new_i32();
1996         switch ((insn >> 22) & 3) {
1997         case 0:
1998             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1999             break;
2000         case 1:
2001             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2002             break;
2003         case 2:
2004             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2005             break;
2006         }
2007         store_reg(s, rd, tmp);
2008         break;
2009     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2010     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2011         wrd = (insn >> 12) & 0xf;
2012         rd0 = (insn >> 16) & 0xf;
2013         rd1 = (insn >> 0) & 0xf;
2014         gen_op_iwmmxt_movq_M0_wRn(rd0);
2015         switch ((insn >> 22) & 3) {
2016         case 0:
2017             if (insn & (1 << 21))
2018                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2019             else
2020                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2021             break;
2022         case 1:
2023             if (insn & (1 << 21))
2024                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2025             else
2026                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2027             break;
2028         case 2:
2029             if (insn & (1 << 21))
2030                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2031             else
2032                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2033             break;
2034         case 3:
2035             return 1;
2036         }
2037         gen_op_iwmmxt_movq_wRn_M0(wrd);
2038         gen_op_iwmmxt_set_mup();
2039         gen_op_iwmmxt_set_cup();
2040         break;
2041     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2042     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2043         wrd = (insn >> 12) & 0xf;
2044         rd0 = (insn >> 16) & 0xf;
2045         gen_op_iwmmxt_movq_M0_wRn(rd0);
2046         switch ((insn >> 22) & 3) {
2047         case 0:
2048             if (insn & (1 << 21))
2049                 gen_op_iwmmxt_unpacklsb_M0();
2050             else
2051                 gen_op_iwmmxt_unpacklub_M0();
2052             break;
2053         case 1:
2054             if (insn & (1 << 21))
2055                 gen_op_iwmmxt_unpacklsw_M0();
2056             else
2057                 gen_op_iwmmxt_unpackluw_M0();
2058             break;
2059         case 2:
2060             if (insn & (1 << 21))
2061                 gen_op_iwmmxt_unpacklsl_M0();
2062             else
2063                 gen_op_iwmmxt_unpacklul_M0();
2064             break;
2065         case 3:
2066             return 1;
2067         }
2068         gen_op_iwmmxt_movq_wRn_M0(wrd);
2069         gen_op_iwmmxt_set_mup();
2070         gen_op_iwmmxt_set_cup();
2071         break;
2072     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2073     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2074         wrd = (insn >> 12) & 0xf;
2075         rd0 = (insn >> 16) & 0xf;
2076         gen_op_iwmmxt_movq_M0_wRn(rd0);
2077         switch ((insn >> 22) & 3) {
2078         case 0:
2079             if (insn & (1 << 21))
2080                 gen_op_iwmmxt_unpackhsb_M0();
2081             else
2082                 gen_op_iwmmxt_unpackhub_M0();
2083             break;
2084         case 1:
2085             if (insn & (1 << 21))
2086                 gen_op_iwmmxt_unpackhsw_M0();
2087             else
2088                 gen_op_iwmmxt_unpackhuw_M0();
2089             break;
2090         case 2:
2091             if (insn & (1 << 21))
2092                 gen_op_iwmmxt_unpackhsl_M0();
2093             else
2094                 gen_op_iwmmxt_unpackhul_M0();
2095             break;
2096         case 3:
2097             return 1;
2098         }
2099         gen_op_iwmmxt_movq_wRn_M0(wrd);
2100         gen_op_iwmmxt_set_mup();
2101         gen_op_iwmmxt_set_cup();
2102         break;
2103     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2104     case 0x214: case 0x614: case 0xa14: case 0xe14:
2105         if (((insn >> 22) & 3) == 0)
2106             return 1;
2107         wrd = (insn >> 12) & 0xf;
2108         rd0 = (insn >> 16) & 0xf;
2109         gen_op_iwmmxt_movq_M0_wRn(rd0);
2110         tmp = tcg_temp_new_i32();
2111         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2112             return 1;
2113         }
2114         switch ((insn >> 22) & 3) {
2115         case 1:
2116             gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp);
2117             break;
2118         case 2:
2119             gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp);
2120             break;
2121         case 3:
2122             gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp);
2123             break;
2124         }
2125         gen_op_iwmmxt_movq_wRn_M0(wrd);
2126         gen_op_iwmmxt_set_mup();
2127         gen_op_iwmmxt_set_cup();
2128         break;
2129     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2130     case 0x014: case 0x414: case 0x814: case 0xc14:
2131         if (((insn >> 22) & 3) == 0)
2132             return 1;
2133         wrd = (insn >> 12) & 0xf;
2134         rd0 = (insn >> 16) & 0xf;
2135         gen_op_iwmmxt_movq_M0_wRn(rd0);
2136         tmp = tcg_temp_new_i32();
2137         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2138             return 1;
2139         }
2140         switch ((insn >> 22) & 3) {
2141         case 1:
2142             gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp);
2143             break;
2144         case 2:
2145             gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp);
2146             break;
2147         case 3:
2148             gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp);
2149             break;
2150         }
2151         gen_op_iwmmxt_movq_wRn_M0(wrd);
2152         gen_op_iwmmxt_set_mup();
2153         gen_op_iwmmxt_set_cup();
2154         break;
2155     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2156     case 0x114: case 0x514: case 0x914: case 0xd14:
2157         if (((insn >> 22) & 3) == 0)
2158             return 1;
2159         wrd = (insn >> 12) & 0xf;
2160         rd0 = (insn >> 16) & 0xf;
2161         gen_op_iwmmxt_movq_M0_wRn(rd0);
2162         tmp = tcg_temp_new_i32();
2163         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2164             return 1;
2165         }
2166         switch ((insn >> 22) & 3) {
2167         case 1:
2168             gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp);
2169             break;
2170         case 2:
2171             gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp);
2172             break;
2173         case 3:
2174             gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp);
2175             break;
2176         }
2177         gen_op_iwmmxt_movq_wRn_M0(wrd);
2178         gen_op_iwmmxt_set_mup();
2179         gen_op_iwmmxt_set_cup();
2180         break;
2181     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2182     case 0x314: case 0x714: case 0xb14: case 0xf14:
2183         if (((insn >> 22) & 3) == 0)
2184             return 1;
2185         wrd = (insn >> 12) & 0xf;
2186         rd0 = (insn >> 16) & 0xf;
2187         gen_op_iwmmxt_movq_M0_wRn(rd0);
2188         tmp = tcg_temp_new_i32();
2189         switch ((insn >> 22) & 3) {
2190         case 1:
2191             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2192                 return 1;
2193             }
2194             gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp);
2195             break;
2196         case 2:
2197             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2198                 return 1;
2199             }
2200             gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp);
2201             break;
2202         case 3:
2203             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2204                 return 1;
2205             }
2206             gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp);
2207             break;
2208         }
2209         gen_op_iwmmxt_movq_wRn_M0(wrd);
2210         gen_op_iwmmxt_set_mup();
2211         gen_op_iwmmxt_set_cup();
2212         break;
2213     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2214     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2215         wrd = (insn >> 12) & 0xf;
2216         rd0 = (insn >> 16) & 0xf;
2217         rd1 = (insn >> 0) & 0xf;
2218         gen_op_iwmmxt_movq_M0_wRn(rd0);
2219         switch ((insn >> 22) & 3) {
2220         case 0:
2221             if (insn & (1 << 21))
2222                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2223             else
2224                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2225             break;
2226         case 1:
2227             if (insn & (1 << 21))
2228                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2229             else
2230                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2231             break;
2232         case 2:
2233             if (insn & (1 << 21))
2234                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2235             else
2236                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2237             break;
2238         case 3:
2239             return 1;
2240         }
2241         gen_op_iwmmxt_movq_wRn_M0(wrd);
2242         gen_op_iwmmxt_set_mup();
2243         break;
2244     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2245     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2246         wrd = (insn >> 12) & 0xf;
2247         rd0 = (insn >> 16) & 0xf;
2248         rd1 = (insn >> 0) & 0xf;
2249         gen_op_iwmmxt_movq_M0_wRn(rd0);
2250         switch ((insn >> 22) & 3) {
2251         case 0:
2252             if (insn & (1 << 21))
2253                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2254             else
2255                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2256             break;
2257         case 1:
2258             if (insn & (1 << 21))
2259                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2260             else
2261                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2262             break;
2263         case 2:
2264             if (insn & (1 << 21))
2265                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2266             else
2267                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2268             break;
2269         case 3:
2270             return 1;
2271         }
2272         gen_op_iwmmxt_movq_wRn_M0(wrd);
2273         gen_op_iwmmxt_set_mup();
2274         break;
2275     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2276     case 0x402: case 0x502: case 0x602: case 0x702:
2277         wrd = (insn >> 12) & 0xf;
2278         rd0 = (insn >> 16) & 0xf;
2279         rd1 = (insn >> 0) & 0xf;
2280         gen_op_iwmmxt_movq_M0_wRn(rd0);
2281         iwmmxt_load_reg(cpu_V1, rd1);
2282         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2283                                 tcg_constant_i32((insn >> 20) & 3));
2284         gen_op_iwmmxt_movq_wRn_M0(wrd);
2285         gen_op_iwmmxt_set_mup();
2286         break;
2287     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2288     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2289     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2290     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2291         wrd = (insn >> 12) & 0xf;
2292         rd0 = (insn >> 16) & 0xf;
2293         rd1 = (insn >> 0) & 0xf;
2294         gen_op_iwmmxt_movq_M0_wRn(rd0);
2295         switch ((insn >> 20) & 0xf) {
2296         case 0x0:
2297             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2298             break;
2299         case 0x1:
2300             gen_op_iwmmxt_subub_M0_wRn(rd1);
2301             break;
2302         case 0x3:
2303             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2304             break;
2305         case 0x4:
2306             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2307             break;
2308         case 0x5:
2309             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2310             break;
2311         case 0x7:
2312             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2313             break;
2314         case 0x8:
2315             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2316             break;
2317         case 0x9:
2318             gen_op_iwmmxt_subul_M0_wRn(rd1);
2319             break;
2320         case 0xb:
2321             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2322             break;
2323         default:
2324             return 1;
2325         }
2326         gen_op_iwmmxt_movq_wRn_M0(wrd);
2327         gen_op_iwmmxt_set_mup();
2328         gen_op_iwmmxt_set_cup();
2329         break;
2330     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2331     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2332     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2333     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2334         wrd = (insn >> 12) & 0xf;
2335         rd0 = (insn >> 16) & 0xf;
2336         gen_op_iwmmxt_movq_M0_wRn(rd0);
2337         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2338         gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp);
2339         gen_op_iwmmxt_movq_wRn_M0(wrd);
2340         gen_op_iwmmxt_set_mup();
2341         gen_op_iwmmxt_set_cup();
2342         break;
2343     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2344     case 0x418: case 0x518: case 0x618: case 0x718:
2345     case 0x818: case 0x918: case 0xa18: case 0xb18:
2346     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2347         wrd = (insn >> 12) & 0xf;
2348         rd0 = (insn >> 16) & 0xf;
2349         rd1 = (insn >> 0) & 0xf;
2350         gen_op_iwmmxt_movq_M0_wRn(rd0);
2351         switch ((insn >> 20) & 0xf) {
2352         case 0x0:
2353             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2354             break;
2355         case 0x1:
2356             gen_op_iwmmxt_addub_M0_wRn(rd1);
2357             break;
2358         case 0x3:
2359             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2360             break;
2361         case 0x4:
2362             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2363             break;
2364         case 0x5:
2365             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2366             break;
2367         case 0x7:
2368             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2369             break;
2370         case 0x8:
2371             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2372             break;
2373         case 0x9:
2374             gen_op_iwmmxt_addul_M0_wRn(rd1);
2375             break;
2376         case 0xb:
2377             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2378             break;
2379         default:
2380             return 1;
2381         }
2382         gen_op_iwmmxt_movq_wRn_M0(wrd);
2383         gen_op_iwmmxt_set_mup();
2384         gen_op_iwmmxt_set_cup();
2385         break;
2386     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2387     case 0x408: case 0x508: case 0x608: case 0x708:
2388     case 0x808: case 0x908: case 0xa08: case 0xb08:
2389     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2390         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2391             return 1;
2392         wrd = (insn >> 12) & 0xf;
2393         rd0 = (insn >> 16) & 0xf;
2394         rd1 = (insn >> 0) & 0xf;
2395         gen_op_iwmmxt_movq_M0_wRn(rd0);
2396         switch ((insn >> 22) & 3) {
2397         case 1:
2398             if (insn & (1 << 21))
2399                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2400             else
2401                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2402             break;
2403         case 2:
2404             if (insn & (1 << 21))
2405                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2406             else
2407                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2408             break;
2409         case 3:
2410             if (insn & (1 << 21))
2411                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2412             else
2413                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2414             break;
2415         }
2416         gen_op_iwmmxt_movq_wRn_M0(wrd);
2417         gen_op_iwmmxt_set_mup();
2418         gen_op_iwmmxt_set_cup();
2419         break;
2420     case 0x201: case 0x203: case 0x205: case 0x207:
2421     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2422     case 0x211: case 0x213: case 0x215: case 0x217:
2423     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2424         wrd = (insn >> 5) & 0xf;
2425         rd0 = (insn >> 12) & 0xf;
2426         rd1 = (insn >> 0) & 0xf;
2427         if (rd0 == 0xf || rd1 == 0xf)
2428             return 1;
2429         gen_op_iwmmxt_movq_M0_wRn(wrd);
2430         tmp = load_reg(s, rd0);
2431         tmp2 = load_reg(s, rd1);
2432         switch ((insn >> 16) & 0xf) {
2433         case 0x0:                                       /* TMIA */
2434             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2435             break;
2436         case 0x8:                                       /* TMIAPH */
2437             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2438             break;
2439         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2440             if (insn & (1 << 16))
2441                 tcg_gen_shri_i32(tmp, tmp, 16);
2442             if (insn & (1 << 17))
2443                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2444             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2445             break;
2446         default:
2447             return 1;
2448         }
2449         gen_op_iwmmxt_movq_wRn_M0(wrd);
2450         gen_op_iwmmxt_set_mup();
2451         break;
2452     default:
2453         return 1;
2454     }
2455 
2456     return 0;
2457 }
2458 
2459 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2460    (i.e. an undefined instruction).  */
2461 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2462 {
2463     int acc, rd0, rd1, rdhi, rdlo;
2464     TCGv_i32 tmp, tmp2;
2465 
2466     if ((insn & 0x0ff00f10) == 0x0e200010) {
2467         /* Multiply with Internal Accumulate Format */
2468         rd0 = (insn >> 12) & 0xf;
2469         rd1 = insn & 0xf;
2470         acc = (insn >> 5) & 7;
2471 
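             /* Only acc0 is handled here; other accumulator numbers are UNDEF. */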
2472         if (acc != 0)
2473             return 1;
2474 
2475         tmp = load_reg(s, rd0);
2476         tmp2 = load_reg(s, rd1);
2477         switch ((insn >> 16) & 0xf) {
2478         case 0x0:                                       /* MIA */
2479             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2480             break;
2481         case 0x8:                                       /* MIAPH */
2482             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2483             break;
2484         case 0xc:                                       /* MIABB */
2485         case 0xd:                                       /* MIABT */
2486         case 0xe:                                       /* MIATB */
2487         case 0xf:                                       /* MIATT */
2488             if (insn & (1 << 16))
2489                 tcg_gen_shri_i32(tmp, tmp, 16);
2490             if (insn & (1 << 17))
2491                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2492             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2493             break;
2494         default:
2495             return 1;
2496         }
2497 
2498         gen_op_iwmmxt_movq_wRn_M0(acc);
2499         return 0;
2500     }
2501 
2502     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2503         /* Internal Accumulator Access Format */
2504         rdhi = (insn >> 16) & 0xf;
2505         rdlo = (insn >> 12) & 0xf;
2506         acc = insn & 7;
2507 
2508         if (acc != 0)
2509             return 1;
2510 
2511         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2512             iwmmxt_load_reg(cpu_V0, acc);
2513             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2514             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
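                 /* The accumulator is 40 bits wide; rdhi keeps only bits [39:32]. */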
2515             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2516         } else {                                        /* MAR */
2517             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2518             iwmmxt_store_reg(cpu_V0, acc);
2519         }
2520         return 0;
2521     }
2522 
2523     return 1;
2524 }
2525 
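     /*
      * Look up the TB for the current CPU state and jump straight to it
      * if one exists; otherwise fall back to exiting to the main loop.
      */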
2526 static void gen_goto_ptr(void)
2527 {
2528     tcg_gen_lookup_and_goto_ptr();
2529 }
2530 
2531 /* This will end the TB but doesn't guarantee we'll return to
2532  * cpu_loop_exec. Any live exit_requests will be processed as we
2533  * enter the next TB.
2534  */
2535 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2536 {
2537     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2538         /*
2539          * For pcrel, the pc must always be up-to-date on entry to
2540          * the linked TB, so that it can use simple additions for all
2541          * further adjustments.  For !pcrel, the linked TB is compiled
2542          * to know its full virtual address, so we can delay the
2543          * update to pc to the unlinked path.  A long chain of links
2544          * can thus avoid many updates to the PC.
2545          */
2546         if (tb_cflags(s->base.tb) & CF_PCREL) {
2547             gen_update_pc(s, diff);
2548             tcg_gen_goto_tb(n);
2549         } else {
2550             tcg_gen_goto_tb(n);
2551             gen_update_pc(s, diff);
2552         }
2553         tcg_gen_exit_tb(s->base.tb, n);
2554     } else {
2555         gen_update_pc(s, diff);
2556         gen_goto_ptr();
2557     }
2558     s->base.is_jmp = DISAS_NORETURN;
2559 }
2560 
2561 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2562 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2563 {
2564     if (unlikely(s->ss_active)) {
2565         /* An indirect jump so that we still trigger the debug exception.  */
2566         gen_update_pc(s, diff);
2567         s->base.is_jmp = DISAS_JUMP;
2568         return;
2569     }
2570     switch (s->base.is_jmp) {
2571     case DISAS_NEXT:
2572     case DISAS_TOO_MANY:
2573     case DISAS_NORETURN:
2574         /*
2575          * The normal case: just go to the destination TB.
2576          * NB: NORETURN happens if we generate code like
2577          *    gen_brcondi(l);
2578          *    gen_jmp();
2579          *    gen_set_label(l);
2580          *    gen_jmp();
2581          * on the second call to gen_jmp().
2582          */
2583         gen_goto_tb(s, tbno, diff);
2584         break;
2585     case DISAS_UPDATE_NOCHAIN:
2586     case DISAS_UPDATE_EXIT:
2587         /*
2588          * We already decided we're leaving the TB for some other reason.
2589          * Avoid using goto_tb so we really do exit back to the main loop
2590          * and don't chain to another TB.
2591          */
2592         gen_update_pc(s, diff);
2593         gen_goto_ptr();
2594         s->base.is_jmp = DISAS_NORETURN;
2595         break;
2596     default:
2597         /*
2598          * We shouldn't be emitting code for a jump and also have
2599          * is_jmp set to one of the special cases like DISAS_SWI.
2600          */
2601         g_assert_not_reached();
2602     }
2603 }
2604 
2605 static inline void gen_jmp(DisasContext *s, target_long diff)
2606 {
2607     gen_jmp_tb(s, diff, 0);
2608 }
2609 
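     /*
      * Signed 16x16->32 multiply, taking the top (x/y set) or bottom
      * halfword of each operand: the SMULxy-style operand selection.
      */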
2610 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2611 {
2612     if (x)
2613         tcg_gen_sari_i32(t0, t0, 16);
2614     else
2615         gen_sxth(t0);
2616     if (y)
2617         tcg_gen_sari_i32(t1, t1, 16);
2618     else
2619         gen_sxth(t1);
2620     tcg_gen_mul_i32(t0, t0, t1);
2621 }
2622 
2623 /* Return the mask of PSR bits set by an MSR instruction.  */
2624 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2625 {
2626     uint32_t mask = 0;
2627 
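         /*
          * flags is the MSR <fields> mask: bit 0 selects c (PSR[7:0]),
          * bit 1 x ([15:8]), bit 2 s ([23:16]) and bit 3 f ([31:24]).
          */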
2628     if (flags & (1 << 0)) {
2629         mask |= 0xff;
2630     }
2631     if (flags & (1 << 1)) {
2632         mask |= 0xff00;
2633     }
2634     if (flags & (1 << 2)) {
2635         mask |= 0xff0000;
2636     }
2637     if (flags & (1 << 3)) {
2638         mask |= 0xff000000;
2639     }
2640 
2641     /* Mask out undefined and reserved bits.  */
2642     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2643 
2644     /* Mask out execution state.  */
2645     if (!spsr) {
2646         mask &= ~CPSR_EXEC;
2647     }
2648 
2649     /* Mask out privileged bits.  */
2650     if (IS_USER(s)) {
2651         mask &= CPSR_USER;
2652     }
2653     return mask;
2654 }
2655 
2656 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2657 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2658 {
2659     TCGv_i32 tmp;
2660     if (spsr) {
2661         /* ??? This is also undefined in system mode.  */
2662         if (IS_USER(s))
2663             return 1;
2664 
2665         tmp = load_cpu_field(spsr);
2666         tcg_gen_andi_i32(tmp, tmp, ~mask);
2667         tcg_gen_andi_i32(t0, t0, mask);
2668         tcg_gen_or_i32(tmp, tmp, t0);
2669         store_cpu_field(tmp, spsr);
2670     } else {
2671         gen_set_cpsr(t0, mask);
2672     }
2673     gen_lookup_tb(s);
2674     return 0;
2675 }
2676 
2677 /* Returns nonzero if access to the PSR is not permitted.  */
2678 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2679 {
2680     TCGv_i32 tmp;
2681     tmp = tcg_temp_new_i32();
2682     tcg_gen_movi_i32(tmp, val);
2683     return gen_set_psr(s, mask, spsr, tmp);
2684 }
2685 
2686 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2687                                      int *tgtmode, int *regno)
2688 {
2689     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2690      * the target mode and register number, and identify the various
2691      * unpredictable cases.
2692      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2693      *  + executed in user mode
2694      *  + using R15 as the src/dest register
2695      *  + accessing an unimplemented register
2696      *  + accessing a register that's inaccessible at current PL/security state*
2697      *  + accessing a register that you could access with a different insn
2698      * We choose to UNDEF in all these cases.
2699      * Since we don't know which of the various AArch32 modes we are in,
2700      * we have to defer some checks to runtime.
2701      * Accesses to Monitor mode registers from Secure EL1 (which implies
2702      * that EL3 is AArch64) must trap to EL3.
2703      *
2704      * If the access checks fail this function will emit code to take
2705      * an exception and return false. Otherwise it will return true,
2706      * and set *tgtmode and *regno appropriately.
2707      */
2708     /* These instructions are present only in ARMv8, or in ARMv7 with the
2709      * Virtualization Extensions.
2710      */
2711     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2712         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2713         goto undef;
2714     }
2715 
2716     if (IS_USER(s) || rn == 15) {
2717         goto undef;
2718     }
2719 
2720     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2721      * of registers into (r, sysm).
2722      */
2723     if (r) {
2724         /* SPSRs for other modes */
2725         switch (sysm) {
2726         case 0xe: /* SPSR_fiq */
2727             *tgtmode = ARM_CPU_MODE_FIQ;
2728             break;
2729         case 0x10: /* SPSR_irq */
2730             *tgtmode = ARM_CPU_MODE_IRQ;
2731             break;
2732         case 0x12: /* SPSR_svc */
2733             *tgtmode = ARM_CPU_MODE_SVC;
2734             break;
2735         case 0x14: /* SPSR_abt */
2736             *tgtmode = ARM_CPU_MODE_ABT;
2737             break;
2738         case 0x16: /* SPSR_und */
2739             *tgtmode = ARM_CPU_MODE_UND;
2740             break;
2741         case 0x1c: /* SPSR_mon */
2742             *tgtmode = ARM_CPU_MODE_MON;
2743             break;
2744         case 0x1e: /* SPSR_hyp */
2745             *tgtmode = ARM_CPU_MODE_HYP;
2746             break;
2747         default: /* unallocated */
2748             goto undef;
2749         }
2750         /* We arbitrarily assign SPSR a register number of 16. */
2751         *regno = 16;
2752     } else {
2753         /* general purpose registers for other modes */
2754         switch (sysm) {
2755         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2756             *tgtmode = ARM_CPU_MODE_USR;
2757             *regno = sysm + 8;
2758             break;
2759         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2760             *tgtmode = ARM_CPU_MODE_FIQ;
2761             *regno = sysm;
2762             break;
2763         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2764             *tgtmode = ARM_CPU_MODE_IRQ;
2765             *regno = sysm & 1 ? 13 : 14;
2766             break;
2767         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2768             *tgtmode = ARM_CPU_MODE_SVC;
2769             *regno = sysm & 1 ? 13 : 14;
2770             break;
2771         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2772             *tgtmode = ARM_CPU_MODE_ABT;
2773             *regno = sysm & 1 ? 13 : 14;
2774             break;
2775         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2776             *tgtmode = ARM_CPU_MODE_UND;
2777             *regno = sysm & 1 ? 13 : 14;
2778             break;
2779         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2780             *tgtmode = ARM_CPU_MODE_MON;
2781             *regno = sysm & 1 ? 13 : 14;
2782             break;
2783         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2784             *tgtmode = ARM_CPU_MODE_HYP;
2785             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2786             *regno = sysm & 1 ? 13 : 17;
2787             break;
2788         default: /* unallocated */
2789             goto undef;
2790         }
2791     }
2792 
2793     /* Catch the 'accessing inaccessible register' cases we can detect
2794      * at translate time.
2795      */
2796     switch (*tgtmode) {
2797     case ARM_CPU_MODE_MON:
2798         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2799             goto undef;
2800         }
2801         if (s->current_el == 1) {
2802             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2803              * then accesses to Mon registers trap to Secure EL2, if it exists,
2804              * otherwise EL3.
2805              */
2806             TCGv_i32 tcg_el;
2807 
2808             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2809                 dc_isar_feature(aa64_sel2, s)) {
2810                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2811                 tcg_el = load_cpu_field_low32(cp15.scr_el3);
2812                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2813                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2814             } else {
2815                 tcg_el = tcg_constant_i32(3);
2816             }
2817 
2818             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2819                                     syn_uncategorized(), tcg_el);
2820             return false;
2821         }
2822         break;
2823     case ARM_CPU_MODE_HYP:
2824         /*
2825          * r13_hyp can only be accessed from Monitor mode, and so we
2826          * can forbid accesses from EL2 or below.
2827          * elr_hyp can be accessed also from Hyp mode, so forbid
2828          * accesses from EL0 or EL1.
2829          * SPSR_hyp is supposed to be in the same category as r13_hyp
2830          * and UNPREDICTABLE if accessed from anything except Monitor
2831          * mode. However there is some real-world code that will do
2832          * it because at least some hardware happens to permit the
2833          * access. (Notably a standard Cortex-R52 startup code fragment
2834          * does this.) So we permit SPSR_hyp from Hyp mode also, to allow
2835          * this (incorrect) guest code to run.
2836          */
2837         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2
2838             || (s->current_el < 3 && *regno != 16 && *regno != 17)) {
2839             goto undef;
2840         }
2841         break;
2842     default:
2843         break;
2844     }
2845 
2846     return true;
2847 
2848 undef:
2849     /* If we get here then some access check did not pass */
2850     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2851     return false;
2852 }
2853 
2854 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2855 {
2856     TCGv_i32 tcg_reg;
2857     int tgtmode = 0, regno = 0;
2858 
2859     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2860         return;
2861     }
2862 
2863     /* Sync state because msr_banked() can raise exceptions */
2864     gen_set_condexec(s);
2865     gen_update_pc(s, 0);
2866     tcg_reg = load_reg(s, rn);
2867     gen_helper_msr_banked(tcg_env, tcg_reg,
2868                           tcg_constant_i32(tgtmode),
2869                           tcg_constant_i32(regno));
2870     s->base.is_jmp = DISAS_UPDATE_EXIT;
2871 }
2872 
2873 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2874 {
2875     TCGv_i32 tcg_reg;
2876     int tgtmode = 0, regno = 0;
2877 
2878     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2879         return;
2880     }
2881 
2882     /* Sync state because mrs_banked() can raise exceptions */
2883     gen_set_condexec(s);
2884     gen_update_pc(s, 0);
2885     tcg_reg = tcg_temp_new_i32();
2886     gen_helper_mrs_banked(tcg_reg, tcg_env,
2887                           tcg_constant_i32(tgtmode),
2888                           tcg_constant_i32(regno));
2889     store_reg(s, rn, tcg_reg);
2890     s->base.is_jmp = DISAS_UPDATE_EXIT;
2891 }
2892 
2893 /* Store value to PC as for an exception return (i.e. don't
2894  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2895  * will do the masking based on the new value of the Thumb bit.
2896  */
2897 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2898 {
2899     tcg_gen_mov_i32(cpu_R[15], pc);
2900 }
2901 
2902 /* Generate a v6 exception return.  Marks both values as dead.  */
2903 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2904 {
2905     store_pc_exc_ret(s, pc);
2906     /* The cpsr_write_eret helper will mask the low bits of PC
2907      * appropriately depending on the new Thumb bit, so it must
2908      * be called after storing the new PC.
2909      */
2910     translator_io_start(&s->base);
2911     gen_helper_cpsr_write_eret(tcg_env, cpsr);
2912     /* Must exit loop to check un-masked IRQs */
2913     s->base.is_jmp = DISAS_EXIT;
2914 }
2915 
2916 /* Generate an old-style exception return. Marks pc as dead. */
2917 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2918 {
2919     gen_rfe(s, pc, load_cpu_field(spsr));
2920 }
2921 
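     /*
      * Expand a three-operand gvec op whose helper also takes a pointer
      * to the QC (saturation) flag.
      */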
2922 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2923                             uint32_t opr_sz, uint32_t max_sz,
2924                             gen_helper_gvec_3_ptr *fn)
2925 {
2926     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2927 
2928     tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
2929     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2930                        opr_sz, max_sz, 0, fn);
2931 }
2932 
2933 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2934                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2935 {
2936     static gen_helper_gvec_3_ptr * const fns[2] = {
2937         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2938     };
2939     tcg_debug_assert(vece >= 1 && vece <= 2);
2940     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2941 }
2942 
2943 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2944                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2945 {
2946     static gen_helper_gvec_3_ptr * const fns[2] = {
2947         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2948     };
2949     tcg_debug_assert(vece >= 1 && vece <= 2);
2950     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2951 }
2952 
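     /*
      * Compare each element of m against zero: the matching element of d
      * becomes all ones if the condition holds, all zeros otherwise.
      */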
2953 #define GEN_CMP0(NAME, COND)                              \
2954     void NAME(unsigned vece, uint32_t d, uint32_t m,      \
2955               uint32_t opr_sz, uint32_t max_sz)           \
2956     { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
2957 
2958 GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
2959 GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
2960 GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
2961 GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
2962 GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
2963 
2964 #undef GEN_CMP0
2965 
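     /*
      * Signed shift-right-and-accumulate expanders:
      * d += (a >> shift), using an arithmetic shift, per element.
      */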
2966 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2967 {
2968     tcg_gen_vec_sar8i_i64(a, a, shift);
2969     tcg_gen_vec_add8_i64(d, d, a);
2970 }
2971 
2972 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2973 {
2974     tcg_gen_vec_sar16i_i64(a, a, shift);
2975     tcg_gen_vec_add16_i64(d, d, a);
2976 }
2977 
2978 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
2979 {
2980     tcg_gen_sari_i32(a, a, shift);
2981     tcg_gen_add_i32(d, d, a);
2982 }
2983 
2984 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2985 {
2986     tcg_gen_sari_i64(a, a, shift);
2987     tcg_gen_add_i64(d, d, a);
2988 }
2989 
2990 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
2991 {
2992     tcg_gen_sari_vec(vece, a, a, sh);
2993     tcg_gen_add_vec(vece, d, d, a);
2994 }
2995 
2996 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
2997                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
2998 {
2999     static const TCGOpcode vecop_list[] = {
3000         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3001     };
3002     static const GVecGen2i ops[4] = {
3003         { .fni8 = gen_ssra8_i64,
3004           .fniv = gen_ssra_vec,
3005           .fno = gen_helper_gvec_ssra_b,
3006           .load_dest = true,
3007           .opt_opc = vecop_list,
3008           .vece = MO_8 },
3009         { .fni8 = gen_ssra16_i64,
3010           .fniv = gen_ssra_vec,
3011           .fno = gen_helper_gvec_ssra_h,
3012           .load_dest = true,
3013           .opt_opc = vecop_list,
3014           .vece = MO_16 },
3015         { .fni4 = gen_ssra32_i32,
3016           .fniv = gen_ssra_vec,
3017           .fno = gen_helper_gvec_ssra_s,
3018           .load_dest = true,
3019           .opt_opc = vecop_list,
3020           .vece = MO_32 },
3021         { .fni8 = gen_ssra64_i64,
3022           .fniv = gen_ssra_vec,
3023           .fno = gen_helper_gvec_ssra_d,
3024           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3025           .opt_opc = vecop_list,
3026           .load_dest = true,
3027           .vece = MO_64 },
3028     };
3029 
3030     /* tszimm encoding produces immediates in the range [1..esize]. */
3031     tcg_debug_assert(shift > 0);
3032     tcg_debug_assert(shift <= (8 << vece));
3033 
3034     /*
3035      * Shifts larger than the element size are architecturally valid.
3036      * A signed shift of that size results in all sign bits.
3037      */
3038     shift = MIN(shift, (8 << vece) - 1);
3039     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3040 }
3041 
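     /*
      * Unsigned shift-right-and-accumulate expanders:
      * d += (a >> shift), using a logical shift, per element.
      */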
3042 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3043 {
3044     tcg_gen_vec_shr8i_i64(a, a, shift);
3045     tcg_gen_vec_add8_i64(d, d, a);
3046 }
3047 
3048 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3049 {
3050     tcg_gen_vec_shr16i_i64(a, a, shift);
3051     tcg_gen_vec_add16_i64(d, d, a);
3052 }
3053 
3054 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3055 {
3056     tcg_gen_shri_i32(a, a, shift);
3057     tcg_gen_add_i32(d, d, a);
3058 }
3059 
3060 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3061 {
3062     tcg_gen_shri_i64(a, a, shift);
3063     tcg_gen_add_i64(d, d, a);
3064 }
3065 
3066 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3067 {
3068     tcg_gen_shri_vec(vece, a, a, sh);
3069     tcg_gen_add_vec(vece, d, d, a);
3070 }
3071 
3072 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3073                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3074 {
3075     static const TCGOpcode vecop_list[] = {
3076         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3077     };
3078     static const GVecGen2i ops[4] = {
3079         { .fni8 = gen_usra8_i64,
3080           .fniv = gen_usra_vec,
3081           .fno = gen_helper_gvec_usra_b,
3082           .load_dest = true,
3083           .opt_opc = vecop_list,
3084           .vece = MO_8, },
3085         { .fni8 = gen_usra16_i64,
3086           .fniv = gen_usra_vec,
3087           .fno = gen_helper_gvec_usra_h,
3088           .load_dest = true,
3089           .opt_opc = vecop_list,
3090           .vece = MO_16, },
3091         { .fni4 = gen_usra32_i32,
3092           .fniv = gen_usra_vec,
3093           .fno = gen_helper_gvec_usra_s,
3094           .load_dest = true,
3095           .opt_opc = vecop_list,
3096           .vece = MO_32, },
3097         { .fni8 = gen_usra64_i64,
3098           .fniv = gen_usra_vec,
3099           .fno = gen_helper_gvec_usra_d,
3100           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3101           .load_dest = true,
3102           .opt_opc = vecop_list,
3103           .vece = MO_64, },
3104     };
3105 
3106     /* tszimm encoding produces immediates in the range [1..esize]. */
3107     tcg_debug_assert(shift > 0);
3108     tcg_debug_assert(shift <= (8 << vece));
3109 
3110     /*
3111      * Shifts larger than the element size are architecturally valid.
3112      * An unsigned shift of that size results in all zeros as input to the accumulate: a nop.
3113      */
3114     if (shift < (8 << vece)) {
3115         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3116     } else {
3117         /* Nop, but we do need to clear the tail. */
3118         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3119     }
3120 }
3121 
3122 /*
3123  * Shift one less than the requested amount, and the low bit is
3124  * the rounding bit.  For the 8 and 16-bit operations, because we
3125  * mask the low bit, we can perform a normal integer shift instead
3126  * of a vector shift.
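 * For 0 < sh < esize the rounded result is equivalent to
 *   (x + (1 << (sh - 1))) >> sh
 * computed without intermediate overflow, using an arithmetic final
 * shift for SRSHR and a logical one for URSHR.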
3127  */
3128 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3129 {
3130     TCGv_i64 t = tcg_temp_new_i64();
3131 
3132     tcg_gen_shri_i64(t, a, sh - 1);
3133     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3134     tcg_gen_vec_sar8i_i64(d, a, sh);
3135     tcg_gen_vec_add8_i64(d, d, t);
3136 }
3137 
3138 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3139 {
3140     TCGv_i64 t = tcg_temp_new_i64();
3141 
3142     tcg_gen_shri_i64(t, a, sh - 1);
3143     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3144     tcg_gen_vec_sar16i_i64(d, a, sh);
3145     tcg_gen_vec_add16_i64(d, d, t);
3146 }
3147 
3148 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3149 {
3150     TCGv_i32 t;
3151 
3152     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3153     if (sh == 32) {
3154         tcg_gen_movi_i32(d, 0);
3155         return;
3156     }
3157     t = tcg_temp_new_i32();
3158     tcg_gen_extract_i32(t, a, sh - 1, 1);
3159     tcg_gen_sari_i32(d, a, sh);
3160     tcg_gen_add_i32(d, d, t);
3161 }
3162 
3163 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3164 {
3165     TCGv_i64 t = tcg_temp_new_i64();
3166 
3167     tcg_gen_extract_i64(t, a, sh - 1, 1);
3168     tcg_gen_sari_i64(d, a, sh);
3169     tcg_gen_add_i64(d, d, t);
3170 }
3171 
3172 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3173 {
3174     TCGv_vec t = tcg_temp_new_vec_matching(d);
3175     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3176 
3177     tcg_gen_shri_vec(vece, t, a, sh - 1);
3178     tcg_gen_dupi_vec(vece, ones, 1);
3179     tcg_gen_and_vec(vece, t, t, ones);
3180     tcg_gen_sari_vec(vece, d, a, sh);
3181     tcg_gen_add_vec(vece, d, d, t);
3182 }
3183 
3184 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3185                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3186 {
3187     static const TCGOpcode vecop_list[] = {
3188         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3189     };
3190     static const GVecGen2i ops[4] = {
3191         { .fni8 = gen_srshr8_i64,
3192           .fniv = gen_srshr_vec,
3193           .fno = gen_helper_gvec_srshr_b,
3194           .opt_opc = vecop_list,
3195           .vece = MO_8 },
3196         { .fni8 = gen_srshr16_i64,
3197           .fniv = gen_srshr_vec,
3198           .fno = gen_helper_gvec_srshr_h,
3199           .opt_opc = vecop_list,
3200           .vece = MO_16 },
3201         { .fni4 = gen_srshr32_i32,
3202           .fniv = gen_srshr_vec,
3203           .fno = gen_helper_gvec_srshr_s,
3204           .opt_opc = vecop_list,
3205           .vece = MO_32 },
3206         { .fni8 = gen_srshr64_i64,
3207           .fniv = gen_srshr_vec,
3208           .fno = gen_helper_gvec_srshr_d,
3209           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3210           .opt_opc = vecop_list,
3211           .vece = MO_64 },
3212     };
3213 
3214     /* tszimm encoding produces immediates in the range [1..esize] */
3215     tcg_debug_assert(shift > 0);
3216     tcg_debug_assert(shift <= (8 << vece));
3217 
3218     if (shift == (8 << vece)) {
3219         /*
3220          * Shifts larger than the element size are architecturally valid.
3221          * Signed results in all sign bits.  With rounding, this produces
3222          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3223          * I.e. always zero.
3224          */
3225         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3226     } else {
3227         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3228     }
3229 }
3230 
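/* SRSRA: signed rounding shift right and accumulate: D[i] += srshr(M[i], sh). */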
3231 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3232 {
3233     TCGv_i64 t = tcg_temp_new_i64();
3234 
3235     gen_srshr8_i64(t, a, sh);
3236     tcg_gen_vec_add8_i64(d, d, t);
3237 }
3238 
3239 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3240 {
3241     TCGv_i64 t = tcg_temp_new_i64();
3242 
3243     gen_srshr16_i64(t, a, sh);
3244     tcg_gen_vec_add16_i64(d, d, t);
3245 }
3246 
3247 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3248 {
3249     TCGv_i32 t = tcg_temp_new_i32();
3250 
3251     gen_srshr32_i32(t, a, sh);
3252     tcg_gen_add_i32(d, d, t);
3253 }
3254 
3255 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3256 {
3257     TCGv_i64 t = tcg_temp_new_i64();
3258 
3259     gen_srshr64_i64(t, a, sh);
3260     tcg_gen_add_i64(d, d, t);
3261 }
3262 
3263 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3264 {
3265     TCGv_vec t = tcg_temp_new_vec_matching(d);
3266 
3267     gen_srshr_vec(vece, t, a, sh);
3268     tcg_gen_add_vec(vece, d, d, t);
3269 }
3270 
3271 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3272                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3273 {
3274     static const TCGOpcode vecop_list[] = {
3275         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3276     };
3277     static const GVecGen2i ops[4] = {
3278         { .fni8 = gen_srsra8_i64,
3279           .fniv = gen_srsra_vec,
3280           .fno = gen_helper_gvec_srsra_b,
3281           .opt_opc = vecop_list,
3282           .load_dest = true,
3283           .vece = MO_8 },
3284         { .fni8 = gen_srsra16_i64,
3285           .fniv = gen_srsra_vec,
3286           .fno = gen_helper_gvec_srsra_h,
3287           .opt_opc = vecop_list,
3288           .load_dest = true,
3289           .vece = MO_16 },
3290         { .fni4 = gen_srsra32_i32,
3291           .fniv = gen_srsra_vec,
3292           .fno = gen_helper_gvec_srsra_s,
3293           .opt_opc = vecop_list,
3294           .load_dest = true,
3295           .vece = MO_32 },
3296         { .fni8 = gen_srsra64_i64,
3297           .fniv = gen_srsra_vec,
3298           .fno = gen_helper_gvec_srsra_d,
3299           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3300           .opt_opc = vecop_list,
3301           .load_dest = true,
3302           .vece = MO_64 },
3303     };
3304 
3305     /* tszimm encoding produces immediates in the range [1..esize] */
3306     tcg_debug_assert(shift > 0);
3307     tcg_debug_assert(shift <= (8 << vece));
3308 
3309     /*
3310      * Shifts larger than the element size are architecturally valid.
3311      * Signed results in all sign bits.  With rounding, this produces
3312      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3313      * I.e. always zero.  With accumulation, this leaves D unchanged.
3314      */
3315     if (shift == (8 << vece)) {
3316         /* Nop, but we do need to clear the tail. */
3317         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3318     } else {
3319         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3320     }
3321 }
3322 
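/* URSHR: unsigned rounding shift right, using the same rounding-bit trick. */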
3323 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3324 {
3325     TCGv_i64 t = tcg_temp_new_i64();
3326 
3327     tcg_gen_shri_i64(t, a, sh - 1);
3328     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3329     tcg_gen_vec_shr8i_i64(d, a, sh);
3330     tcg_gen_vec_add8_i64(d, d, t);
3331 }
3332 
3333 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3334 {
3335     TCGv_i64 t = tcg_temp_new_i64();
3336 
3337     tcg_gen_shri_i64(t, a, sh - 1);
3338     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3339     tcg_gen_vec_shr16i_i64(d, a, sh);
3340     tcg_gen_vec_add16_i64(d, d, t);
3341 }
3342 
3343 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3344 {
3345     TCGv_i32 t;
3346 
3347     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3348     if (sh == 32) {
3349         tcg_gen_extract_i32(d, a, sh - 1, 1);
3350         return;
3351     }
3352     t = tcg_temp_new_i32();
3353     tcg_gen_extract_i32(t, a, sh - 1, 1);
3354     tcg_gen_shri_i32(d, a, sh);
3355     tcg_gen_add_i32(d, d, t);
3356 }
3357 
3358 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3359 {
3360     TCGv_i64 t = tcg_temp_new_i64();
3361 
3362     tcg_gen_extract_i64(t, a, sh - 1, 1);
3363     tcg_gen_shri_i64(d, a, sh);
3364     tcg_gen_add_i64(d, d, t);
3365 }
3366 
3367 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3368 {
3369     TCGv_vec t = tcg_temp_new_vec_matching(d);
3370     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3371 
3372     tcg_gen_shri_vec(vece, t, a, shift - 1);
3373     tcg_gen_dupi_vec(vece, ones, 1);
3374     tcg_gen_and_vec(vece, t, t, ones);
3375     tcg_gen_shri_vec(vece, d, a, shift);
3376     tcg_gen_add_vec(vece, d, d, t);
3377 }
3378 
3379 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3380                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3381 {
3382     static const TCGOpcode vecop_list[] = {
3383         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3384     };
3385     static const GVecGen2i ops[4] = {
3386         { .fni8 = gen_urshr8_i64,
3387           .fniv = gen_urshr_vec,
3388           .fno = gen_helper_gvec_urshr_b,
3389           .opt_opc = vecop_list,
3390           .vece = MO_8 },
3391         { .fni8 = gen_urshr16_i64,
3392           .fniv = gen_urshr_vec,
3393           .fno = gen_helper_gvec_urshr_h,
3394           .opt_opc = vecop_list,
3395           .vece = MO_16 },
3396         { .fni4 = gen_urshr32_i32,
3397           .fniv = gen_urshr_vec,
3398           .fno = gen_helper_gvec_urshr_s,
3399           .opt_opc = vecop_list,
3400           .vece = MO_32 },
3401         { .fni8 = gen_urshr64_i64,
3402           .fniv = gen_urshr_vec,
3403           .fno = gen_helper_gvec_urshr_d,
3404           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3405           .opt_opc = vecop_list,
3406           .vece = MO_64 },
3407     };
3408 
3409     /* tszimm encoding produces immediates in the range [1..esize] */
3410     tcg_debug_assert(shift > 0);
3411     tcg_debug_assert(shift <= (8 << vece));
3412 
3413     if (shift == (8 << vece)) {
3414         /*
3415          * Shifts larger than the element size are architecturally valid.
3416          * Unsigned results in zero.  With rounding, this produces a
3417          * copy of the most significant bit.
3418          */
3419         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3420     } else {
3421         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3422     }
3423 }
3424 
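/*
 * URSRA: unsigned rounding shift right and accumulate.  A shift by exactly
 * esize is special-cased: the rounded result is then just the most
 * significant bit of each element.
 */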
3425 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3426 {
3427     TCGv_i64 t = tcg_temp_new_i64();
3428 
3429     if (sh == 8) {
3430         tcg_gen_vec_shr8i_i64(t, a, 7);
3431     } else {
3432         gen_urshr8_i64(t, a, sh);
3433     }
3434     tcg_gen_vec_add8_i64(d, d, t);
3435 }
3436 
3437 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3438 {
3439     TCGv_i64 t = tcg_temp_new_i64();
3440 
3441     if (sh == 16) {
3442         tcg_gen_vec_shr16i_i64(t, a, 15);
3443     } else {
3444         gen_urshr16_i64(t, a, sh);
3445     }
3446     tcg_gen_vec_add16_i64(d, d, t);
3447 }
3448 
3449 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3450 {
3451     TCGv_i32 t = tcg_temp_new_i32();
3452 
3453     if (sh == 32) {
3454         tcg_gen_shri_i32(t, a, 31);
3455     } else {
3456         gen_urshr32_i32(t, a, sh);
3457     }
3458     tcg_gen_add_i32(d, d, t);
3459 }
3460 
3461 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3462 {
3463     TCGv_i64 t = tcg_temp_new_i64();
3464 
3465     if (sh == 64) {
3466         tcg_gen_shri_i64(t, a, 63);
3467     } else {
3468         gen_urshr64_i64(t, a, sh);
3469     }
3470     tcg_gen_add_i64(d, d, t);
3471 }
3472 
3473 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3474 {
3475     TCGv_vec t = tcg_temp_new_vec_matching(d);
3476 
3477     if (sh == (8 << vece)) {
3478         tcg_gen_shri_vec(vece, t, a, sh - 1);
3479     } else {
3480         gen_urshr_vec(vece, t, a, sh);
3481     }
3482     tcg_gen_add_vec(vece, d, d, t);
3483 }
3484 
3485 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3486                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3487 {
3488     static const TCGOpcode vecop_list[] = {
3489         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3490     };
3491     static const GVecGen2i ops[4] = {
3492         { .fni8 = gen_ursra8_i64,
3493           .fniv = gen_ursra_vec,
3494           .fno = gen_helper_gvec_ursra_b,
3495           .opt_opc = vecop_list,
3496           .load_dest = true,
3497           .vece = MO_8 },
3498         { .fni8 = gen_ursra16_i64,
3499           .fniv = gen_ursra_vec,
3500           .fno = gen_helper_gvec_ursra_h,
3501           .opt_opc = vecop_list,
3502           .load_dest = true,
3503           .vece = MO_16 },
3504         { .fni4 = gen_ursra32_i32,
3505           .fniv = gen_ursra_vec,
3506           .fno = gen_helper_gvec_ursra_s,
3507           .opt_opc = vecop_list,
3508           .load_dest = true,
3509           .vece = MO_32 },
3510         { .fni8 = gen_ursra64_i64,
3511           .fniv = gen_ursra_vec,
3512           .fno = gen_helper_gvec_ursra_d,
3513           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3514           .opt_opc = vecop_list,
3515           .load_dest = true,
3516           .vece = MO_64 },
3517     };
3518 
3519     /* tszimm encoding produces immediates in the range [1..esize] */
3520     tcg_debug_assert(shift > 0);
3521     tcg_debug_assert(shift <= (8 << vece));
3522 
3523     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3524 }
3525 
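/*
 * SRI: shift right and insert.  Only the low (esize - shift) bits of each
 * destination element are replaced; the top shift bits are preserved.
 */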
3526 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3527 {
3528     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3529     TCGv_i64 t = tcg_temp_new_i64();
3530 
3531     tcg_gen_shri_i64(t, a, shift);
3532     tcg_gen_andi_i64(t, t, mask);
3533     tcg_gen_andi_i64(d, d, ~mask);
3534     tcg_gen_or_i64(d, d, t);
3535 }
3536 
3537 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3538 {
3539     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3540     TCGv_i64 t = tcg_temp_new_i64();
3541 
3542     tcg_gen_shri_i64(t, a, shift);
3543     tcg_gen_andi_i64(t, t, mask);
3544     tcg_gen_andi_i64(d, d, ~mask);
3545     tcg_gen_or_i64(d, d, t);
3546 }
3547 
3548 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3549 {
3550     tcg_gen_shri_i32(a, a, shift);
3551     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3552 }
3553 
3554 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3555 {
3556     tcg_gen_shri_i64(a, a, shift);
3557     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3558 }
3559 
3560 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3561 {
3562     TCGv_vec t = tcg_temp_new_vec_matching(d);
3563     TCGv_vec m = tcg_temp_new_vec_matching(d);
3564 
3565     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3566     tcg_gen_shri_vec(vece, t, a, sh);
3567     tcg_gen_and_vec(vece, d, d, m);
3568     tcg_gen_or_vec(vece, d, d, t);
3569 }
3570 
3571 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3572                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3573 {
3574     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3575     const GVecGen2i ops[4] = {
3576         { .fni8 = gen_shr8_ins_i64,
3577           .fniv = gen_shr_ins_vec,
3578           .fno = gen_helper_gvec_sri_b,
3579           .load_dest = true,
3580           .opt_opc = vecop_list,
3581           .vece = MO_8 },
3582         { .fni8 = gen_shr16_ins_i64,
3583           .fniv = gen_shr_ins_vec,
3584           .fno = gen_helper_gvec_sri_h,
3585           .load_dest = true,
3586           .opt_opc = vecop_list,
3587           .vece = MO_16 },
3588         { .fni4 = gen_shr32_ins_i32,
3589           .fniv = gen_shr_ins_vec,
3590           .fno = gen_helper_gvec_sri_s,
3591           .load_dest = true,
3592           .opt_opc = vecop_list,
3593           .vece = MO_32 },
3594         { .fni8 = gen_shr64_ins_i64,
3595           .fniv = gen_shr_ins_vec,
3596           .fno = gen_helper_gvec_sri_d,
3597           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3598           .load_dest = true,
3599           .opt_opc = vecop_list,
3600           .vece = MO_64 },
3601     };
3602 
3603     /* tszimm encoding produces immediates in the range [1..esize]. */
3604     tcg_debug_assert(shift > 0);
3605     tcg_debug_assert(shift <= (8 << vece));
3606 
3607     /* Shift of esize leaves destination unchanged. */
3608     if (shift < (8 << vece)) {
3609         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3610     } else {
3611         /* Nop, but we do need to clear the tail. */
3612         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3613     }
3614 }
3615 
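/*
 * SLI: shift left and insert.  The low shift bits of each destination
 * element are preserved; the rest is replaced by M[i] << shift.
 */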
3616 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3617 {
3618     uint64_t mask = dup_const(MO_8, 0xff << shift);
3619     TCGv_i64 t = tcg_temp_new_i64();
3620 
3621     tcg_gen_shli_i64(t, a, shift);
3622     tcg_gen_andi_i64(t, t, mask);
3623     tcg_gen_andi_i64(d, d, ~mask);
3624     tcg_gen_or_i64(d, d, t);
3625 }
3626 
3627 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3628 {
3629     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3630     TCGv_i64 t = tcg_temp_new_i64();
3631 
3632     tcg_gen_shli_i64(t, a, shift);
3633     tcg_gen_andi_i64(t, t, mask);
3634     tcg_gen_andi_i64(d, d, ~mask);
3635     tcg_gen_or_i64(d, d, t);
3636 }
3637 
3638 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3639 {
3640     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3641 }
3642 
3643 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3644 {
3645     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3646 }
3647 
3648 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3649 {
3650     TCGv_vec t = tcg_temp_new_vec_matching(d);
3651     TCGv_vec m = tcg_temp_new_vec_matching(d);
3652 
3653     tcg_gen_shli_vec(vece, t, a, sh);
3654     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3655     tcg_gen_and_vec(vece, d, d, m);
3656     tcg_gen_or_vec(vece, d, d, t);
3657 }
3658 
3659 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3660                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3661 {
3662     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3663     const GVecGen2i ops[4] = {
3664         { .fni8 = gen_shl8_ins_i64,
3665           .fniv = gen_shl_ins_vec,
3666           .fno = gen_helper_gvec_sli_b,
3667           .load_dest = true,
3668           .opt_opc = vecop_list,
3669           .vece = MO_8 },
3670         { .fni8 = gen_shl16_ins_i64,
3671           .fniv = gen_shl_ins_vec,
3672           .fno = gen_helper_gvec_sli_h,
3673           .load_dest = true,
3674           .opt_opc = vecop_list,
3675           .vece = MO_16 },
3676         { .fni4 = gen_shl32_ins_i32,
3677           .fniv = gen_shl_ins_vec,
3678           .fno = gen_helper_gvec_sli_s,
3679           .load_dest = true,
3680           .opt_opc = vecop_list,
3681           .vece = MO_32 },
3682         { .fni8 = gen_shl64_ins_i64,
3683           .fniv = gen_shl_ins_vec,
3684           .fno = gen_helper_gvec_sli_d,
3685           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3686           .load_dest = true,
3687           .opt_opc = vecop_list,
3688           .vece = MO_64 },
3689     };
3690 
3691     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3692     tcg_debug_assert(shift >= 0);
3693     tcg_debug_assert(shift < (8 << vece));
3694 
3695     if (shift == 0) {
3696         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3697     } else {
3698         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3699     }
3700 }
3701 
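/* Integer multiply-accumulate (d += a * b) and multiply-subtract (d -= a * b). */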
3702 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3703 {
3704     gen_helper_neon_mul_u8(a, a, b);
3705     gen_helper_neon_add_u8(d, d, a);
3706 }
3707 
3708 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3709 {
3710     gen_helper_neon_mul_u8(a, a, b);
3711     gen_helper_neon_sub_u8(d, d, a);
3712 }
3713 
3714 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3715 {
3716     gen_helper_neon_mul_u16(a, a, b);
3717     gen_helper_neon_add_u16(d, d, a);
3718 }
3719 
3720 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3721 {
3722     gen_helper_neon_mul_u16(a, a, b);
3723     gen_helper_neon_sub_u16(d, d, a);
3724 }
3725 
3726 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3727 {
3728     tcg_gen_mul_i32(a, a, b);
3729     tcg_gen_add_i32(d, d, a);
3730 }
3731 
3732 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3733 {
3734     tcg_gen_mul_i32(a, a, b);
3735     tcg_gen_sub_i32(d, d, a);
3736 }
3737 
3738 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3739 {
3740     tcg_gen_mul_i64(a, a, b);
3741     tcg_gen_add_i64(d, d, a);
3742 }
3743 
3744 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3745 {
3746     tcg_gen_mul_i64(a, a, b);
3747     tcg_gen_sub_i64(d, d, a);
3748 }
3749 
3750 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3751 {
3752     tcg_gen_mul_vec(vece, a, a, b);
3753     tcg_gen_add_vec(vece, d, d, a);
3754 }
3755 
3756 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3757 {
3758     tcg_gen_mul_vec(vece, a, a, b);
3759     tcg_gen_sub_vec(vece, d, d, a);
3760 }
3761 
3762 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3763  * these tables are shared with AArch64 which does support them.
3764  */
3765 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3766                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3767 {
3768     static const TCGOpcode vecop_list[] = {
3769         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3770     };
3771     static const GVecGen3 ops[4] = {
3772         { .fni4 = gen_mla8_i32,
3773           .fniv = gen_mla_vec,
3774           .load_dest = true,
3775           .opt_opc = vecop_list,
3776           .vece = MO_8 },
3777         { .fni4 = gen_mla16_i32,
3778           .fniv = gen_mla_vec,
3779           .load_dest = true,
3780           .opt_opc = vecop_list,
3781           .vece = MO_16 },
3782         { .fni4 = gen_mla32_i32,
3783           .fniv = gen_mla_vec,
3784           .load_dest = true,
3785           .opt_opc = vecop_list,
3786           .vece = MO_32 },
3787         { .fni8 = gen_mla64_i64,
3788           .fniv = gen_mla_vec,
3789           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3790           .load_dest = true,
3791           .opt_opc = vecop_list,
3792           .vece = MO_64 },
3793     };
3794     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3795 }
3796 
3797 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3798                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3799 {
3800     static const TCGOpcode vecop_list[] = {
3801         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3802     };
3803     static const GVecGen3 ops[4] = {
3804         { .fni4 = gen_mls8_i32,
3805           .fniv = gen_mls_vec,
3806           .load_dest = true,
3807           .opt_opc = vecop_list,
3808           .vece = MO_8 },
3809         { .fni4 = gen_mls16_i32,
3810           .fniv = gen_mls_vec,
3811           .load_dest = true,
3812           .opt_opc = vecop_list,
3813           .vece = MO_16 },
3814         { .fni4 = gen_mls32_i32,
3815           .fniv = gen_mls_vec,
3816           .load_dest = true,
3817           .opt_opc = vecop_list,
3818           .vece = MO_32 },
3819         { .fni8 = gen_mls64_i64,
3820           .fniv = gen_mls_vec,
3821           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3822           .load_dest = true,
3823           .opt_opc = vecop_list,
3824           .vece = MO_64 },
3825     };
3826     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3827 }
3828 
3829 /* CMTST: test is "if ((X & Y) != 0)". */
3830 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3831 {
3832     tcg_gen_and_i32(d, a, b);
3833     tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
3834 }
3835 
3836 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3837 {
3838     tcg_gen_and_i64(d, a, b);
3839     tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
3840 }
3841 
3842 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3843 {
3844     tcg_gen_and_vec(vece, d, a, b);
3845     tcg_gen_dupi_vec(vece, a, 0);
3846     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3847 }
3848 
3849 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3850                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3851 {
3852     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3853     static const GVecGen3 ops[4] = {
3854         { .fni4 = gen_helper_neon_tst_u8,
3855           .fniv = gen_cmtst_vec,
3856           .opt_opc = vecop_list,
3857           .vece = MO_8 },
3858         { .fni4 = gen_helper_neon_tst_u16,
3859           .fniv = gen_cmtst_vec,
3860           .opt_opc = vecop_list,
3861           .vece = MO_16 },
3862         { .fni4 = gen_cmtst_i32,
3863           .fniv = gen_cmtst_vec,
3864           .opt_opc = vecop_list,
3865           .vece = MO_32 },
3866         { .fni8 = gen_cmtst_i64,
3867           .fniv = gen_cmtst_vec,
3868           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3869           .opt_opc = vecop_list,
3870           .vece = MO_64 },
3871     };
3872     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3873 }
3874 
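/*
 * USHL: shift each element left by the signed low byte of the corresponding
 * element of the shift operand; negative counts shift right, and any
 * magnitude of esize or more produces zero.
 */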
3875 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3876 {
3877     TCGv_i32 lval = tcg_temp_new_i32();
3878     TCGv_i32 rval = tcg_temp_new_i32();
3879     TCGv_i32 lsh = tcg_temp_new_i32();
3880     TCGv_i32 rsh = tcg_temp_new_i32();
3881     TCGv_i32 zero = tcg_constant_i32(0);
3882     TCGv_i32 max = tcg_constant_i32(32);
3883 
3884     /*
3885      * Rely on the TCG guarantee that out of range shifts produce
3886      * unspecified results, not undefined behaviour (i.e. no trap).
3887      * Discard out-of-range results after the fact.
3888      */
3889     tcg_gen_ext8s_i32(lsh, shift);
3890     tcg_gen_neg_i32(rsh, lsh);
3891     tcg_gen_shl_i32(lval, src, lsh);
3892     tcg_gen_shr_i32(rval, src, rsh);
3893     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3894     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3895 }
3896 
3897 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3898 {
3899     TCGv_i64 lval = tcg_temp_new_i64();
3900     TCGv_i64 rval = tcg_temp_new_i64();
3901     TCGv_i64 lsh = tcg_temp_new_i64();
3902     TCGv_i64 rsh = tcg_temp_new_i64();
3903     TCGv_i64 zero = tcg_constant_i64(0);
3904     TCGv_i64 max = tcg_constant_i64(64);
3905 
3906     /*
3907      * Rely on the TCG guarantee that out of range shifts produce
3908      * unspecified results, not undefined behaviour (i.e. no trap).
3909      * Discard out-of-range results after the fact.
3910      */
3911     tcg_gen_ext8s_i64(lsh, shift);
3912     tcg_gen_neg_i64(rsh, lsh);
3913     tcg_gen_shl_i64(lval, src, lsh);
3914     tcg_gen_shr_i64(rval, src, rsh);
3915     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3916     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3917 }
3918 
3919 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3920                          TCGv_vec src, TCGv_vec shift)
3921 {
3922     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3923     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3924     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3925     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3926     TCGv_vec msk, max;
3927 
3928     tcg_gen_neg_vec(vece, rsh, shift);
3929     if (vece == MO_8) {
3930         tcg_gen_mov_vec(lsh, shift);
3931     } else {
3932         msk = tcg_temp_new_vec_matching(dst);
3933         tcg_gen_dupi_vec(vece, msk, 0xff);
3934         tcg_gen_and_vec(vece, lsh, shift, msk);
3935         tcg_gen_and_vec(vece, rsh, rsh, msk);
3936     }
3937 
3938     /*
3939      * Rely on the TCG guarantee that out of range shifts produce
3940      * unspecified results, not undefined behaviour (i.e. no trap).
3941      * Discard out-of-range results after the fact.
3942      */
3943     tcg_gen_shlv_vec(vece, lval, src, lsh);
3944     tcg_gen_shrv_vec(vece, rval, src, rsh);
3945 
3946     max = tcg_temp_new_vec_matching(dst);
3947     tcg_gen_dupi_vec(vece, max, 8 << vece);
3948 
3949     /*
3950      * The choice of LT (signed) and GEU (unsigned) is biased toward
3951      * the instructions of the x86_64 host.  For MO_8, the whole byte
3952      * is significant so we must use an unsigned compare; otherwise we
3953      * have already masked to a byte and so a signed compare works.
3954      * Other tcg hosts have a full set of comparisons and do not care.
3955      */
3956     if (vece == MO_8) {
3957         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3958         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3959         tcg_gen_andc_vec(vece, lval, lval, lsh);
3960         tcg_gen_andc_vec(vece, rval, rval, rsh);
3961     } else {
3962         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3963         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3964         tcg_gen_and_vec(vece, lval, lval, lsh);
3965         tcg_gen_and_vec(vece, rval, rval, rsh);
3966     }
3967     tcg_gen_or_vec(vece, dst, lval, rval);
3968 }
3969 
3970 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3971                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3972 {
3973     static const TCGOpcode vecop_list[] = {
3974         INDEX_op_neg_vec, INDEX_op_shlv_vec,
3975         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
3976     };
3977     static const GVecGen3 ops[4] = {
3978         { .fniv = gen_ushl_vec,
3979           .fno = gen_helper_gvec_ushl_b,
3980           .opt_opc = vecop_list,
3981           .vece = MO_8 },
3982         { .fniv = gen_ushl_vec,
3983           .fno = gen_helper_gvec_ushl_h,
3984           .opt_opc = vecop_list,
3985           .vece = MO_16 },
3986         { .fni4 = gen_ushl_i32,
3987           .fniv = gen_ushl_vec,
3988           .opt_opc = vecop_list,
3989           .vece = MO_32 },
3990         { .fni8 = gen_ushl_i64,
3991           .fniv = gen_ushl_vec,
3992           .opt_opc = vecop_list,
3993           .vece = MO_64 },
3994     };
3995     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3996 }
3997 
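/*
 * SSHL: as USHL but with arithmetic right shifts, so a right shift by
 * esize or more yields 0 or -1 according to the sign of the element.
 */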
3998 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3999 {
4000     TCGv_i32 lval = tcg_temp_new_i32();
4001     TCGv_i32 rval = tcg_temp_new_i32();
4002     TCGv_i32 lsh = tcg_temp_new_i32();
4003     TCGv_i32 rsh = tcg_temp_new_i32();
4004     TCGv_i32 zero = tcg_constant_i32(0);
4005     TCGv_i32 max = tcg_constant_i32(31);
4006 
4007     /*
4008      * Rely on the TCG guarantee that out of range shifts produce
4009      * unspecified results, not undefined behaviour (i.e. no trap).
4010      * Discard out-of-range results after the fact.
4011      */
4012     tcg_gen_ext8s_i32(lsh, shift);
4013     tcg_gen_neg_i32(rsh, lsh);
4014     tcg_gen_shl_i32(lval, src, lsh);
4015     tcg_gen_umin_i32(rsh, rsh, max);
4016     tcg_gen_sar_i32(rval, src, rsh);
4017     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4018     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4019 }
4020 
4021 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4022 {
4023     TCGv_i64 lval = tcg_temp_new_i64();
4024     TCGv_i64 rval = tcg_temp_new_i64();
4025     TCGv_i64 lsh = tcg_temp_new_i64();
4026     TCGv_i64 rsh = tcg_temp_new_i64();
4027     TCGv_i64 zero = tcg_constant_i64(0);
4028     TCGv_i64 max = tcg_constant_i64(63);
4029 
4030     /*
4031      * Rely on the TCG guarantee that out of range shifts produce
4032      * unspecified results, not undefined behaviour (i.e. no trap).
4033      * Discard out-of-range results after the fact.
4034      */
4035     tcg_gen_ext8s_i64(lsh, shift);
4036     tcg_gen_neg_i64(rsh, lsh);
4037     tcg_gen_shl_i64(lval, src, lsh);
4038     tcg_gen_umin_i64(rsh, rsh, max);
4039     tcg_gen_sar_i64(rval, src, rsh);
4040     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4041     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4042 }
4043 
4044 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4045                          TCGv_vec src, TCGv_vec shift)
4046 {
4047     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4048     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4049     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4050     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4051     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4052 
4053     /*
4054      * Rely on the TCG guarantee that out of range shifts produce
4055      * unspecified results, not undefined behaviour (i.e. no trap).
4056      * Discard out-of-range results after the fact.
4057      */
4058     tcg_gen_neg_vec(vece, rsh, shift);
4059     if (vece == MO_8) {
4060         tcg_gen_mov_vec(lsh, shift);
4061     } else {
4062         tcg_gen_dupi_vec(vece, tmp, 0xff);
4063         tcg_gen_and_vec(vece, lsh, shift, tmp);
4064         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4065     }
4066 
4067     /* Bound rsh so out of bound right shift gets -1.  */
4068     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4069     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4070     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4071 
4072     tcg_gen_shlv_vec(vece, lval, src, lsh);
4073     tcg_gen_sarv_vec(vece, rval, src, rsh);
4074 
4075     /* Select in-bound left shift.  */
4076     tcg_gen_andc_vec(vece, lval, lval, tmp);
4077 
4078     /* Select between left and right shift.  */
4079     if (vece == MO_8) {
4080         tcg_gen_dupi_vec(vece, tmp, 0);
4081         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4082     } else {
4083         tcg_gen_dupi_vec(vece, tmp, 0x80);
4084         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4085     }
4086 }
4087 
4088 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4089                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4090 {
4091     static const TCGOpcode vecop_list[] = {
4092         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4093         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4094     };
4095     static const GVecGen3 ops[4] = {
4096         { .fniv = gen_sshl_vec,
4097           .fno = gen_helper_gvec_sshl_b,
4098           .opt_opc = vecop_list,
4099           .vece = MO_8 },
4100         { .fniv = gen_sshl_vec,
4101           .fno = gen_helper_gvec_sshl_h,
4102           .opt_opc = vecop_list,
4103           .vece = MO_16 },
4104         { .fni4 = gen_sshl_i32,
4105           .fniv = gen_sshl_vec,
4106           .opt_opc = vecop_list,
4107           .vece = MO_32 },
4108         { .fni8 = gen_sshl_i64,
4109           .fniv = gen_sshl_vec,
4110           .opt_opc = vecop_list,
4111           .vece = MO_64 },
4112     };
4113     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4114 }
4115 
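/*
 * Saturating add/sub with the QC flag: compute both the wrapping and the
 * saturating result, and accumulate any mismatch into the sticky QC vector.
 */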
4116 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4117                           TCGv_vec a, TCGv_vec b)
4118 {
4119     TCGv_vec x = tcg_temp_new_vec_matching(t);
4120     tcg_gen_add_vec(vece, x, a, b);
4121     tcg_gen_usadd_vec(vece, t, a, b);
4122     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4123     tcg_gen_or_vec(vece, sat, sat, x);
4124 }
4125 
4126 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4127                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4128 {
4129     static const TCGOpcode vecop_list[] = {
4130         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4131     };
4132     static const GVecGen4 ops[4] = {
4133         { .fniv = gen_uqadd_vec,
4134           .fno = gen_helper_gvec_uqadd_b,
4135           .write_aofs = true,
4136           .opt_opc = vecop_list,
4137           .vece = MO_8 },
4138         { .fniv = gen_uqadd_vec,
4139           .fno = gen_helper_gvec_uqadd_h,
4140           .write_aofs = true,
4141           .opt_opc = vecop_list,
4142           .vece = MO_16 },
4143         { .fniv = gen_uqadd_vec,
4144           .fno = gen_helper_gvec_uqadd_s,
4145           .write_aofs = true,
4146           .opt_opc = vecop_list,
4147           .vece = MO_32 },
4148         { .fniv = gen_uqadd_vec,
4149           .fno = gen_helper_gvec_uqadd_d,
4150           .write_aofs = true,
4151           .opt_opc = vecop_list,
4152           .vece = MO_64 },
4153     };
4154     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4155                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4156 }
4157 
4158 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4159                           TCGv_vec a, TCGv_vec b)
4160 {
4161     TCGv_vec x = tcg_temp_new_vec_matching(t);
4162     tcg_gen_add_vec(vece, x, a, b);
4163     tcg_gen_ssadd_vec(vece, t, a, b);
4164     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4165     tcg_gen_or_vec(vece, sat, sat, x);
4166 }
4167 
4168 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4169                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4170 {
4171     static const TCGOpcode vecop_list[] = {
4172         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4173     };
4174     static const GVecGen4 ops[4] = {
4175         { .fniv = gen_sqadd_vec,
4176           .fno = gen_helper_gvec_sqadd_b,
4177           .opt_opc = vecop_list,
4178           .write_aofs = true,
4179           .vece = MO_8 },
4180         { .fniv = gen_sqadd_vec,
4181           .fno = gen_helper_gvec_sqadd_h,
4182           .opt_opc = vecop_list,
4183           .write_aofs = true,
4184           .vece = MO_16 },
4185         { .fniv = gen_sqadd_vec,
4186           .fno = gen_helper_gvec_sqadd_s,
4187           .opt_opc = vecop_list,
4188           .write_aofs = true,
4189           .vece = MO_32 },
4190         { .fniv = gen_sqadd_vec,
4191           .fno = gen_helper_gvec_sqadd_d,
4192           .opt_opc = vecop_list,
4193           .write_aofs = true,
4194           .vece = MO_64 },
4195     };
4196     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4197                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4198 }
4199 
4200 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4201                           TCGv_vec a, TCGv_vec b)
4202 {
4203     TCGv_vec x = tcg_temp_new_vec_matching(t);
4204     tcg_gen_sub_vec(vece, x, a, b);
4205     tcg_gen_ussub_vec(vece, t, a, b);
4206     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4207     tcg_gen_or_vec(vece, sat, sat, x);
4208 }
4209 
4210 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4211                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4212 {
4213     static const TCGOpcode vecop_list[] = {
4214         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4215     };
4216     static const GVecGen4 ops[4] = {
4217         { .fniv = gen_uqsub_vec,
4218           .fno = gen_helper_gvec_uqsub_b,
4219           .opt_opc = vecop_list,
4220           .write_aofs = true,
4221           .vece = MO_8 },
4222         { .fniv = gen_uqsub_vec,
4223           .fno = gen_helper_gvec_uqsub_h,
4224           .opt_opc = vecop_list,
4225           .write_aofs = true,
4226           .vece = MO_16 },
4227         { .fniv = gen_uqsub_vec,
4228           .fno = gen_helper_gvec_uqsub_s,
4229           .opt_opc = vecop_list,
4230           .write_aofs = true,
4231           .vece = MO_32 },
4232         { .fniv = gen_uqsub_vec,
4233           .fno = gen_helper_gvec_uqsub_d,
4234           .opt_opc = vecop_list,
4235           .write_aofs = true,
4236           .vece = MO_64 },
4237     };
4238     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4239                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4240 }
4241 
4242 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4243                           TCGv_vec a, TCGv_vec b)
4244 {
4245     TCGv_vec x = tcg_temp_new_vec_matching(t);
4246     tcg_gen_sub_vec(vece, x, a, b);
4247     tcg_gen_sssub_vec(vece, t, a, b);
4248     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4249     tcg_gen_or_vec(vece, sat, sat, x);
4250 }
4251 
4252 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4253                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4254 {
4255     static const TCGOpcode vecop_list[] = {
4256         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4257     };
4258     static const GVecGen4 ops[4] = {
4259         { .fniv = gen_sqsub_vec,
4260           .fno = gen_helper_gvec_sqsub_b,
4261           .opt_opc = vecop_list,
4262           .write_aofs = true,
4263           .vece = MO_8 },
4264         { .fniv = gen_sqsub_vec,
4265           .fno = gen_helper_gvec_sqsub_h,
4266           .opt_opc = vecop_list,
4267           .write_aofs = true,
4268           .vece = MO_16 },
4269         { .fniv = gen_sqsub_vec,
4270           .fno = gen_helper_gvec_sqsub_s,
4271           .opt_opc = vecop_list,
4272           .write_aofs = true,
4273           .vece = MO_32 },
4274         { .fniv = gen_sqsub_vec,
4275           .fno = gen_helper_gvec_sqsub_d,
4276           .opt_opc = vecop_list,
4277           .write_aofs = true,
4278           .vece = MO_64 },
4279     };
4280     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4281                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4282 }
4283 
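/*
 * Absolute difference: the scalar forms compute both a - b and b - a and
 * select the non-negative one; the vector forms use max(a,b) - min(a,b).
 */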
4284 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4285 {
4286     TCGv_i32 t = tcg_temp_new_i32();
4287 
4288     tcg_gen_sub_i32(t, a, b);
4289     tcg_gen_sub_i32(d, b, a);
4290     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4291 }
4292 
4293 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4294 {
4295     TCGv_i64 t = tcg_temp_new_i64();
4296 
4297     tcg_gen_sub_i64(t, a, b);
4298     tcg_gen_sub_i64(d, b, a);
4299     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4300 }
4301 
4302 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4303 {
4304     TCGv_vec t = tcg_temp_new_vec_matching(d);
4305 
4306     tcg_gen_smin_vec(vece, t, a, b);
4307     tcg_gen_smax_vec(vece, d, a, b);
4308     tcg_gen_sub_vec(vece, d, d, t);
4309 }
4310 
4311 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4312                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4313 {
4314     static const TCGOpcode vecop_list[] = {
4315         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4316     };
4317     static const GVecGen3 ops[4] = {
4318         { .fniv = gen_sabd_vec,
4319           .fno = gen_helper_gvec_sabd_b,
4320           .opt_opc = vecop_list,
4321           .vece = MO_8 },
4322         { .fniv = gen_sabd_vec,
4323           .fno = gen_helper_gvec_sabd_h,
4324           .opt_opc = vecop_list,
4325           .vece = MO_16 },
4326         { .fni4 = gen_sabd_i32,
4327           .fniv = gen_sabd_vec,
4328           .fno = gen_helper_gvec_sabd_s,
4329           .opt_opc = vecop_list,
4330           .vece = MO_32 },
4331         { .fni8 = gen_sabd_i64,
4332           .fniv = gen_sabd_vec,
4333           .fno = gen_helper_gvec_sabd_d,
4334           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4335           .opt_opc = vecop_list,
4336           .vece = MO_64 },
4337     };
4338     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4339 }
4340 
4341 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4342 {
4343     TCGv_i32 t = tcg_temp_new_i32();
4344 
4345     tcg_gen_sub_i32(t, a, b);
4346     tcg_gen_sub_i32(d, b, a);
4347     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4348 }
4349 
4350 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4351 {
4352     TCGv_i64 t = tcg_temp_new_i64();
4353 
4354     tcg_gen_sub_i64(t, a, b);
4355     tcg_gen_sub_i64(d, b, a);
4356     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4357 }
4358 
4359 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4360 {
4361     TCGv_vec t = tcg_temp_new_vec_matching(d);
4362 
4363     tcg_gen_umin_vec(vece, t, a, b);
4364     tcg_gen_umax_vec(vece, d, a, b);
4365     tcg_gen_sub_vec(vece, d, d, t);
4366 }
4367 
4368 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4369                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4370 {
4371     static const TCGOpcode vecop_list[] = {
4372         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4373     };
4374     static const GVecGen3 ops[4] = {
4375         { .fniv = gen_uabd_vec,
4376           .fno = gen_helper_gvec_uabd_b,
4377           .opt_opc = vecop_list,
4378           .vece = MO_8 },
4379         { .fniv = gen_uabd_vec,
4380           .fno = gen_helper_gvec_uabd_h,
4381           .opt_opc = vecop_list,
4382           .vece = MO_16 },
4383         { .fni4 = gen_uabd_i32,
4384           .fniv = gen_uabd_vec,
4385           .fno = gen_helper_gvec_uabd_s,
4386           .opt_opc = vecop_list,
4387           .vece = MO_32 },
4388         { .fni8 = gen_uabd_i64,
4389           .fniv = gen_uabd_vec,
4390           .fno = gen_helper_gvec_uabd_d,
4391           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4392           .opt_opc = vecop_list,
4393           .vece = MO_64 },
4394     };
4395     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4396 }
4397 
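/* SABA/UABA: absolute difference and accumulate into the destination. */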
4398 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4399 {
4400     TCGv_i32 t = tcg_temp_new_i32();
4401     gen_sabd_i32(t, a, b);
4402     tcg_gen_add_i32(d, d, t);
4403 }
4404 
4405 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4406 {
4407     TCGv_i64 t = tcg_temp_new_i64();
4408     gen_sabd_i64(t, a, b);
4409     tcg_gen_add_i64(d, d, t);
4410 }
4411 
4412 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4413 {
4414     TCGv_vec t = tcg_temp_new_vec_matching(d);
4415     gen_sabd_vec(vece, t, a, b);
4416     tcg_gen_add_vec(vece, d, d, t);
4417 }
4418 
4419 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4420                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4421 {
4422     static const TCGOpcode vecop_list[] = {
4423         INDEX_op_sub_vec, INDEX_op_add_vec,
4424         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4425     };
4426     static const GVecGen3 ops[4] = {
4427         { .fniv = gen_saba_vec,
4428           .fno = gen_helper_gvec_saba_b,
4429           .opt_opc = vecop_list,
4430           .load_dest = true,
4431           .vece = MO_8 },
4432         { .fniv = gen_saba_vec,
4433           .fno = gen_helper_gvec_saba_h,
4434           .opt_opc = vecop_list,
4435           .load_dest = true,
4436           .vece = MO_16 },
4437         { .fni4 = gen_saba_i32,
4438           .fniv = gen_saba_vec,
4439           .fno = gen_helper_gvec_saba_s,
4440           .opt_opc = vecop_list,
4441           .load_dest = true,
4442           .vece = MO_32 },
4443         { .fni8 = gen_saba_i64,
4444           .fniv = gen_saba_vec,
4445           .fno = gen_helper_gvec_saba_d,
4446           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4447           .opt_opc = vecop_list,
4448           .load_dest = true,
4449           .vece = MO_64 },
4450     };
4451     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4452 }
4453 
4454 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4455 {
4456     TCGv_i32 t = tcg_temp_new_i32();
4457     gen_uabd_i32(t, a, b);
4458     tcg_gen_add_i32(d, d, t);
4459 }
4460 
4461 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4462 {
4463     TCGv_i64 t = tcg_temp_new_i64();
4464     gen_uabd_i64(t, a, b);
4465     tcg_gen_add_i64(d, d, t);
4466 }
4467 
4468 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4469 {
4470     TCGv_vec t = tcg_temp_new_vec_matching(d);
4471     gen_uabd_vec(vece, t, a, b);
4472     tcg_gen_add_vec(vece, d, d, t);
4473 }
4474 
4475 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4476                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4477 {
4478     static const TCGOpcode vecop_list[] = {
4479         INDEX_op_sub_vec, INDEX_op_add_vec,
4480         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4481     };
4482     static const GVecGen3 ops[4] = {
4483         { .fniv = gen_uaba_vec,
4484           .fno = gen_helper_gvec_uaba_b,
4485           .opt_opc = vecop_list,
4486           .load_dest = true,
4487           .vece = MO_8 },
4488         { .fniv = gen_uaba_vec,
4489           .fno = gen_helper_gvec_uaba_h,
4490           .opt_opc = vecop_list,
4491           .load_dest = true,
4492           .vece = MO_16 },
4493         { .fni4 = gen_uaba_i32,
4494           .fniv = gen_uaba_vec,
4495           .fno = gen_helper_gvec_uaba_s,
4496           .opt_opc = vecop_list,
4497           .load_dest = true,
4498           .vece = MO_32 },
4499         { .fni8 = gen_uaba_i64,
4500           .fniv = gen_uaba_vec,
4501           .fno = gen_helper_gvec_uaba_d,
4502           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4503           .opt_opc = vecop_list,
4504           .load_dest = true,
4505           .vece = MO_64 },
4506     };
4507     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4508 }
4509 
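/*
 * Return true if this cp15 (crn, crm) encoding lies in the guest's
 * IMPLEMENTATION DEFINED system register space; used by the TIDCP
 * trap check in do_coproc_insn() below.
 */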
4510 static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
4511 {
4512     static const uint16_t mask[3] = {
4513         0b0000000111100111,  /* crn ==  9, crm == {c0-c2, c5-c8}   */
4514         0b0000000100010011,  /* crn == 10, crm == {c0, c1, c4, c8} */
4515         0b1000000111111111,  /* crn == 11, crm == {c0-c8, c15}     */
4516     };
4517 
4518     if (crn >= 9 && crn <= 11) {
4519         return (mask[crn - 9] >> crm) & 1;
4520     }
4521     return false;
4522 }
4523 
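/*
 * Emit code for an AArch32 coprocessor register access: compute the trap
 * syndrome, apply the HSTR_EL2 and TIDCP precedence rules, look up the
 * register, check permissions, and finally emit the read or write itself.
 */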
4524 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4525                            int opc1, int crn, int crm, int opc2,
4526                            bool isread, int rt, int rt2)
4527 {
4528     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4529     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4530     TCGv_ptr tcg_ri = NULL;
4531     bool need_exit_tb = false;
4532     uint32_t syndrome;
4533 
4534     /*
4535      * Note that since we are an implementation which takes an
4536      * exception on a trapped conditional instruction only if the
4537      * instruction passes its condition code check, we can take
4538      * advantage of the clause in the ARM ARM that allows us to set
4539      * the COND field in the syndrome to 0xE in all cases.
4540      * We could fish the actual condition out of the insn (ARM)
4541      * or the condexec bits (Thumb) but it isn't necessary.
4542      */
4543     switch (cpnum) {
4544     case 14:
4545         if (is64) {
4546             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4547                                          isread, false);
4548         } else {
4549             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4550                                         rt, isread, false);
4551         }
4552         break;
4553     case 15:
4554         if (is64) {
4555             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4556                                          isread, false);
4557         } else {
4558             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4559                                         rt, isread, false);
4560         }
4561         break;
4562     default:
4563         /*
4564          * ARMv8 defines that only coprocessors 14 and 15 exist,
4565          * so this can only happen if this is an ARMv7 or earlier CPU,
4566          * in which case the syndrome information won't actually be
4567          * guest visible.
4568          */
4569         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4570         syndrome = syn_uncategorized();
4571         break;
4572     }
4573 
4574     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4575         /*
4576          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4577          * over the UNDEF for "no such register" or the UNDEF for "access
4578          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4579          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4580          * access_check_cp_reg(), after the checks for whether the access
4581          * configurably trapped to EL1.
4582          */
4583         uint32_t maskbit = is64 ? crm : crn;
4584 
4585         if (maskbit != 4 && maskbit != 14) {
4586             /* T4 and T14 are RES0 so never cause traps */
4587             TCGv_i32 t;
4588             DisasLabel over = gen_disas_label(s);
4589 
4590             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4591             tcg_gen_andi_i32(t, t, 1u << maskbit);
4592             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4593 
4594             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4595             /*
4596              * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
4597              * but since we're conditionally branching over it, we want
4598              * to assume continue-to-next-instruction.
4599              */
4600             s->base.is_jmp = DISAS_NEXT;
4601             set_disas_label(s, over);
4602         }
4603     }
4604 
4605     if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) {
4606         /*
4607          * Check for TIDCP trap, which must take precedence over the UNDEF
4608          * for "no such register" etc.  It shares precedence with HSTR,
4609          * but raises the same exception, so order doesn't matter.
4610          */
4611         switch (s->current_el) {
4612         case 0:
4613             if (arm_dc_feature(s, ARM_FEATURE_AARCH64)
4614                 && dc_isar_feature(aa64_tidcp1, s)) {
4615                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
4616             }
4617             break;
4618         case 1:
4619             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
4620             break;
4621         }
4622     }
4623 
4624     if (!ri) {
4625         /*
4626          * Unknown register; this might be a guest error or a QEMU
4627          * unimplemented feature.
4628          */
4629         if (is64) {
4630             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4631                           "64 bit system register cp:%d opc1: %d crm:%d "
4632                           "(%s)\n",
4633                           isread ? "read" : "write", cpnum, opc1, crm,
4634                           s->ns ? "non-secure" : "secure");
4635         } else {
4636             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4637                           "system register cp:%d opc1:%d crn:%d crm:%d "
4638                           "opc2:%d (%s)\n",
4639                           isread ? "read" : "write", cpnum, opc1, crn,
4640                           crm, opc2, s->ns ? "non-secure" : "secure");
4641         }
4642         unallocated_encoding(s);
4643         return;
4644     }
4645 
4646     /* Check access permissions */
4647     if (!cp_access_ok(s->current_el, ri, isread)) {
4648         unallocated_encoding(s);
4649         return;
4650     }
4651 
4652     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4653         (ri->fgt && s->fgt_active) ||
4654         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4655         /*
4656          * Emit code to perform further access permissions checks at
4657          * runtime; this may result in an exception.
4658          * Note that on XScale all cp0..cp13 registers do an access check
4659          * call in order to handle c15_cpar.
4660          */
4661         gen_set_condexec(s);
4662         gen_update_pc(s, 0);
4663         tcg_ri = tcg_temp_new_ptr();
4664         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
4665                                        tcg_constant_i32(key),
4666                                        tcg_constant_i32(syndrome),
4667                                        tcg_constant_i32(isread));
4668     } else if (ri->type & ARM_CP_RAISES_EXC) {
4669         /*
4670          * The readfn or writefn might raise an exception;
4671          * synchronize the CPU state in case it does.
4672          */
4673         gen_set_condexec(s);
4674         gen_update_pc(s, 0);
4675     }
4676 
4677     /* Handle special cases first */
4678     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4679     case 0:
4680         break;
4681     case ARM_CP_NOP:
4682         return;
4683     case ARM_CP_WFI:
4684         if (isread) {
4685             unallocated_encoding(s);
4686         } else {
4687             gen_update_pc(s, curr_insn_len(s));
4688             s->base.is_jmp = DISAS_WFI;
4689         }
4690         return;
4691     default:
4692         g_assert_not_reached();
4693     }
4694 
4695     if (ri->type & ARM_CP_IO) {
4696         /* I/O operations must end the TB here (whether read or write) */
4697         need_exit_tb = translator_io_start(&s->base);
4698     }
4699 
4700     if (isread) {
4701         /* Read */
4702         if (is64) {
4703             TCGv_i64 tmp64;
4704             TCGv_i32 tmp;
4705             if (ri->type & ARM_CP_CONST) {
4706                 tmp64 = tcg_constant_i64(ri->resetvalue);
4707             } else if (ri->readfn) {
4708                 if (!tcg_ri) {
4709                     tcg_ri = gen_lookup_cp_reg(key);
4710                 }
4711                 tmp64 = tcg_temp_new_i64();
4712                 gen_helper_get_cp_reg64(tmp64, tcg_env, tcg_ri);
4713             } else {
4714                 tmp64 = tcg_temp_new_i64();
4715                 tcg_gen_ld_i64(tmp64, tcg_env, ri->fieldoffset);
4716             }
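            /* For MRRC, Rt receives the low 32 bits and Rt2 the high 32 bits. */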
4717             tmp = tcg_temp_new_i32();
4718             tcg_gen_extrl_i64_i32(tmp, tmp64);
4719             store_reg(s, rt, tmp);
4720             tmp = tcg_temp_new_i32();
4721             tcg_gen_extrh_i64_i32(tmp, tmp64);
4722             store_reg(s, rt2, tmp);
4723         } else {
4724             TCGv_i32 tmp;
4725             if (ri->type & ARM_CP_CONST) {
4726                 tmp = tcg_constant_i32(ri->resetvalue);
4727             } else if (ri->readfn) {
4728                 if (!tcg_ri) {
4729                     tcg_ri = gen_lookup_cp_reg(key);
4730                 }
4731                 tmp = tcg_temp_new_i32();
4732                 gen_helper_get_cp_reg(tmp, tcg_env, tcg_ri);
4733             } else {
4734                 tmp = load_cpu_offset(ri->fieldoffset);
4735             }
4736             if (rt == 15) {
4737                 /* A destination register of r15 for a 32-bit read sets
4738                  * the NZCV condition flags from the top 4 bits of the
4739                  * value rather than writing a register.  */
4740                 gen_set_nzcv(tmp);
4741             } else {
4742                 store_reg(s, rt, tmp);
4743             }
4744         }
4745     } else {
4746         /* Write */
4747         if (ri->type & ARM_CP_CONST) {
4748             /* If not forbidden by access permissions, treat as WI */
4749             return;
4750         }
4751 
4752         if (is64) {
4753             TCGv_i32 tmplo, tmphi;
4754             TCGv_i64 tmp64 = tcg_temp_new_i64();
4755             tmplo = load_reg(s, rt);
4756             tmphi = load_reg(s, rt2);
4757             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4758             if (ri->writefn) {
4759                 if (!tcg_ri) {
4760                     tcg_ri = gen_lookup_cp_reg(key);
4761                 }
4762                 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tmp64);
4763             } else {
4764                 tcg_gen_st_i64(tmp64, tcg_env, ri->fieldoffset);
4765             }
4766         } else {
4767             TCGv_i32 tmp = load_reg(s, rt);
4768             if (ri->writefn) {
4769                 if (!tcg_ri) {
4770                     tcg_ri = gen_lookup_cp_reg(key);
4771                 }
4772                 gen_helper_set_cp_reg(tcg_env, tcg_ri, tmp);
4773             } else {
4774                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4775             }
4776         }
4777     }
4778 
4779     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4780         /*
4781          * A write to any coprocessor register that ends a TB
4782          * must rebuild the hflags for the next TB.
4783          */
4784         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4785         /*
4786          * We default to ending the TB on a coprocessor register write,
4787          * but allow this to be suppressed by the register definition
4788          * (usually only necessary to work around guest bugs).
4789          */
4790         need_exit_tb = true;
4791     }
4792     if (need_exit_tb) {
4793         gen_lookup_tb(s);
4794     }
4795 }
4796 
4797 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4798 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4799 {
4800     int cpnum = (insn >> 8) & 0xf;
4801 
4802     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4803         unallocated_encoding(s);
4804     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4805         if (disas_iwmmxt_insn(s, insn)) {
4806             unallocated_encoding(s);
4807         }
4808     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4809         if (disas_dsp_insn(s, insn)) {
4810             unallocated_encoding(s);
4811         }
4812     }
4813 }
4814 
4815 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4816 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4817 {
4818     TCGv_i32 tmp;
4819     tmp = tcg_temp_new_i32();
4820     tcg_gen_extrl_i64_i32(tmp, val);
4821     store_reg(s, rlow, tmp);
4822     tmp = tcg_temp_new_i32();
4823     tcg_gen_extrh_i64_i32(tmp, val);
4824     store_reg(s, rhigh, tmp);
4825 }
4826 
4827 /* Load a 64-bit value from a register pair and add it to val.  */
4828 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4829 {
4830     TCGv_i64 tmp;
4831     TCGv_i32 tmpl;
4832     TCGv_i32 tmph;
4833 
4834     /* Load 64-bit value rd:rn.  */
4835     tmpl = load_reg(s, rlow);
4836     tmph = load_reg(s, rhigh);
4837     tmp = tcg_temp_new_i64();
4838     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4839     tcg_gen_add_i64(val, val, tmp);
4840 }
4841 
4842 /* Set N and Z flags from hi|lo.  */
4843 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4844 {
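    /*
     * QEMU keeps the N flag in bit 31 of cpu_NF and treats Z as set iff
     * cpu_ZF == 0, so ORing the two halves into cpu_ZF gives the correct
     * 64-bit Z result.
     */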
4845     tcg_gen_mov_i32(cpu_NF, hi);
4846     tcg_gen_or_i32(cpu_ZF, lo, hi);
4847 }
4848 
4849 /* Load/Store exclusive instructions are implemented by remembering
4850  * the value/address loaded, and seeing if these are the same
4851  * when the store is performed.  This should be sufficient to implement
4852  * the architecturally mandated semantics, and avoids having to monitor
4853  * regular stores.  The compare vs the remembered value is done during
4854  * the cmpxchg operation, but we must compare the addresses manually.  */
4855 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4856                                TCGv_i32 addr, int size)
4857 {
4858     TCGv_i32 tmp = tcg_temp_new_i32();
4859     MemOp opc = size | MO_ALIGN | s->be_data;
4860 
4861     s->is_ldex = true;
4862 
4863     if (size == 3) {
4864         TCGv_i32 tmp2 = tcg_temp_new_i32();
4865         TCGv_i64 t64 = tcg_temp_new_i64();
4866 
4867         /*
4868          * For AArch32, architecturally the 32-bit word at the lowest
4869          * address is always Rt and the one at addr+4 is Rt2, even if
4870          * the CPU is big-endian. That means we don't want to do a
4871          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4872          * architecturally 64-bit access, but instead do a 64-bit access
4873          * using MO_BE if appropriate and then split the two halves.
4874          */
4875         TCGv taddr = gen_aa32_addr(s, addr, opc);
4876 
4877         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4878         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4879         if (s->be_data == MO_BE) {
4880             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4881         } else {
4882             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4883         }
4884         store_reg(s, rt2, tmp2);
4885     } else {
4886         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4887         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4888     }
4889 
4890     store_reg(s, rt, tmp);
4891     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4892 }
4893 
4894 static void gen_clrex(DisasContext *s)
4895 {
4896     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4897 }
4898 
4899 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4900                                 TCGv_i32 addr, int size)
4901 {
4902     TCGv_i32 t0, t1, t2;
4903     TCGv_i64 extaddr;
4904     TCGv taddr;
4905     TCGLabel *done_label;
4906     TCGLabel *fail_label;
4907     MemOp opc = size | MO_ALIGN | s->be_data;
4908 
4909     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4910          [addr] = {Rt};
4911          {Rd} = 0;
4912        } else {
4913          {Rd} = 1;
4914        } */
4915     fail_label = gen_new_label();
4916     done_label = gen_new_label();
4917     extaddr = tcg_temp_new_i64();
4918     tcg_gen_extu_i32_i64(extaddr, addr);
4919     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4920 
4921     taddr = gen_aa32_addr(s, addr, opc);
4922     t0 = tcg_temp_new_i32();
4923     t1 = load_reg(s, rt);
4924     if (size == 3) {
4925         TCGv_i64 o64 = tcg_temp_new_i64();
4926         TCGv_i64 n64 = tcg_temp_new_i64();
4927 
4928         t2 = load_reg(s, rt2);
4929 
4930         /*
4931          * For AArch32, architecturally the 32-bit word at the lowest
4932          * address is always Rt and the one at addr+4 is Rt2, even if
4933          * the CPU is big-endian. Since we're going to treat this as a
4934          * single 64-bit BE store, we need to put the two halves in the
4935          * opposite order for BE to LE, so that they end up in the right
4936          * places.  We don't want gen_aa32_st_i64, because that checks
4937          * SCTLR_B as if for an architectural 64-bit access.
4938          */
4939         if (s->be_data == MO_BE) {
4940             tcg_gen_concat_i32_i64(n64, t2, t1);
4941         } else {
4942             tcg_gen_concat_i32_i64(n64, t1, t2);
4943         }
4944 
4945         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4946                                    get_mem_index(s), opc);
4947 
4948         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4949         tcg_gen_extrl_i64_i32(t0, o64);
4950     } else {
4951         t2 = tcg_temp_new_i32();
4952         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4953         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4954         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4955     }
4956     tcg_gen_mov_i32(cpu_R[rd], t0);
4957     tcg_gen_br(done_label);
4958 
4959     gen_set_label(fail_label);
4960     tcg_gen_movi_i32(cpu_R[rd], 1);
4961     gen_set_label(done_label);
4962     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4963 }
4964 
4965 /* gen_srs:
4967  * @s: DisasContext
4968  * @mode: mode field from insn (which stack to store to)
4969  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4970  * @writeback: true if writeback bit set
4971  *
4972  * Generate code for the SRS (Store Return State) insn.
4973  */
4974 static void gen_srs(DisasContext *s,
4975                     uint32_t mode, uint32_t amode, bool writeback)
4976 {
4977     int32_t offset;
4978     TCGv_i32 addr, tmp;
4979     bool undef = false;
4980 
4981     /* SRS is:
4982      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4983      *   and specified mode is monitor mode
4984      * - UNDEFINED in Hyp mode
4985      * - UNPREDICTABLE in User or System mode
4986      * - UNPREDICTABLE if the specified mode is:
4987      * -- not implemented
4988      * -- not a valid mode number
4989      * -- a mode that's at a higher exception level
4990      * -- Monitor, if we are Non-secure
4991      * For the UNPREDICTABLE cases we choose to UNDEF.
4992      */
4993     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
4994         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
4995         return;
4996     }
4997 
4998     if (s->current_el == 0 || s->current_el == 2) {
4999         undef = true;
5000     }
5001 
5002     switch (mode) {
5003     case ARM_CPU_MODE_USR:
5004     case ARM_CPU_MODE_FIQ:
5005     case ARM_CPU_MODE_IRQ:
5006     case ARM_CPU_MODE_SVC:
5007     case ARM_CPU_MODE_ABT:
5008     case ARM_CPU_MODE_UND:
5009     case ARM_CPU_MODE_SYS:
5010         break;
5011     case ARM_CPU_MODE_HYP:
5012         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5013             undef = true;
5014         }
5015         break;
5016     case ARM_CPU_MODE_MON:
5017         /* No need to check specifically for "are we non-secure" because
5018          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5019          * so if this isn't EL3 then we must be non-secure.
5020          */
5021         if (s->current_el != 3) {
5022             undef = true;
5023         }
5024         break;
5025     default:
5026         undef = true;
5027     }
5028 
5029     if (undef) {
5030         unallocated_encoding(s);
5031         return;
5032     }
5033 
5034     addr = tcg_temp_new_i32();
5035     /* get_r13_banked() will raise an exception if called from System mode */
5036     gen_set_condexec(s);
5037     gen_update_pc(s, 0);
5038     gen_helper_get_r13_banked(addr, tcg_env, tcg_constant_i32(mode));
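    /*
     * The switch below picks the offset from the banked SP to the lower
     * word of the {LR, SPSR} pair for this addressing mode; the writeback
     * switch further down then adjusts addr (which by then holds the
     * address used for the SPSR store) to the final SP value.
     */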
5039     switch (amode) {
5040     case 0: /* DA */
5041         offset = -4;
5042         break;
5043     case 1: /* IA */
5044         offset = 0;
5045         break;
5046     case 2: /* DB */
5047         offset = -8;
5048         break;
5049     case 3: /* IB */
5050         offset = 4;
5051         break;
5052     default:
5053         g_assert_not_reached();
5054     }
5055     tcg_gen_addi_i32(addr, addr, offset);
5056     tmp = load_reg(s, 14);
5057     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5058     tmp = load_cpu_field(spsr);
5059     tcg_gen_addi_i32(addr, addr, 4);
5060     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5061     if (writeback) {
5062         switch (amode) {
5063         case 0:
5064             offset = -8;
5065             break;
5066         case 1:
5067             offset = 4;
5068             break;
5069         case 2:
5070             offset = -4;
5071             break;
5072         case 3:
5073             offset = 0;
5074             break;
5075         default:
5076             g_assert_not_reached();
5077         }
5078         tcg_gen_addi_i32(addr, addr, offset);
5079         gen_helper_set_r13_banked(tcg_env, tcg_constant_i32(mode), addr);
5080     }
5081     s->base.is_jmp = DISAS_UPDATE_EXIT;
5082 }
5083 
5084 /* Skip this instruction if the ARM condition is false */
5085 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5086 {
5087     arm_gen_condlabel(s);
5088     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5089 }
5090 
5091 
5092 /*
5093  * Constant expanders used by T16/T32 decode
5094  */
5095 
5096 /* Return only the rotation part of T32ExpandImm.  */
5097 static int t32_expandimm_rot(DisasContext *s, int x)
5098 {
5099     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5100 }
5101 
5102 /* Return the unrotated immediate from T32ExpandImm.  */
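/*
 * For example, imm12 = 0x1AB (pattern 1 below) expands to 0x00AB00AB,
 * while imm12 = 0x855 is a rotated constant: this function returns 0xd5
 * and t32_expandimm_rot() returns 16, for a final value of 0x00d50000.
 */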
5103 static int t32_expandimm_imm(DisasContext *s, int x)
5104 {
5105     int imm = extract32(x, 0, 8);
5106 
5107     switch (extract32(x, 8, 4)) {
5108     case 0: /* XY */
5109         /* Nothing to do.  */
5110         break;
5111     case 1: /* 00XY00XY */
5112         imm *= 0x00010001;
5113         break;
5114     case 2: /* XY00XY00 */
5115         imm *= 0x01000100;
5116         break;
5117     case 3: /* XYXYXYXY */
5118         imm *= 0x01010101;
5119         break;
5120     default:
5121         /* Rotated constant.  */
5122         imm |= 0x80;
5123         break;
5124     }
5125     return imm;
5126 }
5127 
5128 static int t32_branch24(DisasContext *s, int x)
5129 {
5130     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
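    /*
     * When the input is non-negative (S == 0) the XOR below inverts both
     * J bits; when it is negative (S == 1) they already hold the I values.
     */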
5131     x ^= !(x < 0) * (3 << 21);
5132     /* Append the final zero.  */
5133     return x << 1;
5134 }
5135 
5136 static int t16_setflags(DisasContext *s)
5137 {
5138     return s->condexec_mask == 0;
5139 }
5140 
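/*
 * The T16 PUSH/POP register_list has bit 8 meaning "also store LR" (PUSH)
 * or "also load PC" (POP); expand it into the matching bit of a full
 * 16-bit register mask.
 */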
5141 static int t16_push_list(DisasContext *s, int x)
5142 {
5143     return (x & 0xff) | (x & 0x100) << (14 - 8);
5144 }
5145 
5146 static int t16_pop_list(DisasContext *s, int x)
5147 {
5148     return (x & 0xff) | (x & 0x100) << (15 - 8);
5149 }
5150 
5151 /*
5152  * Include the generated decoders.
5153  */
5154 
5155 #include "decode-a32.c.inc"
5156 #include "decode-a32-uncond.c.inc"
5157 #include "decode-t32.c.inc"
5158 #include "decode-t16.c.inc"
5159 
5160 static bool valid_cp(DisasContext *s, int cp)
5161 {
5162     /*
5163      * Return true if this coprocessor field indicates something
5164      * that's really a possible coprocessor.
5165      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5166      * and of those only cp14 and cp15 were used for registers.
5167      * cp10 and cp11 were used for VFP and Neon, whose decode is
5168      * dealt with elsewhere. With the advent of fp16, cp9 is also
5169      * now part of VFP.
5170      * For v8A and later, the encoding has been tightened so that
5171      * only cp14 and cp15 are valid, and other values aren't considered
5172      * to be in the coprocessor-instruction space at all. v8M still
5173      * permits coprocessors 0..7.
5174      * For XScale, we must not decode the XScale cp0, cp1 space as
5175      * a standard coprocessor insn, because we want to fall through to
5176      * the legacy disas_xscale_insn() decoder after decodetree is done.
5177      */
5178     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5179         return false;
5180     }
5181 
5182     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5183         !arm_dc_feature(s, ARM_FEATURE_M)) {
5184         return cp >= 14;
5185     }
5186     return cp < 8 || cp >= 14;
5187 }
5188 
5189 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5190 {
5191     if (!valid_cp(s, a->cp)) {
5192         return false;
5193     }
5194     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5195                    false, a->rt, 0);
5196     return true;
5197 }
5198 
5199 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5200 {
5201     if (!valid_cp(s, a->cp)) {
5202         return false;
5203     }
5204     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5205                    true, a->rt, 0);
5206     return true;
5207 }
5208 
5209 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5210 {
5211     if (!valid_cp(s, a->cp)) {
5212         return false;
5213     }
5214     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5215                    false, a->rt, a->rt2);
5216     return true;
5217 }
5218 
5219 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5220 {
5221     if (!valid_cp(s, a->cp)) {
5222         return false;
5223     }
5224     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5225                    true, a->rt, a->rt2);
5226     return true;
5227 }
5228 
5229 /* Helpers to swap operands for reverse-subtract.  */
5230 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5231 {
5232     tcg_gen_sub_i32(dst, b, a);
5233 }
5234 
5235 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5236 {
5237     gen_sub_CC(dst, b, a);
5238 }
5239 
5240 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5241 {
5242     gen_sub_carry(dest, b, a);
5243 }
5244 
5245 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5246 {
5247     gen_sbc_CC(dest, b, a);
5248 }
5249 
5250 /*
5251  * Helpers for the data processing routines.
5252  *
5253  * After the computation, store the result back.  This may be a plain
5254  * register write (STREG_NORMAL), may be suppressed altogether
5255  * (STREG_NONE), may require a runtime check against the stack limits
5256  * (STREG_SP_CHECK), or may generate an exception return (STREG_EXC_RET).
5257  *
5258  * Always return true, indicating success for a trans_* function.
5259  */
5260 typedef enum {
5261    STREG_NONE,
5262    STREG_NORMAL,
5263    STREG_SP_CHECK,
5264    STREG_EXC_RET,
5265 } StoreRegKind;
5266 
5267 static bool store_reg_kind(DisasContext *s, int rd,
5268                             TCGv_i32 val, StoreRegKind kind)
5269 {
5270     switch (kind) {
5271     case STREG_NONE:
5272         return true;
5273     case STREG_NORMAL:
5274         /* See ALUWritePC: Interworking only from a32 mode. */
5275         if (s->thumb) {
5276             store_reg(s, rd, val);
5277         } else {
5278             store_reg_bx(s, rd, val);
5279         }
5280         return true;
5281     case STREG_SP_CHECK:
5282         store_sp_checked(s, val);
5283         return true;
5284     case STREG_EXC_RET:
5285         gen_exception_return(s, val);
5286         return true;
5287     }
5288     g_assert_not_reached();
5289 }
5290 
5291 /*
5292  * Data Processing (register)
5293  *
5294  * Operate, with set flags, one register source,
5295  * one immediate shifted register source, and a destination.
5296  */
5297 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5298                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5299                          int logic_cc, StoreRegKind kind)
5300 {
5301     TCGv_i32 tmp1, tmp2;
5302 
5303     tmp2 = load_reg(s, a->rm);
5304     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5305     tmp1 = load_reg(s, a->rn);
5306 
5307     gen(tmp1, tmp1, tmp2);
5308 
5309     if (logic_cc) {
5310         gen_logic_CC(tmp1);
5311     }
5312     return store_reg_kind(s, a->rd, tmp1, kind);
5313 }
5314 
5315 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5316                          void (*gen)(TCGv_i32, TCGv_i32),
5317                          int logic_cc, StoreRegKind kind)
5318 {
5319     TCGv_i32 tmp;
5320 
5321     tmp = load_reg(s, a->rm);
5322     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5323 
5324     gen(tmp, tmp);
5325     if (logic_cc) {
5326         gen_logic_CC(tmp);
5327     }
5328     return store_reg_kind(s, a->rd, tmp, kind);
5329 }
5330 
5331 /*
5332  * Data-processing (register-shifted register)
5333  *
5334  * Operate, with set flags, one register source,
5335  * one register shifted register source, and a destination.
5336  */
5337 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5338                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5339                          int logic_cc, StoreRegKind kind)
5340 {
5341     TCGv_i32 tmp1, tmp2;
5342 
5343     tmp1 = load_reg(s, a->rs);
5344     tmp2 = load_reg(s, a->rm);
5345     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5346     tmp1 = load_reg(s, a->rn);
5347 
5348     gen(tmp1, tmp1, tmp2);
5349 
5350     if (logic_cc) {
5351         gen_logic_CC(tmp1);
5352     }
5353     return store_reg_kind(s, a->rd, tmp1, kind);
5354 }
5355 
5356 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5357                          void (*gen)(TCGv_i32, TCGv_i32),
5358                          int logic_cc, StoreRegKind kind)
5359 {
5360     TCGv_i32 tmp1, tmp2;
5361 
5362     tmp1 = load_reg(s, a->rs);
5363     tmp2 = load_reg(s, a->rm);
5364     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5365 
5366     gen(tmp2, tmp2);
5367     if (logic_cc) {
5368         gen_logic_CC(tmp2);
5369     }
5370     return store_reg_kind(s, a->rd, tmp2, kind);
5371 }
5372 
5373 /*
5374  * Data-processing (immediate)
5375  *
5376  * Operate, with set flags, one register source,
5377  * one rotated immediate, and a destination.
5378  *
5379  * Note that because logic_cc && a->rot must set CF from the msb of
5380  * the rotated immediate, the decoder passes us the unrotated form of
5381  * the immediate along with the rotation amount.
5382  */
5383 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5384                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5385                          int logic_cc, StoreRegKind kind)
5386 {
5387     TCGv_i32 tmp1;
5388     uint32_t imm;
5389 
5390     imm = ror32(a->imm, a->rot);
5391     if (logic_cc && a->rot) {
5392         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5393     }
5394     tmp1 = load_reg(s, a->rn);
5395 
5396     gen(tmp1, tmp1, tcg_constant_i32(imm));
5397 
5398     if (logic_cc) {
5399         gen_logic_CC(tmp1);
5400     }
5401     return store_reg_kind(s, a->rd, tmp1, kind);
5402 }
5403 
5404 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5405                          void (*gen)(TCGv_i32, TCGv_i32),
5406                          int logic_cc, StoreRegKind kind)
5407 {
5408     TCGv_i32 tmp;
5409     uint32_t imm;
5410 
5411     imm = ror32(a->imm, a->rot);
5412     if (logic_cc && a->rot) {
5413         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5414     }
5415 
5416     tmp = tcg_temp_new_i32();
5417     gen(tmp, tcg_constant_i32(imm));
5418 
5419     if (logic_cc) {
5420         gen_logic_CC(tmp);
5421     }
5422     return store_reg_kind(s, a->rd, tmp, kind);
5423 }
5424 
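/*
 * Expand one trans_* function per operand form of a data-processing insn:
 * register + immediate-shifted register (rrri), register + register-shifted
 * register (rrrr), and register + rotated immediate (rri).  For example,
 * DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL) produces
 * trans_AND_rrri(), trans_AND_rrrr() and trans_AND_rri().
 */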
5425 #define DO_ANY3(NAME, OP, L, K)                                         \
5426     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5427     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5428     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5429     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5430     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5431     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5432 
5433 #define DO_ANY2(NAME, OP, L, K)                                         \
5434     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5435     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5436     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5437     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5438     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5439     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5440 
5441 #define DO_CMP2(NAME, OP, L)                                            \
5442     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5443     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5444     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5445     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5446     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5447     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5448 
5449 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5450 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5451 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5452 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5453 
5454 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5455 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5456 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5457 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5458 
5459 DO_CMP2(TST, tcg_gen_and_i32, true)
5460 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5461 DO_CMP2(CMN, gen_add_CC, false)
5462 DO_CMP2(CMP, gen_sub_CC, false)
5463 
5464 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5465         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5466 
5467 /*
5468  * Note that the K expression computing the StoreRegKind can return out
5469  * of the middle of the functions expanded by DO_ANY3, and that it
5470  * modifies a->s before the OP argument uses it.
5471  */
5472 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5473         ({
5474             StoreRegKind ret = STREG_NORMAL;
5475             if (a->rd == 15 && a->s) {
5476                 /*
5477                  * See ALUExceptionReturn:
5478                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5479                  * In Hyp mode, UNDEFINED.
5480                  */
5481                 if (IS_USER(s) || s->current_el == 2) {
5482                     unallocated_encoding(s);
5483                     return true;
5484                 }
5485                 /* There is no writeback of nzcv to PSTATE.  */
5486                 a->s = 0;
5487                 ret = STREG_EXC_RET;
5488             } else if (a->rd == 13 && a->rn == 13) {
5489                 ret = STREG_SP_CHECK;
5490             }
5491             ret;
5492         }))
5493 
5494 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5495         ({
5496             StoreRegKind ret = STREG_NORMAL;
5497             if (a->rd == 15 && a->s) {
5498                 /*
5499                  * See ALUExceptionReturn:
5500                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5501                  * In Hyp mode, UNDEFINED.
5502                  */
5503                 if (IS_USER(s) || s->current_el == 2) {
5504                     unallocated_encoding(s);
5505                     return true;
5506                 }
5507                 /* There is no writeback of nzcv to PSTATE.  */
5508                 a->s = 0;
5509                 ret = STREG_EXC_RET;
5510             } else if (a->rd == 13) {
5511                 ret = STREG_SP_CHECK;
5512             }
5513             ret;
5514         }))
5515 
5516 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5517 
5518 /*
5519  * ORN is only available with T32, so there is no register-shifted-register
5520  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5521  */
5522 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5523 {
5524     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5525 }
5526 
5527 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5528 {
5529     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5530 }
5531 
5532 #undef DO_ANY3
5533 #undef DO_ANY2
5534 #undef DO_CMP2
5535 
5536 static bool trans_ADR(DisasContext *s, arg_ri *a)
5537 {
5538     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5539     return true;
5540 }
5541 
5542 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5543 {
5544     if (!ENABLE_ARCH_6T2) {
5545         return false;
5546     }
5547 
5548     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5549     return true;
5550 }
5551 
5552 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5553 {
5554     TCGv_i32 tmp;
5555 
5556     if (!ENABLE_ARCH_6T2) {
5557         return false;
5558     }
5559 
5560     tmp = load_reg(s, a->rd);
5561     tcg_gen_ext16u_i32(tmp, tmp);
5562     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5563     store_reg(s, a->rd, tmp);
5564     return true;
5565 }
5566 
5567 /*
5568  * v8.1M MVE wide-shifts
5569  */
5570 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5571                           WideShiftImmFn *fn)
5572 {
5573     TCGv_i64 rda;
5574     TCGv_i32 rdalo, rdahi;
5575 
5576     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5577         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5578         return false;
5579     }
5580     if (a->rdahi == 15) {
5581         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5582         return false;
5583     }
5584     if (!dc_isar_feature(aa32_mve, s) ||
5585         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5586         a->rdahi == 13) {
5587         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5588         unallocated_encoding(s);
5589         return true;
5590     }
5591 
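    /* An immediate shift count of 0 here encodes a shift by 32. */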
5592     if (a->shim == 0) {
5593         a->shim = 32;
5594     }
5595 
5596     rda = tcg_temp_new_i64();
5597     rdalo = load_reg(s, a->rdalo);
5598     rdahi = load_reg(s, a->rdahi);
5599     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5600 
5601     fn(rda, rda, a->shim);
5602 
5603     tcg_gen_extrl_i64_i32(rdalo, rda);
5604     tcg_gen_extrh_i64_i32(rdahi, rda);
5605     store_reg(s, a->rdalo, rdalo);
5606     store_reg(s, a->rdahi, rdahi);
5607 
5608     return true;
5609 }
5610 
5611 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5612 {
5613     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5614 }
5615 
5616 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5617 {
5618     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5619 }
5620 
5621 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5622 {
5623     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5624 }
5625 
5626 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5627 {
5628     gen_helper_mve_sqshll(r, tcg_env, n, tcg_constant_i32(shift));
5629 }
5630 
5631 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5632 {
5633     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5634 }
5635 
5636 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5637 {
5638     gen_helper_mve_uqshll(r, tcg_env, n, tcg_constant_i32(shift));
5639 }
5640 
5641 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5642 {
5643     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5644 }
5645 
5646 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5647 {
5648     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5649 }
5650 
5651 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5652 {
5653     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5654 }
5655 
5656 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5657 {
5658     TCGv_i64 rda;
5659     TCGv_i32 rdalo, rdahi;
5660 
5661     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5662         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5663         return false;
5664     }
5665     if (a->rdahi == 15) {
5666         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5667         return false;
5668     }
5669     if (!dc_isar_feature(aa32_mve, s) ||
5670         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5671         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5672         a->rm == a->rdahi || a->rm == a->rdalo) {
5673         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5674         unallocated_encoding(s);
5675         return true;
5676     }
5677 
5678     rda = tcg_temp_new_i64();
5679     rdalo = load_reg(s, a->rdalo);
5680     rdahi = load_reg(s, a->rdahi);
5681     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5682 
5683     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5684     fn(rda, tcg_env, rda, cpu_R[a->rm]);
5685 
5686     tcg_gen_extrl_i64_i32(rdalo, rda);
5687     tcg_gen_extrh_i64_i32(rdahi, rda);
5688     store_reg(s, a->rdalo, rdalo);
5689     store_reg(s, a->rdahi, rdahi);
5690 
5691     return true;
5692 }
5693 
5694 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5695 {
5696     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5697 }
5698 
5699 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5700 {
5701     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5702 }
5703 
5704 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5705 {
5706     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5707 }
5708 
5709 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5710 {
5711     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5712 }
5713 
5714 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5715 {
5716     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5717 }
5718 
5719 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5720 {
5721     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5722 }
5723 
5724 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5725 {
5726     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5727         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5728         return false;
5729     }
5730     if (!dc_isar_feature(aa32_mve, s) ||
5731         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5732         a->rda == 13 || a->rda == 15) {
5733         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5734         unallocated_encoding(s);
5735         return true;
5736     }
5737 
5738     if (a->shim == 0) {
5739         a->shim = 32;
5740     }
5741     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5742 
5743     return true;
5744 }
5745 
5746 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5747 {
5748     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5749 }
5750 
5751 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5752 {
5753     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5754 }
5755 
5756 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5757 {
5758     gen_helper_mve_sqshl(r, tcg_env, n, tcg_constant_i32(shift));
5759 }
5760 
5761 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5762 {
5763     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5764 }
5765 
5766 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5767 {
5768     gen_helper_mve_uqshl(r, tcg_env, n, tcg_constant_i32(shift));
5769 }
5770 
5771 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5772 {
5773     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5774 }
5775 
5776 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5777 {
5778     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5779         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5780         return false;
5781     }
5782     if (!dc_isar_feature(aa32_mve, s) ||
5783         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5784         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5785         a->rm == a->rda) {
5786         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5787         unallocated_encoding(s);
5788         return true;
5789     }
5790 
5791     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5792     fn(cpu_R[a->rda], tcg_env, cpu_R[a->rda], cpu_R[a->rm]);
5793     return true;
5794 }
5795 
5796 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5797 {
5798     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5799 }
5800 
5801 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5802 {
5803     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5804 }
5805 
5806 /*
5807  * Multiply and multiply accumulate
5808  */
5809 
5810 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5811 {
5812     TCGv_i32 t1, t2;
5813 
5814     t1 = load_reg(s, a->rn);
5815     t2 = load_reg(s, a->rm);
5816     tcg_gen_mul_i32(t1, t1, t2);
5817     if (add) {
5818         t2 = load_reg(s, a->ra);
5819         tcg_gen_add_i32(t1, t1, t2);
5820     }
5821     if (a->s) {
5822         gen_logic_CC(t1);
5823     }
5824     store_reg(s, a->rd, t1);
5825     return true;
5826 }
5827 
5828 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5829 {
5830     return op_mla(s, a, false);
5831 }
5832 
5833 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5834 {
5835     return op_mla(s, a, true);
5836 }
5837 
5838 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5839 {
5840     TCGv_i32 t1, t2;
5841 
5842     if (!ENABLE_ARCH_6T2) {
5843         return false;
5844     }
5845     t1 = load_reg(s, a->rn);
5846     t2 = load_reg(s, a->rm);
5847     tcg_gen_mul_i32(t1, t1, t2);
5848     t2 = load_reg(s, a->ra);
5849     tcg_gen_sub_i32(t1, t2, t1);
5850     store_reg(s, a->rd, t1);
5851     return true;
5852 }
5853 
5854 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5855 {
5856     TCGv_i32 t0, t1, t2, t3;
5857 
5858     t0 = load_reg(s, a->rm);
5859     t1 = load_reg(s, a->rn);
5860     if (uns) {
5861         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5862     } else {
5863         tcg_gen_muls2_i32(t0, t1, t0, t1);
5864     }
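    /*
     * For the accumulating forms, add in the existing 64-bit value:
     * a->ra holds the low half and a->rd the high half.
     */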
5865     if (add) {
5866         t2 = load_reg(s, a->ra);
5867         t3 = load_reg(s, a->rd);
5868         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5869     }
5870     if (a->s) {
5871         gen_logicq_cc(t0, t1);
5872     }
5873     store_reg(s, a->ra, t0);
5874     store_reg(s, a->rd, t1);
5875     return true;
5876 }
5877 
5878 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5879 {
5880     return op_mlal(s, a, true, false);
5881 }
5882 
5883 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5884 {
5885     return op_mlal(s, a, false, false);
5886 }
5887 
5888 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5889 {
5890     return op_mlal(s, a, true, true);
5891 }
5892 
5893 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5894 {
5895     return op_mlal(s, a, false, true);
5896 }
5897 
5898 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5899 {
5900     TCGv_i32 t0, t1, t2, zero;
5901 
5902     if (s->thumb
5903         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5904         : !ENABLE_ARCH_6) {
5905         return false;
5906     }
5907 
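    /*
     * UMAAL computes Rn * Rm + Ra + Rd as an unsigned 64-bit result
     * (which cannot overflow) and writes it back with a->ra as the low
     * word and a->rd as the high word.
     */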
5908     t0 = load_reg(s, a->rm);
5909     t1 = load_reg(s, a->rn);
5910     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5911     zero = tcg_constant_i32(0);
5912     t2 = load_reg(s, a->ra);
5913     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5914     t2 = load_reg(s, a->rd);
5915     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5916     store_reg(s, a->ra, t0);
5917     store_reg(s, a->rd, t1);
5918     return true;
5919 }
5920 
5921 /*
5922  * Saturating addition and subtraction
5923  */
5924 
5925 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5926 {
5927     TCGv_i32 t0, t1;
5928 
5929     if (s->thumb
5930         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5931         : !ENABLE_ARCH_5TE) {
5932         return false;
5933     }
5934 
5935     t0 = load_reg(s, a->rm);
5936     t1 = load_reg(s, a->rn);
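    /* For QDADD/QDSUB, the doubling of Rn saturates before the final op. */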
5937     if (doub) {
5938         gen_helper_add_saturate(t1, tcg_env, t1, t1);
5939     }
5940     if (add) {
5941         gen_helper_add_saturate(t0, tcg_env, t0, t1);
5942     } else {
5943         gen_helper_sub_saturate(t0, tcg_env, t0, t1);
5944     }
5945     store_reg(s, a->rd, t0);
5946     return true;
5947 }
5948 
5949 #define DO_QADDSUB(NAME, ADD, DOUB) \
5950 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5951 {                                                        \
5952     return op_qaddsub(s, a, ADD, DOUB);                  \
5953 }
5954 
5955 DO_QADDSUB(QADD, true, false)
5956 DO_QADDSUB(QSUB, false, false)
5957 DO_QADDSUB(QDADD, true, true)
5958 DO_QADDSUB(QDSUB, false, true)
5959 
5960 #undef DO_QADDSUB
5961 
5962 /*
5963  * Halfword multiply and multiply accumulate
5964  */
5965 
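/*
 * add_long selects the accumulate behaviour: 0 is the plain 16x16->32
 * SMULxy, 1 is SMLAxy (32-bit accumulate, setting Q on overflow), and
 * 2 is SMLALxy (64-bit accumulate into ra:rd).
 */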
5966 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5967                        int add_long, bool nt, bool mt)
5968 {
5969     TCGv_i32 t0, t1, tl, th;
5970 
5971     if (s->thumb
5972         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5973         : !ENABLE_ARCH_5TE) {
5974         return false;
5975     }
5976 
5977     t0 = load_reg(s, a->rn);
5978     t1 = load_reg(s, a->rm);
5979     gen_mulxy(t0, t1, nt, mt);
5980 
5981     switch (add_long) {
5982     case 0:
5983         store_reg(s, a->rd, t0);
5984         break;
5985     case 1:
5986         t1 = load_reg(s, a->ra);
5987         gen_helper_add_setq(t0, tcg_env, t0, t1);
5988         store_reg(s, a->rd, t0);
5989         break;
5990     case 2:
5991         tl = load_reg(s, a->ra);
5992         th = load_reg(s, a->rd);
5993         /* Sign-extend the 32-bit product to 64 bits.  */
5994         t1 = tcg_temp_new_i32();
5995         tcg_gen_sari_i32(t1, t0, 31);
5996         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5997         store_reg(s, a->ra, tl);
5998         store_reg(s, a->rd, th);
5999         break;
6000     default:
6001         g_assert_not_reached();
6002     }
6003     return true;
6004 }
6005 
6006 #define DO_SMLAX(NAME, add, nt, mt) \
6007 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6008 {                                                          \
6009     return op_smlaxxx(s, a, add, nt, mt);                  \
6010 }
6011 
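/*
 * In SMUL<x><y> / SMLA<x><y>, <x> selects the bottom (B) or top (T) half
 * of Rn and <y> the half of Rm; the nt and mt arguments below encode T as 1.
 */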
6012 DO_SMLAX(SMULBB, 0, 0, 0)
6013 DO_SMLAX(SMULBT, 0, 0, 1)
6014 DO_SMLAX(SMULTB, 0, 1, 0)
6015 DO_SMLAX(SMULTT, 0, 1, 1)
6016 
6017 DO_SMLAX(SMLABB, 1, 0, 0)
6018 DO_SMLAX(SMLABT, 1, 0, 1)
6019 DO_SMLAX(SMLATB, 1, 1, 0)
6020 DO_SMLAX(SMLATT, 1, 1, 1)
6021 
6022 DO_SMLAX(SMLALBB, 2, 0, 0)
6023 DO_SMLAX(SMLALBT, 2, 0, 1)
6024 DO_SMLAX(SMLALTB, 2, 1, 0)
6025 DO_SMLAX(SMLALTT, 2, 1, 1)
6026 
6027 #undef DO_SMLAX
6028 
6029 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6030 {
6031     TCGv_i32 t0, t1;
6032 
6033     if (!ENABLE_ARCH_5TE) {
6034         return false;
6035     }
6036 
6037     t0 = load_reg(s, a->rn);
6038     t1 = load_reg(s, a->rm);
6039     /*
6040      * Since the nominal result is product<47:16>, shift the 16-bit
6041      * input up by 16 bits, so that the result is at product<63:32>.
6042      */
6043     if (mt) {
6044         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6045     } else {
6046         tcg_gen_shli_i32(t1, t1, 16);
6047     }
6048     tcg_gen_muls2_i32(t0, t1, t0, t1);
6049     if (add) {
6050         t0 = load_reg(s, a->ra);
6051         gen_helper_add_setq(t1, tcg_env, t1, t0);
6052     }
6053     store_reg(s, a->rd, t1);
6054     return true;
6055 }
6056 
6057 #define DO_SMLAWX(NAME, add, mt) \
6058 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6059 {                                                          \
6060     return op_smlawx(s, a, add, mt);                       \
6061 }
6062 
6063 DO_SMLAWX(SMULWB, 0, 0)
6064 DO_SMLAWX(SMULWT, 0, 1)
6065 DO_SMLAWX(SMLAWB, 1, 0)
6066 DO_SMLAWX(SMLAWT, 1, 1)
6067 
6068 #undef DO_SMLAWX
6069 
6070 /*
6071  * MSR (immediate) and hints
6072  */
6073 
6074 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6075 {
6076     /*
6077      * When running single-threaded TCG code, use the helper to ensure that
6078      * the next round-robin scheduled vCPU gets a crack.  When running in
6079      * MTTCG we don't generate jumps to the helper as it won't affect the
6080      * scheduling of other vCPUs.
6081      */
6082     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6083         gen_update_pc(s, curr_insn_len(s));
6084         s->base.is_jmp = DISAS_YIELD;
6085     }
6086     return true;
6087 }
6088 
6089 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6090 {
6091     /*
6092      * When running single-threaded TCG code, use the helper to ensure that
6093      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6094      * just skip this instruction.  Currently the SEV/SEVL instructions,
6095      * which are *one* of many ways to wake the CPU from WFE, are not
6096      * implemented so we can't sleep like WFI does.
6097      */
6098     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6099         gen_update_pc(s, curr_insn_len(s));
6100         s->base.is_jmp = DISAS_WFE;
6101     }
6102     return true;
6103 }
6104 
6105 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6106 {
6107     /* For WFI, halt the vCPU until an IRQ. */
6108     gen_update_pc(s, curr_insn_len(s));
6109     s->base.is_jmp = DISAS_WFI;
6110     return true;
6111 }
6112 
6113 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6114 {
6115     /*
6116      * For M-profile, minimal-RAS ESB can be a NOP.
6117      * Without RAS, we must implement this as NOP.
6118      */
6119     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6120         /*
6121          * QEMU does not have a source of physical SErrors,
6122          * so we are only concerned with virtual SErrors.
6123          * The pseudocode in the ARM for this case is
6124          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6125          *      AArch32.vESBOperation();
6126          * Most of the condition can be evaluated at translation time.
6127          * Test for EL2 present, and defer test for SEL2 to runtime.
6128          */
6129         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6130             gen_helper_vesb(tcg_env);
6131         }
6132     }
6133     return true;
6134 }
6135 
6136 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6137 {
6138     return true;
6139 }
6140 
6141 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6142 {
6143     uint32_t val = ror32(a->imm, a->rot * 2);
6144     uint32_t mask = msr_mask(s, a->mask, a->r);
6145 
6146     if (gen_set_psr_im(s, mask, a->r, val)) {
6147         unallocated_encoding(s);
6148     }
6149     return true;
6150 }
6151 
6152 /*
6153  * Cyclic Redundancy Check
6154  */
6155 
6156 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6157 {
6158     TCGv_i32 t1, t2, t3;
6159 
6160     if (!dc_isar_feature(aa32_crc32, s)) {
6161         return false;
6162     }
6163 
6164     t1 = load_reg(s, a->rn);
6165     t2 = load_reg(s, a->rm);
6166     switch (sz) {
6167     case MO_8:
6168         gen_uxtb(t2);
6169         break;
6170     case MO_16:
6171         gen_uxth(t2);
6172         break;
6173     case MO_32:
6174         break;
6175     default:
6176         g_assert_not_reached();
6177     }
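    /* The helpers take the size of the Rm value in bytes (1, 2 or 4). */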
6178     t3 = tcg_constant_i32(1 << sz);
6179     if (c) {
6180         gen_helper_crc32c(t1, t1, t2, t3);
6181     } else {
6182         gen_helper_crc32(t1, t1, t2, t3);
6183     }
6184     store_reg(s, a->rd, t1);
6185     return true;
6186 }
6187 
6188 #define DO_CRC32(NAME, c, sz) \
6189 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6190     { return op_crc32(s, a, c, sz); }
6191 
6192 DO_CRC32(CRC32B, false, MO_8)
6193 DO_CRC32(CRC32H, false, MO_16)
6194 DO_CRC32(CRC32W, false, MO_32)
6195 DO_CRC32(CRC32CB, true, MO_8)
6196 DO_CRC32(CRC32CH, true, MO_16)
6197 DO_CRC32(CRC32CW, true, MO_32)
6198 
6199 #undef DO_CRC32
6200 
6201 /*
6202  * Miscellaneous instructions
6203  */
6204 
6205 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6206 {
6207     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6208         return false;
6209     }
6210     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6211     return true;
6212 }
6213 
6214 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6215 {
6216     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6217         return false;
6218     }
6219     gen_msr_banked(s, a->r, a->sysm, a->rn);
6220     return true;
6221 }
6222 
6223 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6224 {
6225     TCGv_i32 tmp;
6226 
6227     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6228         return false;
6229     }
6230     if (a->r) {
6231         if (IS_USER(s)) {
6232             unallocated_encoding(s);
6233             return true;
6234         }
6235         tmp = load_cpu_field(spsr);
6236     } else {
6237         tmp = tcg_temp_new_i32();
6238         gen_helper_cpsr_read(tmp, tcg_env);
6239     }
6240     store_reg(s, a->rd, tmp);
6241     return true;
6242 }
6243 
6244 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6245 {
6246     TCGv_i32 tmp;
6247     uint32_t mask = msr_mask(s, a->mask, a->r);
6248 
6249     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6250         return false;
6251     }
6252     tmp = load_reg(s, a->rn);
6253     if (gen_set_psr(s, mask, a->r, tmp)) {
6254         unallocated_encoding(s);
6255     }
6256     return true;
6257 }
6258 
6259 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6260 {
6261     TCGv_i32 tmp;
6262 
6263     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6264         return false;
6265     }
6266     tmp = tcg_temp_new_i32();
6267     gen_helper_v7m_mrs(tmp, tcg_env, tcg_constant_i32(a->sysm));
6268     store_reg(s, a->rd, tmp);
6269     return true;
6270 }
6271 
6272 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6273 {
6274     TCGv_i32 addr, reg;
6275 
6276     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6277         return false;
6278     }
6279     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6280     reg = load_reg(s, a->rn);
6281     gen_helper_v7m_msr(tcg_env, addr, reg);
6282     /* If we wrote to CONTROL, the EL might have changed */
6283     gen_rebuild_hflags(s, true);
6284     gen_lookup_tb(s);
6285     return true;
6286 }
6287 
6288 static bool trans_BX(DisasContext *s, arg_BX *a)
6289 {
6290     if (!ENABLE_ARCH_4T) {
6291         return false;
6292     }
6293     gen_bx_excret(s, load_reg(s, a->rm));
6294     return true;
6295 }
6296 
6297 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6298 {
6299     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6300         return false;
6301     }
6302     /*
6303      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6304      * TBFLAGS bit on a basically-never-happens case, so call a helper
6305      * function to check for the trap and raise the exception if needed
6306      * (passing it the register number for the syndrome value).
6307      * v8A doesn't have this HSTR bit.
6308      */
6309     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6310         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6311         s->current_el < 2 && s->ns) {
6312         gen_helper_check_bxj_trap(tcg_env, tcg_constant_i32(a->rm));
6313     }
6314     /* Trivial implementation equivalent to bx.  */
6315     gen_bx(s, load_reg(s, a->rm));
6316     return true;
6317 }
6318 
6319 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6320 {
6321     TCGv_i32 tmp;
6322 
6323     if (!ENABLE_ARCH_5) {
6324         return false;
6325     }
6326     tmp = load_reg(s, a->rm);
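    /*
     * The link register gets the address of the next insn, with bit 0 set
     * when in Thumb mode so a subsequent BX returns to the correct state.
     */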
6327     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6328     gen_bx(s, tmp);
6329     return true;
6330 }
6331 
6332 /*
6333  * BXNS/BLXNS: only exist for v8M with the security extensions,
6334  * and always UNDEF if NonSecure.  We don't implement these in
6335  * the user-only mode either (in theory you can use them from
6336  * Secure User mode but they are too tied in to system emulation).
6337  */
6338 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6339 {
6340     if (!s->v8m_secure || IS_USER_ONLY) {
6341         unallocated_encoding(s);
6342     } else {
6343         gen_bxns(s, a->rm);
6344     }
6345     return true;
6346 }
6347 
6348 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6349 {
6350     if (!s->v8m_secure || IS_USER_ONLY) {
6351         unallocated_encoding(s);
6352     } else {
6353         gen_blxns(s, a->rm);
6354     }
6355     return true;
6356 }
6357 
6358 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6359 {
6360     TCGv_i32 tmp;
6361 
6362     if (!ENABLE_ARCH_5) {
6363         return false;
6364     }
6365     tmp = load_reg(s, a->rm);
6366     tcg_gen_clzi_i32(tmp, tmp, 32);
6367     store_reg(s, a->rd, tmp);
6368     return true;
6369 }
6370 
6371 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6372 {
6373     TCGv_i32 tmp;
6374 
6375     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6376         return false;
6377     }
6378     if (IS_USER(s)) {
6379         unallocated_encoding(s);
6380         return true;
6381     }
6382     if (s->current_el == 2) {
6383         /* ERET from Hyp uses ELR_Hyp, not LR */
6384         tmp = load_cpu_field_low32(elr_el[2]);
6385     } else {
6386         tmp = load_reg(s, 14);
6387     }
6388     gen_exception_return(s, tmp);
6389     return true;
6390 }
6391 
6392 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6393 {
6394     gen_hlt(s, a->imm);
6395     return true;
6396 }
6397 
6398 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6399 {
6400     if (!ENABLE_ARCH_5) {
6401         return false;
6402     }
6403     /* BKPT is OK with ECI set and leaves it untouched */
6404     s->eci_handled = true;
6405     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6406         semihosting_enabled(s->current_el == 0) &&
6407         (a->imm == 0xab)) {
6408         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6409     } else {
6410         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6411     }
6412     return true;
6413 }
6414 
6415 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6416 {
6417     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6418         return false;
6419     }
6420     if (IS_USER(s)) {
6421         unallocated_encoding(s);
6422     } else {
6423         gen_hvc(s, a->imm);
6424     }
6425     return true;
6426 }
6427 
6428 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6429 {
6430     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6431         return false;
6432     }
6433     if (IS_USER(s)) {
6434         unallocated_encoding(s);
6435     } else {
6436         gen_smc(s);
6437     }
6438     return true;
6439 }
6440 
6441 static bool trans_SG(DisasContext *s, arg_SG *a)
6442 {
6443     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6444         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6445         return false;
6446     }
6447     /*
6448      * SG (v8M only)
6449      * The bulk of the behaviour for this instruction is implemented
6450      * in v7m_handle_execute_nsc(), which deals with the insn when
6451      * it is executed by a CPU in non-secure state from memory
6452      * which is Secure & NonSecure-Callable.
6453      * Here we only need to handle the remaining cases:
6454      *  * in NS memory (including the "security extension not
6455      *    implemented" case): NOP
6456      *  * in S memory but CPU already secure (clear IT bits)
6457      * We know that the attribute for the memory this insn is
6458      * in must match the current CPU state, because otherwise
6459      * get_phys_addr_pmsav8 would have generated an exception.
6460      */
6461     if (s->v8m_secure) {
6462         /* Like the IT insn, we don't need to generate any code */
6463         s->condexec_cond = 0;
6464         s->condexec_mask = 0;
6465     }
6466     return true;
6467 }
6468 
6469 static bool trans_TT(DisasContext *s, arg_TT *a)
6470 {
6471     TCGv_i32 addr, tmp;
6472 
6473     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6474         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6475         return false;
6476     }
6477     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6478         /* We UNDEF for these UNPREDICTABLE cases */
6479         unallocated_encoding(s);
6480         return true;
6481     }
6482     if (a->A && !s->v8m_secure) {
6483         /* This case is UNDEFINED.  */
6484         unallocated_encoding(s);
6485         return true;
6486     }
6487 
6488     addr = load_reg(s, a->rn);
6489     tmp = tcg_temp_new_i32();
6490     gen_helper_v7m_tt(tmp, tcg_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6491     store_reg(s, a->rd, tmp);
6492     return true;
6493 }
6494 
6495 /*
6496  * Load/store register index
6497  */
6498 
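/*
 * Build the ISS (syndrome) information for a load/store: only the
 * non-writeback addressing forms report a valid syndrome, and 16-bit
 * Thumb encodings are flagged so the syndrome records the insn length.
 */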
6499 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6500 {
6501     ISSInfo ret;
6502 
6503     /* ISS not valid if writeback */
6504     if (p && !w) {
6505         ret = rd;
6506         if (curr_insn_len(s) == 2) {
6507             ret |= ISSIs16Bit;
6508         }
6509     } else {
6510         ret = ISSInvalid;
6511     }
6512     return ret;
6513 }
6514 
6515 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6516 {
6517     TCGv_i32 addr = load_reg(s, a->rn);
6518 
6519     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6520         gen_helper_v8m_stackcheck(tcg_env, addr);
6521     }
6522 
6523     if (a->p) {
6524         TCGv_i32 ofs = load_reg(s, a->rm);
6525         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6526         if (a->u) {
6527             tcg_gen_add_i32(addr, addr, ofs);
6528         } else {
6529             tcg_gen_sub_i32(addr, addr, ofs);
6530         }
6531     }
6532     return addr;
6533 }
6534 
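/*
 * Finish a register-offset load/store: apply the post-indexed offset
 * and/or write back the base. address_offset lets LDRD/STRD undo the
 * extra +4 applied while transferring the second word.
 */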
6535 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6536                             TCGv_i32 addr, int address_offset)
6537 {
6538     if (!a->p) {
6539         TCGv_i32 ofs = load_reg(s, a->rm);
6540         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6541         if (a->u) {
6542             tcg_gen_add_i32(addr, addr, ofs);
6543         } else {
6544             tcg_gen_sub_i32(addr, addr, ofs);
6545         }
6546     } else if (!a->w) {
6547         return;
6548     }
6549     tcg_gen_addi_i32(addr, addr, address_offset);
6550     store_reg(s, a->rn, addr);
6551 }
6552 
6553 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6554                        MemOp mop, int mem_idx)
6555 {
6556     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6557     TCGv_i32 addr, tmp;
6558 
6559     addr = op_addr_rr_pre(s, a);
6560 
6561     tmp = tcg_temp_new_i32();
6562     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6563     disas_set_da_iss(s, mop, issinfo);
6564 
6565     /*
6566      * Perform base writeback before the loaded value to
6567      * ensure correct behavior with overlapping index registers.
6568      */
6569     op_addr_rr_post(s, a, addr, 0);
6570     store_reg_from_load(s, a->rt, tmp);
6571     return true;
6572 }
6573 
6574 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6575                         MemOp mop, int mem_idx)
6576 {
6577     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6578     TCGv_i32 addr, tmp;
6579 
6580     /*
6581      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6582      * is either UNPREDICTABLE or has defined behaviour
6583      */
6584     if (s->thumb && a->rn == 15) {
6585         return false;
6586     }
6587 
6588     addr = op_addr_rr_pre(s, a);
6589 
6590     tmp = load_reg(s, a->rt);
6591     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6592     disas_set_da_iss(s, mop, issinfo);
6593 
6594     op_addr_rr_post(s, a, addr, 0);
6595     return true;
6596 }
6597 
6598 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6599 {
6600     int mem_idx = get_mem_index(s);
6601     TCGv_i32 addr, tmp;
6602 
6603     if (!ENABLE_ARCH_5TE) {
6604         return false;
6605     }
6606     if (a->rt & 1) {
6607         unallocated_encoding(s);
6608         return true;
6609     }
6610     addr = op_addr_rr_pre(s, a);
6611 
6612     tmp = tcg_temp_new_i32();
6613     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6614     store_reg(s, a->rt, tmp);
6615 
6616     tcg_gen_addi_i32(addr, addr, 4);
6617 
6618     tmp = tcg_temp_new_i32();
6619     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6620     store_reg(s, a->rt + 1, tmp);
6621 
6622     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6623     op_addr_rr_post(s, a, addr, -4);
6624     return true;
6625 }
6626 
6627 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6628 {
6629     int mem_idx = get_mem_index(s);
6630     TCGv_i32 addr, tmp;
6631 
6632     if (!ENABLE_ARCH_5TE) {
6633         return false;
6634     }
6635     if (a->rt & 1) {
6636         unallocated_encoding(s);
6637         return true;
6638     }
6639     addr = op_addr_rr_pre(s, a);
6640 
6641     tmp = load_reg(s, a->rt);
6642     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6643 
6644     tcg_gen_addi_i32(addr, addr, 4);
6645 
6646     tmp = load_reg(s, a->rt + 1);
6647     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6648 
6649     op_addr_rr_post(s, a, addr, -4);
6650     return true;
6651 }
6652 
6653 /*
6654  * Load/store immediate index
6655  */
6656 
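/*
 * Immediate-offset counterpart of op_addr_rr_pre(): return the transfer
 * address, adding the offset now only for the pre-indexed forms, and
 * do the v8M SP-limit check for writeback forms that modify SP.
 */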
6657 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6658 {
6659     int ofs = a->imm;
6660 
6661     if (!a->u) {
6662         ofs = -ofs;
6663     }
6664 
6665     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6666         /*
6667          * Stackcheck. Here we know 'addr' is the current SP;
6668          * U is set if we're moving SP up, else down. It is
6669          * UNKNOWN whether the limit check triggers when SP starts
6670          * below the limit and ends up above it; we choose to do so.
6671          */
6672         if (!a->u) {
6673             TCGv_i32 newsp = tcg_temp_new_i32();
6674             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6675             gen_helper_v8m_stackcheck(tcg_env, newsp);
6676         } else {
6677             gen_helper_v8m_stackcheck(tcg_env, cpu_R[13]);
6678         }
6679     }
6680 
6681     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6682 }
6683 
6684 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6685                             TCGv_i32 addr, int address_offset)
6686 {
6687     if (!a->p) {
6688         if (a->u) {
6689             address_offset += a->imm;
6690         } else {
6691             address_offset -= a->imm;
6692         }
6693     } else if (!a->w) {
6694         return;
6695     }
6696     tcg_gen_addi_i32(addr, addr, address_offset);
6697     store_reg(s, a->rn, addr);
6698 }
6699 
6700 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6701                        MemOp mop, int mem_idx)
6702 {
6703     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6704     TCGv_i32 addr, tmp;
6705 
6706     addr = op_addr_ri_pre(s, a);
6707 
6708     tmp = tcg_temp_new_i32();
6709     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6710     disas_set_da_iss(s, mop, issinfo);
6711 
6712     /*
6713      * Perform base writeback before the loaded value to
6714      * ensure correct behavior with overlapping index registers.
6715      */
6716     op_addr_ri_post(s, a, addr, 0);
6717     store_reg_from_load(s, a->rt, tmp);
6718     return true;
6719 }
6720 
6721 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6722                         MemOp mop, int mem_idx)
6723 {
6724     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6725     TCGv_i32 addr, tmp;
6726 
6727     /*
6728      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6729      * is either UNPREDICTABLE or has defined behaviour
6730      */
6731     if (s->thumb && a->rn == 15) {
6732         return false;
6733     }
6734 
6735     addr = op_addr_ri_pre(s, a);
6736 
6737     tmp = load_reg(s, a->rt);
6738     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6739     disas_set_da_iss(s, mop, issinfo);
6740 
6741     op_addr_ri_post(s, a, addr, 0);
6742     return true;
6743 }
6744 
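/*
 * LDRD with immediate offset: the A32 encoding requires an even Rt and
 * implicitly pairs it with Rt+1, while T32 encodes Rt2 explicitly; the
 * two trans_LDRD_ri_* wrappers below feed both through this helper.
 */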
6745 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6746 {
6747     int mem_idx = get_mem_index(s);
6748     TCGv_i32 addr, tmp;
6749 
6750     addr = op_addr_ri_pre(s, a);
6751 
6752     tmp = tcg_temp_new_i32();
6753     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6754     store_reg(s, a->rt, tmp);
6755 
6756     tcg_gen_addi_i32(addr, addr, 4);
6757 
6758     tmp = tcg_temp_new_i32();
6759     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6760     store_reg(s, rt2, tmp);
6761 
6762     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6763     op_addr_ri_post(s, a, addr, -4);
6764     return true;
6765 }
6766 
6767 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6768 {
6769     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6770         return false;
6771     }
6772     return op_ldrd_ri(s, a, a->rt + 1);
6773 }
6774 
6775 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6776 {
6777     arg_ldst_ri b = {
6778         .u = a->u, .w = a->w, .p = a->p,
6779         .rn = a->rn, .rt = a->rt, .imm = a->imm
6780     };
6781     return op_ldrd_ri(s, &b, a->rt2);
6782 }
6783 
6784 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6785 {
6786     int mem_idx = get_mem_index(s);
6787     TCGv_i32 addr, tmp;
6788 
6789     addr = op_addr_ri_pre(s, a);
6790 
6791     tmp = load_reg(s, a->rt);
6792     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6793 
6794     tcg_gen_addi_i32(addr, addr, 4);
6795 
6796     tmp = load_reg(s, rt2);
6797     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6798 
6799     op_addr_ri_post(s, a, addr, -4);
6800     return true;
6801 }
6802 
6803 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6804 {
6805     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6806         return false;
6807     }
6808     return op_strd_ri(s, a, a->rt + 1);
6809 }
6810 
6811 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6812 {
6813     arg_ldst_ri b = {
6814         .u = a->u, .w = a->w, .p = a->p,
6815         .rn = a->rn, .rt = a->rt, .imm = a->imm
6816     };
6817     return op_strd_ri(s, &b, a->rt2);
6818 }
6819 
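/*
 * Generate the four trans functions for each basic load/store: the
 * immediate and register offset forms, plus the "T" (unprivileged,
 * LDRT/STRT-style) variants which use the user-mode memory index.
 */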
6820 #define DO_LDST(NAME, WHICH, MEMOP) \
6821 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6822 {                                                                     \
6823     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6824 }                                                                     \
6825 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6826 {                                                                     \
6827     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6828 }                                                                     \
6829 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6830 {                                                                     \
6831     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6832 }                                                                     \
6833 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6834 {                                                                     \
6835     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6836 }
6837 
6838 DO_LDST(LDR, load, MO_UL)
6839 DO_LDST(LDRB, load, MO_UB)
6840 DO_LDST(LDRH, load, MO_UW)
6841 DO_LDST(LDRSB, load, MO_SB)
6842 DO_LDST(LDRSH, load, MO_SW)
6843 
6844 DO_LDST(STR, store, MO_UL)
6845 DO_LDST(STRB, store, MO_UB)
6846 DO_LDST(STRH, store, MO_UW)
6847 
6848 #undef DO_LDST
6849 
6850 /*
6851  * Synchronization primitives
6852  */
6853 
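/*
 * SWP/SWPB: atomically exchange Rt2 with memory at [Rn], writing the
 * old memory contents to Rt.
 */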
6854 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6855 {
6856     TCGv_i32 addr, tmp;
6857     TCGv taddr;
6858 
6859     opc |= s->be_data;
6860     addr = load_reg(s, a->rn);
6861     taddr = gen_aa32_addr(s, addr, opc);
6862 
6863     tmp = load_reg(s, a->rt2);
6864     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6865 
6866     store_reg(s, a->rt, tmp);
6867     return true;
6868 }
6869 
6870 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6871 {
6872     return op_swp(s, a, MO_UL | MO_ALIGN);
6873 }
6874 
6875 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6876 {
6877     return op_swp(s, a, MO_UB);
6878 }
6879 
6880 /*
6881  * Load/Store Exclusive and Load-Acquire/Store-Release
6882  */
6883 
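/*
 * Rd receives the store-exclusive status (0 on success, 1 on failure),
 * written by gen_store_exclusive().
 */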
6884 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6885 {
6886     TCGv_i32 addr;
6887     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6888     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6889 
6890     /* We UNDEF for these UNPREDICTABLE cases.  */
6891     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6892         || a->rd == a->rn || a->rd == a->rt
6893         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6894         || (mop == MO_64
6895             && (a->rt2 == 15
6896                 || a->rd == a->rt2
6897                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6898         unallocated_encoding(s);
6899         return true;
6900     }
6901 
6902     if (rel) {
6903         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6904     }
6905 
6906     addr = tcg_temp_new_i32();
6907     load_reg_var(s, addr, a->rn);
6908     tcg_gen_addi_i32(addr, addr, a->imm);
6909 
6910     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6911     return true;
6912 }
6913 
6914 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6915 {
6916     if (!ENABLE_ARCH_6) {
6917         return false;
6918     }
6919     return op_strex(s, a, MO_32, false);
6920 }
6921 
6922 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6923 {
6924     if (!ENABLE_ARCH_6K) {
6925         return false;
6926     }
6927     /* We UNDEF for these UNPREDICTABLE cases.  */
6928     if (a->rt & 1) {
6929         unallocated_encoding(s);
6930         return true;
6931     }
6932     a->rt2 = a->rt + 1;
6933     return op_strex(s, a, MO_64, false);
6934 }
6935 
6936 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6937 {
6938     return op_strex(s, a, MO_64, false);
6939 }
6940 
6941 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6942 {
6943     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6944         return false;
6945     }
6946     return op_strex(s, a, MO_8, false);
6947 }
6948 
6949 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6950 {
6951     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6952         return false;
6953     }
6954     return op_strex(s, a, MO_16, false);
6955 }
6956 
6957 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6958 {
6959     if (!ENABLE_ARCH_8) {
6960         return false;
6961     }
6962     return op_strex(s, a, MO_32, true);
6963 }
6964 
6965 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6966 {
6967     if (!ENABLE_ARCH_8) {
6968         return false;
6969     }
6970     /* We UNDEF for these UNPREDICTABLE cases.  */
6971     if (a->rt & 1) {
6972         unallocated_encoding(s);
6973         return true;
6974     }
6975     a->rt2 = a->rt + 1;
6976     return op_strex(s, a, MO_64, true);
6977 }
6978 
6979 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6980 {
6981     if (!ENABLE_ARCH_8) {
6982         return false;
6983     }
6984     return op_strex(s, a, MO_64, true);
6985 }
6986 
6987 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6988 {
6989     if (!ENABLE_ARCH_8) {
6990         return false;
6991     }
6992     return op_strex(s, a, MO_8, true);
6993 }
6994 
6995 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6996 {
6997     if (!ENABLE_ARCH_8) {
6998         return false;
6999     }
7000     return op_strex(s, a, MO_16, true);
7001 }
7002 
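/*
 * STL/STLB/STLH: store-release without exclusivity; the barrier ahead
 * of the store provides the release ordering.
 */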
7003 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7004 {
7005     TCGv_i32 addr, tmp;
7006 
7007     if (!ENABLE_ARCH_8) {
7008         return false;
7009     }
7010     /* We UNDEF for these UNPREDICTABLE cases.  */
7011     if (a->rn == 15 || a->rt == 15) {
7012         unallocated_encoding(s);
7013         return true;
7014     }
7015 
7016     addr = load_reg(s, a->rn);
7017     tmp = load_reg(s, a->rt);
7018     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7019     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7020     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7021 
7022     return true;
7023 }
7024 
7025 static bool trans_STL(DisasContext *s, arg_STL *a)
7026 {
7027     return op_stl(s, a, MO_UL);
7028 }
7029 
7030 static bool trans_STLB(DisasContext *s, arg_STL *a)
7031 {
7032     return op_stl(s, a, MO_UB);
7033 }
7034 
7035 static bool trans_STLH(DisasContext *s, arg_STL *a)
7036 {
7037     return op_stl(s, a, MO_UW);
7038 }
7039 
7040 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7041 {
7042     TCGv_i32 addr;
7043     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7044     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7045 
7046     /* We UNDEF for these UNPREDICTABLE cases.  */
7047     if (a->rn == 15 || a->rt == 15
7048         || (!v8a && s->thumb && a->rt == 13)
7049         || (mop == MO_64
7050             && (a->rt2 == 15 || a->rt == a->rt2
7051                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7052         unallocated_encoding(s);
7053         return true;
7054     }
7055 
7056     addr = tcg_temp_new_i32();
7057     load_reg_var(s, addr, a->rn);
7058     tcg_gen_addi_i32(addr, addr, a->imm);
7059 
7060     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7061 
7062     if (acq) {
7063         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7064     }
7065     return true;
7066 }
7067 
7068 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7069 {
7070     if (!ENABLE_ARCH_6) {
7071         return false;
7072     }
7073     return op_ldrex(s, a, MO_32, false);
7074 }
7075 
7076 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7077 {
7078     if (!ENABLE_ARCH_6K) {
7079         return false;
7080     }
7081     /* We UNDEF for these UNPREDICTABLE cases.  */
7082     if (a->rt & 1) {
7083         unallocated_encoding(s);
7084         return true;
7085     }
7086     a->rt2 = a->rt + 1;
7087     return op_ldrex(s, a, MO_64, false);
7088 }
7089 
7090 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7091 {
7092     return op_ldrex(s, a, MO_64, false);
7093 }
7094 
7095 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7096 {
7097     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7098         return false;
7099     }
7100     return op_ldrex(s, a, MO_8, false);
7101 }
7102 
7103 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7104 {
7105     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7106         return false;
7107     }
7108     return op_ldrex(s, a, MO_16, false);
7109 }
7110 
7111 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7112 {
7113     if (!ENABLE_ARCH_8) {
7114         return false;
7115     }
7116     return op_ldrex(s, a, MO_32, true);
7117 }
7118 
7119 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7120 {
7121     if (!ENABLE_ARCH_8) {
7122         return false;
7123     }
7124     /* We UNDEF for these UNPREDICTABLE cases.  */
7125     if (a->rt & 1) {
7126         unallocated_encoding(s);
7127         return true;
7128     }
7129     a->rt2 = a->rt + 1;
7130     return op_ldrex(s, a, MO_64, true);
7131 }
7132 
7133 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7134 {
7135     if (!ENABLE_ARCH_8) {
7136         return false;
7137     }
7138     return op_ldrex(s, a, MO_64, true);
7139 }
7140 
7141 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7142 {
7143     if (!ENABLE_ARCH_8) {
7144         return false;
7145     }
7146     return op_ldrex(s, a, MO_8, true);
7147 }
7148 
7149 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7150 {
7151     if (!ENABLE_ARCH_8) {
7152         return false;
7153     }
7154     return op_ldrex(s, a, MO_16, true);
7155 }
7156 
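/*
 * LDA/LDAB/LDAH: load-acquire without exclusivity; the barrier after
 * the load provides the acquire ordering.
 */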
7157 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7158 {
7159     TCGv_i32 addr, tmp;
7160 
7161     if (!ENABLE_ARCH_8) {
7162         return false;
7163     }
7164     /* We UNDEF for these UNPREDICTABLE cases.  */
7165     if (a->rn == 15 || a->rt == 15) {
7166         unallocated_encoding(s);
7167         return true;
7168     }
7169 
7170     addr = load_reg(s, a->rn);
7171     tmp = tcg_temp_new_i32();
7172     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7173     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7174 
7175     store_reg(s, a->rt, tmp);
7176     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7177     return true;
7178 }
7179 
7180 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7181 {
7182     return op_lda(s, a, MO_UL);
7183 }
7184 
7185 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7186 {
7187     return op_lda(s, a, MO_UB);
7188 }
7189 
7190 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7191 {
7192     return op_lda(s, a, MO_UW);
7193 }
7194 
7195 /*
7196  * Media instructions
7197  */
7198 
7199 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7200 {
7201     TCGv_i32 t1, t2;
7202 
7203     if (!ENABLE_ARCH_6) {
7204         return false;
7205     }
7206 
7207     t1 = load_reg(s, a->rn);
7208     t2 = load_reg(s, a->rm);
7209     gen_helper_usad8(t1, t1, t2);
7210     if (a->ra != 15) {
7211         t2 = load_reg(s, a->ra);
7212         tcg_gen_add_i32(t1, t1, t2);
7213     }
7214     store_reg(s, a->rd, t1);
7215     return true;
7216 }
7217 
7218 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7219 {
7220     TCGv_i32 tmp;
7221     int width = a->widthm1 + 1;
7222     int shift = a->lsb;
7223 
7224     if (!ENABLE_ARCH_6T2) {
7225         return false;
7226     }
7227     if (shift + width > 32) {
7228         /* UNPREDICTABLE; we choose to UNDEF */
7229         unallocated_encoding(s);
7230         return true;
7231     }
7232 
7233     tmp = load_reg(s, a->rn);
7234     if (u) {
7235         tcg_gen_extract_i32(tmp, tmp, shift, width);
7236     } else {
7237         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7238     }
7239     store_reg(s, a->rd, tmp);
7240     return true;
7241 }
7242 
7243 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7244 {
7245     return op_bfx(s, a, false);
7246 }
7247 
7248 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7249 {
7250     return op_bfx(s, a, true);
7251 }
7252 
7253 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7254 {
7255     int msb = a->msb, lsb = a->lsb;
7256     TCGv_i32 t_in, t_rd;
7257     int width;
7258 
7259     if (!ENABLE_ARCH_6T2) {
7260         return false;
7261     }
7262     if (msb < lsb) {
7263         /* UNPREDICTABLE; we choose to UNDEF */
7264         unallocated_encoding(s);
7265         return true;
7266     }
7267 
7268     width = msb + 1 - lsb;
7269     if (a->rn == 15) {
7270         /* BFC */
7271         t_in = tcg_constant_i32(0);
7272     } else {
7273         /* BFI */
7274         t_in = load_reg(s, a->rn);
7275     }
7276     t_rd = load_reg(s, a->rd);
7277     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7278     store_reg(s, a->rd, t_rd);
7279     return true;
7280 }
7281 
7282 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7283 {
7284     unallocated_encoding(s);
7285     return true;
7286 }
7287 
7288 /*
7289  * Parallel addition and subtraction
7290  */
7291 
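/*
 * Naming convention for the helpers below: the S/U forms are modular
 * signed/unsigned and set the GE flags, the Q/UQ forms saturate, and
 * the SH/UH forms halve the result; "16" operates on two halfwords,
 * "8" on four bytes.
 */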
7292 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7293                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7294 {
7295     TCGv_i32 t0, t1;
7296 
7297     if (s->thumb
7298         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7299         : !ENABLE_ARCH_6) {
7300         return false;
7301     }
7302 
7303     t0 = load_reg(s, a->rn);
7304     t1 = load_reg(s, a->rm);
7305 
7306     gen(t0, t0, t1);
7307 
7308     store_reg(s, a->rd, t0);
7309     return true;
7310 }
7311 
7312 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7313                              void (*gen)(TCGv_i32, TCGv_i32,
7314                                          TCGv_i32, TCGv_ptr))
7315 {
7316     TCGv_i32 t0, t1;
7317     TCGv_ptr ge;
7318 
7319     if (s->thumb
7320         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7321         : !ENABLE_ARCH_6) {
7322         return false;
7323     }
7324 
7325     t0 = load_reg(s, a->rn);
7326     t1 = load_reg(s, a->rm);
7327 
7328     ge = tcg_temp_new_ptr();
7329     tcg_gen_addi_ptr(ge, tcg_env, offsetof(CPUARMState, GE));
7330     gen(t0, t0, t1, ge);
7331 
7332     store_reg(s, a->rd, t0);
7333     return true;
7334 }
7335 
7336 #define DO_PAR_ADDSUB(NAME, helper) \
7337 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7338 {                                                       \
7339     return op_par_addsub(s, a, helper);                 \
7340 }
7341 
7342 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7343 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7344 {                                                       \
7345     return op_par_addsub_ge(s, a, helper);              \
7346 }
7347 
7348 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7349 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7350 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7351 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7352 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7353 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7354 
7355 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7356 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7357 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7358 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7359 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7360 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7361 
7362 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7363 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7364 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7365 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7366 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7367 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7368 
7369 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7370 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7371 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7372 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7373 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7374 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7375 
7376 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7377 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7378 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7379 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7380 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7381 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7382 
7383 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7384 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7385 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7386 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7387 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7388 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7389 
7390 #undef DO_PAR_ADDSUB
7391 #undef DO_PAR_ADDSUB_GE
7392 
7393 /*
7394  * Packing, unpacking, saturation, and reversal
7395  */
7396 
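/*
 * PKHBT: Rd = (Rm LSL imm)[31:16] : Rn[15:0]
 * PKHTB: Rd = Rn[31:16] : (Rm ASR imm)[15:0], where an encoded shift
 * of 0 means ASR #32 (modelled below as ASR #31, which is equivalent).
 */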
7397 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7398 {
7399     TCGv_i32 tn, tm;
7400     int shift = a->imm;
7401 
7402     if (s->thumb
7403         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7404         : !ENABLE_ARCH_6) {
7405         return false;
7406     }
7407 
7408     tn = load_reg(s, a->rn);
7409     tm = load_reg(s, a->rm);
7410     if (a->tb) {
7411         /* PKHTB */
7412         if (shift == 0) {
7413             shift = 31;
7414         }
7415         tcg_gen_sari_i32(tm, tm, shift);
7416         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7417     } else {
7418         /* PKHBT */
7419         tcg_gen_shli_i32(tm, tm, shift);
7420         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7421     }
7422     store_reg(s, a->rd, tn);
7423     return true;
7424 }
7425 
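/*
 * SSAT/USAT: shift the source (LSL or ASR per a->sh) and saturate to
 * the width given by satimm; an ASR amount of 0 encodes ASR #32, for
 * which ASR #31 gives the same all-sign-bits value.
 */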
7426 static bool op_sat(DisasContext *s, arg_sat *a,
7427                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7428 {
7429     TCGv_i32 tmp;
7430     int shift = a->imm;
7431 
7432     if (!ENABLE_ARCH_6) {
7433         return false;
7434     }
7435 
7436     tmp = load_reg(s, a->rn);
7437     if (a->sh) {
7438         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7439     } else {
7440         tcg_gen_shli_i32(tmp, tmp, shift);
7441     }
7442 
7443     gen(tmp, tcg_env, tmp, tcg_constant_i32(a->satimm));
7444 
7445     store_reg(s, a->rd, tmp);
7446     return true;
7447 }
7448 
7449 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7450 {
7451     return op_sat(s, a, gen_helper_ssat);
7452 }
7453 
7454 static bool trans_USAT(DisasContext *s, arg_sat *a)
7455 {
7456     return op_sat(s, a, gen_helper_usat);
7457 }
7458 
7459 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7460 {
7461     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7462         return false;
7463     }
7464     return op_sat(s, a, gen_helper_ssat16);
7465 }
7466 
7467 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7468 {
7469     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7470         return false;
7471     }
7472     return op_sat(s, a, gen_helper_usat16);
7473 }
7474 
7475 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7476                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7477                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7478 {
7479     TCGv_i32 tmp;
7480 
7481     if (!ENABLE_ARCH_6) {
7482         return false;
7483     }
7484 
7485     tmp = load_reg(s, a->rm);
7486     /*
7487      * TODO: In many cases we could do a shift instead of a rotate.
7488      * Combined with a simple extend, that becomes an extract.
7489      */
7490     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7491     gen_extract(tmp, tmp);
7492 
7493     if (a->rn != 15) {
7494         TCGv_i32 tmp2 = load_reg(s, a->rn);
7495         gen_add(tmp, tmp, tmp2);
7496     }
7497     store_reg(s, a->rd, tmp);
7498     return true;
7499 }
7500 
7501 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7502 {
7503     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7504 }
7505 
7506 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7507 {
7508     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7509 }
7510 
7511 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7512 {
7513     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7514         return false;
7515     }
7516     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7517 }
7518 
7519 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7520 {
7521     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7522 }
7523 
7524 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7525 {
7526     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7527 }
7528 
7529 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7530 {
7531     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7532         return false;
7533     }
7534     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7535 }
7536 
7537 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7538 {
7539     TCGv_i32 t1, t2, t3;
7540 
7541     if (s->thumb
7542         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7543         : !ENABLE_ARCH_6) {
7544         return false;
7545     }
7546 
7547     t1 = load_reg(s, a->rn);
7548     t2 = load_reg(s, a->rm);
7549     t3 = tcg_temp_new_i32();
7550     tcg_gen_ld_i32(t3, tcg_env, offsetof(CPUARMState, GE));
7551     gen_helper_sel_flags(t1, t3, t1, t2);
7552     store_reg(s, a->rd, t1);
7553     return true;
7554 }
7555 
7556 static bool op_rr(DisasContext *s, arg_rr *a,
7557                   void (*gen)(TCGv_i32, TCGv_i32))
7558 {
7559     TCGv_i32 tmp;
7560 
7561     tmp = load_reg(s, a->rm);
7562     gen(tmp, tmp);
7563     store_reg(s, a->rd, tmp);
7564     return true;
7565 }
7566 
7567 static bool trans_REV(DisasContext *s, arg_rr *a)
7568 {
7569     if (!ENABLE_ARCH_6) {
7570         return false;
7571     }
7572     return op_rr(s, a, tcg_gen_bswap32_i32);
7573 }
7574 
7575 static bool trans_REV16(DisasContext *s, arg_rr *a)
7576 {
7577     if (!ENABLE_ARCH_6) {
7578         return false;
7579     }
7580     return op_rr(s, a, gen_rev16);
7581 }
7582 
7583 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7584 {
7585     if (!ENABLE_ARCH_6) {
7586         return false;
7587     }
7588     return op_rr(s, a, gen_revsh);
7589 }
7590 
7591 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7592 {
7593     if (!ENABLE_ARCH_6T2) {
7594         return false;
7595     }
7596     return op_rr(s, a, gen_helper_rbit);
7597 }
7598 
7599 /*
7600  * Signed multiply, signed and unsigned divide
7601  */
7602 
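/*
 * SMLAD and friends: multiply the signed halfword pairs of Rn and Rm
 * (m_swap swaps the halves of Rm first), then add or subtract the two
 * products and accumulate Ra, setting Q on overflow. For example
 * SMLAD: Rd = Ra + Rn[15:0]*Rm[15:0] + Rn[31:16]*Rm[31:16].
 */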
7603 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7604 {
7605     TCGv_i32 t1, t2;
7606 
7607     if (!ENABLE_ARCH_6) {
7608         return false;
7609     }
7610 
7611     t1 = load_reg(s, a->rn);
7612     t2 = load_reg(s, a->rm);
7613     if (m_swap) {
7614         gen_swap_half(t2, t2);
7615     }
7616     gen_smul_dual(t1, t2);
7617 
7618     if (sub) {
7619         /*
7620          * This subtraction cannot overflow, so we can do a simple
7621          * 32-bit subtraction and then a possible 32-bit saturating
7622          * addition of Ra.
7623          */
7624         tcg_gen_sub_i32(t1, t1, t2);
7625 
7626         if (a->ra != 15) {
7627             t2 = load_reg(s, a->ra);
7628             gen_helper_add_setq(t1, tcg_env, t1, t2);
7629         }
7630     } else if (a->ra == 15) {
7631         /* Single saturation-checking addition */
7632         gen_helper_add_setq(t1, tcg_env, t1, t2);
7633     } else {
7634         /*
7635          * We need to add the products and Ra together and then
7636          * determine whether the final result overflowed. Doing
7637          * this as two separate add-and-check-overflow steps incorrectly
7638          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7639          * Do all the arithmetic at 64 bits and then check for overflow.
7640          */
7641         TCGv_i64 p64, q64;
7642         TCGv_i32 t3, qf, one;
7643 
7644         p64 = tcg_temp_new_i64();
7645         q64 = tcg_temp_new_i64();
7646         tcg_gen_ext_i32_i64(p64, t1);
7647         tcg_gen_ext_i32_i64(q64, t2);
7648         tcg_gen_add_i64(p64, p64, q64);
7649         load_reg_var(s, t2, a->ra);
7650         tcg_gen_ext_i32_i64(q64, t2);
7651         tcg_gen_add_i64(p64, p64, q64);
7652 
7653         tcg_gen_extr_i64_i32(t1, t2, p64);
7654         /*
7655          * t1 is the low half of the result which goes into Rd.
7656          * We have overflow and must set Q if the high half (t2)
7657          * is different from the sign-extension of t1.
7658          */
7659         t3 = tcg_temp_new_i32();
7660         tcg_gen_sari_i32(t3, t1, 31);
7661         qf = load_cpu_field(QF);
7662         one = tcg_constant_i32(1);
7663         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7664         store_cpu_field(qf, QF);
7665     }
7666     store_reg(s, a->rd, t1);
7667     return true;
7668 }
7669 
7670 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7671 {
7672     return op_smlad(s, a, false, false);
7673 }
7674 
7675 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7676 {
7677     return op_smlad(s, a, true, false);
7678 }
7679 
7680 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7681 {
7682     return op_smlad(s, a, false, true);
7683 }
7684 
7685 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7686 {
7687     return op_smlad(s, a, true, true);
7688 }
7689 
7690 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7691 {
7692     TCGv_i32 t1, t2;
7693     TCGv_i64 l1, l2;
7694 
7695     if (!ENABLE_ARCH_6) {
7696         return false;
7697     }
7698 
7699     t1 = load_reg(s, a->rn);
7700     t2 = load_reg(s, a->rm);
7701     if (m_swap) {
7702         gen_swap_half(t2, t2);
7703     }
7704     gen_smul_dual(t1, t2);
7705 
7706     l1 = tcg_temp_new_i64();
7707     l2 = tcg_temp_new_i64();
7708     tcg_gen_ext_i32_i64(l1, t1);
7709     tcg_gen_ext_i32_i64(l2, t2);
7710 
7711     if (sub) {
7712         tcg_gen_sub_i64(l1, l1, l2);
7713     } else {
7714         tcg_gen_add_i64(l1, l1, l2);
7715     }
7716 
7717     gen_addq(s, l1, a->ra, a->rd);
7718     gen_storeq_reg(s, a->ra, a->rd, l1);
7719     return true;
7720 }
7721 
7722 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7723 {
7724     return op_smlald(s, a, false, false);
7725 }
7726 
7727 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7728 {
7729     return op_smlald(s, a, true, false);
7730 }
7731 
7732 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7733 {
7734     return op_smlald(s, a, false, true);
7735 }
7736 
7737 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7738 {
7739     return op_smlald(s, a, true, true);
7740 }
7741 
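/*
 * SMMLA/SMMLS/SMMUL: 32x32->64 signed multiply keeping only the high
 * 32 bits, optionally rounded (add 0x80000000 before truncating) and
 * accumulated: SMMLA adds Ra, SMMLS computes (Ra:0 - product) >> 32,
 * and Ra == 15 gives the non-accumulating SMMUL.
 */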
7742 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7743 {
7744     TCGv_i32 t1, t2;
7745 
7746     if (s->thumb
7747         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7748         : !ENABLE_ARCH_6) {
7749         return false;
7750     }
7751 
7752     t1 = load_reg(s, a->rn);
7753     t2 = load_reg(s, a->rm);
7754     tcg_gen_muls2_i32(t2, t1, t1, t2);
7755 
7756     if (a->ra != 15) {
7757         TCGv_i32 t3 = load_reg(s, a->ra);
7758         if (sub) {
7759             /*
7760              * For SMMLS we need a true 64-bit subtract: a non-zero
7761              * product lowpart can generate a borrow into the high
7762              * word, and we need the correct result lowpart for rounding.
7763              */
7764             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7765         } else {
7766             tcg_gen_add_i32(t1, t1, t3);
7767         }
7768     }
7769     if (round) {
7770         /*
7771          * Adding 0x80000000 to the 64-bit quantity means that we have
7772          * carry in to the high word when the low word has the msb set.
7773          */
7774         tcg_gen_shri_i32(t2, t2, 31);
7775         tcg_gen_add_i32(t1, t1, t2);
7776     }
7777     store_reg(s, a->rd, t1);
7778     return true;
7779 }
7780 
7781 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7782 {
7783     return op_smmla(s, a, false, false);
7784 }
7785 
7786 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7787 {
7788     return op_smmla(s, a, true, false);
7789 }
7790 
7791 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7792 {
7793     return op_smmla(s, a, false, true);
7794 }
7795 
7796 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7797 {
7798     return op_smmla(s, a, true, true);
7799 }
7800 
7801 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7802 {
7803     TCGv_i32 t1, t2;
7804 
7805     if (s->thumb
7806         ? !dc_isar_feature(aa32_thumb_div, s)
7807         : !dc_isar_feature(aa32_arm_div, s)) {
7808         return false;
7809     }
7810 
7811     t1 = load_reg(s, a->rn);
7812     t2 = load_reg(s, a->rm);
7813     if (u) {
7814         gen_helper_udiv(t1, tcg_env, t1, t2);
7815     } else {
7816         gen_helper_sdiv(t1, tcg_env, t1, t2);
7817     }
7818     store_reg(s, a->rd, t1);
7819     return true;
7820 }
7821 
7822 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7823 {
7824     return op_div(s, a, false);
7825 }
7826 
7827 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7828 {
7829     return op_div(s, a, true);
7830 }
7831 
7832 /*
7833  * Block data transfer
7834  */
7835 
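/*
 * Compute the lowest address that this LDM/STM will access: the transfer
 * loops below always walk upwards in steps of 4, so the decrementing
 * addressing modes pre-adjust the base by the register count here.
 */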
7836 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7837 {
7838     TCGv_i32 addr = load_reg(s, a->rn);
7839 
7840     if (a->b) {
7841         if (a->i) {
7842             /* pre increment */
7843             tcg_gen_addi_i32(addr, addr, 4);
7844         } else {
7845             /* pre decrement */
7846             tcg_gen_addi_i32(addr, addr, -(n * 4));
7847         }
7848     } else if (!a->i && n != 1) {
7849         /* post decrement */
7850         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7851     }
7852 
7853     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7854         /*
7855          * If the writeback is incrementing SP rather than
7856          * decrementing it, and the initial SP is below the
7857          * stack limit but the final written-back SP would
7858          * be above, then we must not perform any memory
7859          * accesses, but it is IMPDEF whether we generate
7860          * an exception. We choose to do so in this case.
7861          * At this point 'addr' is the lowest address, so
7862          * either the original SP (if incrementing) or our
7863          * final SP (if decrementing), so that's what we check.
7864          */
7865         gen_helper_v8m_stackcheck(tcg_env, addr);
7866     }
7867 
7868     return addr;
7869 }
7870 
7871 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7872                                TCGv_i32 addr, int n)
7873 {
7874     if (a->w) {
7875         /* write back */
7876         if (!a->b) {
7877             if (a->i) {
7878                 /* post increment */
7879                 tcg_gen_addi_i32(addr, addr, 4);
7880             } else {
7881                 /* post decrement */
7882                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7883             }
7884         } else if (!a->i && n != 1) {
7885             /* pre decrement */
7886             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7887         }
7888         store_reg(s, a->rn, addr);
7889     }
7890 }
7891 
7892 static bool op_stm(DisasContext *s, arg_ldst_block *a)
7893 {
7894     int i, j, n, list, mem_idx;
7895     bool user = a->u;
7896     TCGv_i32 addr, tmp;
7897 
7898     if (user) {
7899         /* STM (user) */
7900         if (IS_USER(s)) {
7901             /* Only usable in supervisor mode.  */
7902             unallocated_encoding(s);
7903             return true;
7904         }
7905     }
7906 
7907     list = a->list;
7908     n = ctpop16(list);
7909     /*
7910      * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
7911      * to UNDEF. In the T32 STM encoding n == 1 is also UNPREDICTABLE,
7912      * but hardware treats it like the A32 version and implements the
7913      * single-register-store, and some in-the-wild (buggy) software
7914      * assumes that, so we don't UNDEF on that case.
7915      */
7916     if (n < 1 || a->rn == 15) {
7917         unallocated_encoding(s);
7918         return true;
7919     }
7920 
7921     s->eci_handled = true;
7922 
7923     addr = op_addr_block_pre(s, a, n);
7924     mem_idx = get_mem_index(s);
7925 
7926     for (i = j = 0; i < 16; i++) {
7927         if (!(list & (1 << i))) {
7928             continue;
7929         }
7930 
7931         if (user && i != 15) {
7932             tmp = tcg_temp_new_i32();
7933             gen_helper_get_user_reg(tmp, tcg_env, tcg_constant_i32(i));
7934         } else {
7935             tmp = load_reg(s, i);
7936         }
7937         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7938 
7939         /* No need to add after the last transfer.  */
7940         if (++j != n) {
7941             tcg_gen_addi_i32(addr, addr, 4);
7942         }
7943     }
7944 
7945     op_addr_block_post(s, a, addr, n);
7946     clear_eci_state(s);
7947     return true;
7948 }
7949 
7950 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7951 {
7952     return op_stm(s, a);
7953 }
7954 
7955 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7956 {
7957     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7958     if (a->w && (a->list & (1 << a->rn))) {
7959         unallocated_encoding(s);
7960         return true;
7961     }
7962     return op_stm(s, a);
7963 }
7964 
7965 static bool do_ldm(DisasContext *s, arg_ldst_block *a)
7966 {
7967     int i, j, n, list, mem_idx;
7968     bool loaded_base;
7969     bool user = a->u;
7970     bool exc_return = false;
7971     TCGv_i32 addr, tmp, loaded_var;
7972 
7973     if (user) {
7974         /* LDM (user), LDM (exception return) */
7975         if (IS_USER(s)) {
7976             /* Only usable in supervisor mode.  */
7977             unallocated_encoding(s);
7978             return true;
7979         }
7980         if (extract32(a->list, 15, 1)) {
7981             exc_return = true;
7982             user = false;
7983         } else {
7984             /* LDM (user) does not allow writeback.  */
7985             if (a->w) {
7986                 unallocated_encoding(s);
7987                 return true;
7988             }
7989         }
7990     }
7991 
7992     list = a->list;
7993     n = ctpop16(list);
7994     /*
7995      * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
7996      * to UNDEF. In the T32 LDM encoding n == 1 is also UNPREDICTABLE,
7997      * but hardware treats it like the A32 version and implements the
7998      * single-register-load, and some in-the-wild (buggy) software
7999      * assumes that, so we don't UNDEF on that case.
8000      */
8001     if (n < 1 || a->rn == 15) {
8002         unallocated_encoding(s);
8003         return true;
8004     }
8005 
8006     s->eci_handled = true;
8007 
8008     addr = op_addr_block_pre(s, a, n);
8009     mem_idx = get_mem_index(s);
8010     loaded_base = false;
8011     loaded_var = NULL;
8012 
8013     for (i = j = 0; i < 16; i++) {
8014         if (!(list & (1 << i))) {
8015             continue;
8016         }
8017 
8018         tmp = tcg_temp_new_i32();
8019         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8020         if (user) {
8021             gen_helper_set_user_reg(tcg_env, tcg_constant_i32(i), tmp);
8022         } else if (i == a->rn) {
8023             loaded_var = tmp;
8024             loaded_base = true;
8025         } else if (i == 15 && exc_return) {
8026             store_pc_exc_ret(s, tmp);
8027         } else {
8028             store_reg_from_load(s, i, tmp);
8029         }
8030 
8031         /* No need to add after the last transfer.  */
8032         if (++j != n) {
8033             tcg_gen_addi_i32(addr, addr, 4);
8034         }
8035     }
8036 
8037     op_addr_block_post(s, a, addr, n);
8038 
8039     if (loaded_base) {
8040         /* Note that we reject base == pc above.  */
8041         store_reg(s, a->rn, loaded_var);
8042     }
8043 
8044     if (exc_return) {
8045         /* Restore CPSR from SPSR.  */
8046         tmp = load_cpu_field(spsr);
8047         translator_io_start(&s->base);
8048         gen_helper_cpsr_write_eret(tcg_env, tmp);
8049         /* Must exit loop to check un-masked IRQs */
8050         s->base.is_jmp = DISAS_EXIT;
8051     }
8052     clear_eci_state(s);
8053     return true;
8054 }
8055 
8056 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8057 {
8058     /*
8059      * Writeback register in register list is UNPREDICTABLE
8060      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8061      * an UNKNOWN value to the base register.
8062      */
8063     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8064         unallocated_encoding(s);
8065         return true;
8066     }
8067     return do_ldm(s, a);
8068 }
8069 
8070 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8071 {
8072     /* Writeback register in register list is UNPREDICTABLE for T32. */
8073     if (a->w && (a->list & (1 << a->rn))) {
8074         unallocated_encoding(s);
8075         return true;
8076     }
8077     return do_ldm(s, a);
8078 }
8079 
8080 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8081 {
8082     /* Writeback is conditional on the base register not being loaded.  */
8083     a->w = !(a->list & (1 << a->rn));
8084     return do_ldm(s, a);
8085 }
8086 
8087 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8088 {
8089     int i;
8090     TCGv_i32 zero;
8091 
8092     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8093         return false;
8094     }
8095 
8096     if (extract32(a->list, 13, 1)) {
8097         return false;
8098     }
8099 
8100     if (!a->list) {
8101         /* UNPREDICTABLE; we choose to UNDEF */
8102         return false;
8103     }
8104 
8105     s->eci_handled = true;
8106 
8107     zero = tcg_constant_i32(0);
8108     for (i = 0; i < 15; i++) {
8109         if (extract32(a->list, i, 1)) {
8110             /* Clear R[i] */
8111             tcg_gen_mov_i32(cpu_R[i], zero);
8112         }
8113     }
8114     if (extract32(a->list, 15, 1)) {
8115         /*
8116          * Clear APSR (by calling the MSR helper with the same argument
8117          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8118          */
8119         gen_helper_v7m_msr(tcg_env, tcg_constant_i32(0xc00), zero);
8120     }
8121     clear_eci_state(s);
8122     return true;
8123 }
8124 
8125 /*
8126  * Branch, branch with link
8127  */
8128 
8129 static bool trans_B(DisasContext *s, arg_i *a)
8130 {
8131     gen_jmp(s, jmp_diff(s, a->imm));
8132     return true;
8133 }
8134 
8135 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8136 {
8137     /* This has cond from encoding, required to be outside IT block.  */
8138     if (a->cond >= 0xe) {
8139         return false;
8140     }
8141     if (s->condexec_mask) {
8142         unallocated_encoding(s);
8143         return true;
8144     }
8145     arm_skip_unless(s, a->cond);
8146     gen_jmp(s, jmp_diff(s, a->imm));
8147     return true;
8148 }
8149 
8150 static bool trans_BL(DisasContext *s, arg_i *a)
8151 {
8152     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8153     gen_jmp(s, jmp_diff(s, a->imm));
8154     return true;
8155 }
8156 
8157 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8158 {
8159     /*
8160      * BLX <imm> would be useless on M-profile; the encoding space
8161      * is used for other insns from v8.1M onward, and UNDEFs before that.
8162      */
8163     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8164         return false;
8165     }
8166 
8167     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8168     if (s->thumb && (a->imm & 2)) {
8169         return false;
8170     }
8171     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8172     store_cpu_field_constant(!s->thumb, thumb);
8173     /* This jump is computed from an aligned PC: subtract off the low bits. */
8174     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8175     return true;
8176 }
8177 
8178 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8179 {
8180     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8181     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8182     return true;
8183 }
8184 
8185 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8186 {
8187     TCGv_i32 tmp = tcg_temp_new_i32();
8188 
8189     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8190     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8191     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8192     gen_bx(s, tmp);
8193     return true;
8194 }
8195 
8196 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8197 {
8198     TCGv_i32 tmp;
8199 
8200     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8201     if (!ENABLE_ARCH_5) {
8202         return false;
8203     }
8204     tmp = tcg_temp_new_i32();
8205     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8206     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8207     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8208     gen_bx(s, tmp);
8209     return true;
8210 }
8211 
8212 static bool trans_BF(DisasContext *s, arg_BF *a)
8213 {
8214     /*
8215      * M-profile branch future insns. The architecture permits an
8216      * implementation to implement these as NOPs (equivalent to
8217      * discarding the LO_BRANCH_INFO cache immediately), and we
8218      * take that IMPDEF option because for QEMU a "real" implementation
8219      * would be complicated and wouldn't execute any faster.
8220      */
8221     if (!dc_isar_feature(aa32_lob, s)) {
8222         return false;
8223     }
8224     if (a->boff == 0) {
8225         /* SEE "Related encodings" (loop insns) */
8226         return false;
8227     }
8228     /* Handle as NOP */
8229     return true;
8230 }
8231 
8232 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8233 {
8234     /* M-profile low-overhead loop start */
8235     TCGv_i32 tmp;
8236 
8237     if (!dc_isar_feature(aa32_lob, s)) {
8238         return false;
8239     }
8240     if (a->rn == 13 || a->rn == 15) {
8241         /*
8242          * For DLSTP rn == 15 is a related encoding (LCTP); the
8243          * other cases caught by this condition are all
8244          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8245          */
8246         return false;
8247     }
8248 
8249     if (a->size != 4) {
8250         /* DLSTP */
8251         if (!dc_isar_feature(aa32_mve, s)) {
8252             return false;
8253         }
8254         if (!vfp_access_check(s)) {
8255             return true;
8256         }
8257     }
8258 
8259     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8260     tmp = load_reg(s, a->rn);
8261     store_reg(s, 14, tmp);
8262     if (a->size != 4) {
8263         /* DLSTP: set FPSCR.LTPSIZE */
8264         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8265         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8266     }
8267     return true;
8268 }
8269 
8270 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8271 {
8272     /* M-profile low-overhead while-loop start */
8273     TCGv_i32 tmp;
8274     DisasLabel nextlabel;
8275 
8276     if (!dc_isar_feature(aa32_lob, s)) {
8277         return false;
8278     }
8279     if (a->rn == 13 || a->rn == 15) {
8280         /*
8281          * For WLSTP rn == 15 is a related encoding (LE); the
8282          * other cases caught by this condition are all
8283          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8284          */
8285         return false;
8286     }
8287     if (s->condexec_mask) {
8288         /*
8289          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8290          * we choose to UNDEF, because otherwise our use of
8291          * gen_goto_tb(1) would clash with the use of TB exit 1
8292          * in the dc->condjmp condition-failed codepath in
8293          * arm_tr_tb_stop() and we'd get an assertion.
8294          */
8295         return false;
8296     }
8297     if (a->size != 4) {
8298         /* WLSTP */
8299         if (!dc_isar_feature(aa32_mve, s)) {
8300             return false;
8301         }
8302         /*
8303          * We need to check that the FPU is enabled here, but mustn't
8304          * call vfp_access_check() to do that because we don't want to
8305          * do the lazy state preservation in the "loop count is zero" case.
8306          * Do the check-and-raise-exception by hand.
8307          */
8308         if (s->fp_excp_el) {
8309             gen_exception_insn_el(s, 0, EXCP_NOCP,
8310                                   syn_uncategorized(), s->fp_excp_el);
8311             return true;
8312         }
8313     }
8314 
8315     nextlabel = gen_disas_label(s);
8316     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8317     tmp = load_reg(s, a->rn);
8318     store_reg(s, 14, tmp);
8319     if (a->size != 4) {
8320         /*
8321          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8322          * lazy state preservation, new FP context creation, etc,
8323          * that vfp_access_check() does. We know that the actual
8324          * access check will succeed (ie it won't generate code that
8325          * throws an exception) because we did that check by hand earlier.
8326          */
8327         bool ok = vfp_access_check(s);
8328         assert(ok);
8329         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8330         /*
8331          * LTPSIZE has been updated, but MVE_NO_PRED will still have the same
8332          * value (0) at this upcoming exit from the TB, so gen_jmp_tb() is OK.
8333          */
8334     }
8335     gen_jmp_tb(s, curr_insn_len(s), 1);
8336 
8337     set_disas_label(s, nextlabel);
8338     gen_jmp(s, jmp_diff(s, a->imm));
8339     return true;
8340 }
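     /*
      * Illustrative summary of the WLS/WLSTP handling above: if Rn is
      * zero we branch straight to the target encoded in the insn (past
      * the loop body); otherwise LR is set to Rn and we fall through to
      * the next insn, with WLSTP also recording the element size in
      * FPSCR.LTPSIZE in the same way as DLSTP.
      */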
8341 
8342 static bool trans_LE(DisasContext *s, arg_LE *a)
8343 {
8344     /*
8345      * M-profile low-overhead loop end. The architecture permits an
8346      * implementation to discard the LO_BRANCH_INFO cache at any time,
8347      * and we take the IMPDEF option to never set it in the first place
8348      * (equivalent to always discarding it immediately), because for QEMU
8349      * a "real" implementation would be complicated and wouldn't execute
8350      * any faster.
8351      */
8352     TCGv_i32 tmp;
8353     DisasLabel loopend;
8354     bool fpu_active;
8355 
8356     if (!dc_isar_feature(aa32_lob, s)) {
8357         return false;
8358     }
8359     if (a->f && a->tp) {
8360         return false;
8361     }
8362     if (s->condexec_mask) {
8363         /*
8364          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8365          * we choose to UNDEF, because otherwise our use of
8366          * gen_goto_tb(1) would clash with the use of TB exit 1
8367          * in the dc->condjmp condition-failed codepath in
8368          * arm_tr_tb_stop() and we'd get an assertion.
8369          */
8370         return false;
8371     }
8372     if (a->tp) {
8373         /* LETP */
8374         if (!dc_isar_feature(aa32_mve, s)) {
8375             return false;
8376         }
8377         if (!vfp_access_check(s)) {
8378             s->eci_handled = true;
8379             return true;
8380         }
8381     }
8382 
8383     /* LE/LETP is OK with ECI set and leaves it untouched */
8384     s->eci_handled = true;
8385 
8386     /*
8387      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8388      * UsageFault exception for the LE insn in that case. Note that we
8389      * are not directly checking FPSCR.LTPSIZE but instead check the
8390      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8391      * not currently active (ie ActiveFPState() returns false). We
8392      * can identify not-active purely from our TB state flags, as the
8393      * FPU is active only if:
8394      *  the FPU is enabled
8395      *  AND lazy state preservation is not active
8396      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8397      *
8398      * Usually we don't need to care about this distinction between
8399      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8400      * will either take an exception or clear the conditions that make
8401      * the FPU not active. But LE is an unusual case of a non-FP insn
8402      * that looks at LTPSIZE.
8403      */
8404     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8405 
8406     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8407         /* Need to do a runtime check for LTPSIZE != 4 */
8408         DisasLabel skipexc = gen_disas_label(s);
8409         tmp = load_cpu_field(v7m.ltpsize);
8410         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8411         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8412         set_disas_label(s, skipexc);
8413     }
8414 
8415     if (a->f) {
8416         /* Loop-forever: just jump back to the loop start */
8417         gen_jmp(s, jmp_diff(s, -a->imm));
8418         return true;
8419     }
8420 
8421     /*
8422      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8423      * For LE, we know at this point that LTPSIZE must be 4 and the
8424      * loop decrement value is 1. For LETP we need to calculate the decrement
8425      * value from LTPSIZE.
8426      */
8427     loopend = gen_disas_label(s);
8428     if (!a->tp) {
8429         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8430         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8431     } else {
8432         /*
8433          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8434          * so that decr stays live after the brcondi.
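              * For example (illustrative): LTPSIZE == 2 (32-bit elements)
              * gives a decrement of 1 << 2 == 4 per iteration, and
              * LTPSIZE == 0 (bytes) gives 16, i.e. the number of elements
              * in a 16-byte MVE vector.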
8435          */
8436         TCGv_i32 decr = tcg_temp_new_i32();
8437         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8438         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8439         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8440 
8441         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8442 
8443         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8444     }
8445     /* Jump back to the loop start */
8446     gen_jmp(s, jmp_diff(s, -a->imm));
8447 
8448     set_disas_label(s, loopend);
8449     if (a->tp) {
8450         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8451         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8452     }
8453     /* End TB, continuing to following insn */
8454     gen_jmp_tb(s, curr_insn_len(s), 1);
8455     return true;
8456 }
8457 
8458 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8459 {
8460     /*
8461      * M-profile Loop Clear with Tail Predication. Since our implementation
8462      * doesn't cache branch information, all we need to do is reset
8463      * FPSCR.LTPSIZE to 4.
8464      */
8465 
8466     if (!dc_isar_feature(aa32_lob, s) ||
8467         !dc_isar_feature(aa32_mve, s)) {
8468         return false;
8469     }
8470 
8471     if (!vfp_access_check(s)) {
8472         return true;
8473     }
8474 
8475     store_cpu_field_constant(4, v7m.ltpsize);
8476     return true;
8477 }
8478 
8479 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8480 {
8481     /*
8482      * M-profile Create Vector Tail Predicate. This insn is itself
8483      * predicated and is subject to beatwise execution.
8484      */
8485     TCGv_i32 rn_shifted, masklen;
8486 
8487     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8488         return false;
8489     }
8490 
8491     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8492         return true;
8493     }
8494 
8495     /*
8496      * We pre-calculate the mask length here to avoid needing
8497      * multiple helpers specialized for the element size.
8498      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
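          * For example (illustrative): for 16-bit elements (size == 1)
          * with Rn == 5, 5 <= 8 so we pass 10 (5 two-byte elements);
          * any Rn > 8 saturates to 16, i.e. a full 16-byte vector.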
8499      */
8500     rn_shifted = tcg_temp_new_i32();
8501     masklen = load_reg(s, a->rn);
8502     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8503     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8504                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8505                         rn_shifted, tcg_constant_i32(16));
8506     gen_helper_mve_vctp(tcg_env, masklen);
8507     /* This insn updates predication bits */
8508     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8509     mve_update_eci(s);
8510     return true;
8511 }
8512 
8513 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8514 {
8515     TCGv_i32 addr, tmp;
8516 
8517     tmp = load_reg(s, a->rm);
8518     if (half) {
8519         tcg_gen_add_i32(tmp, tmp, tmp);
8520     }
8521     addr = load_reg(s, a->rn);
8522     tcg_gen_add_i32(addr, addr, tmp);
8523 
8524     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8525 
8526     tcg_gen_add_i32(tmp, tmp, tmp);
8527     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8528     tcg_gen_add_i32(tmp, tmp, addr);
8529     store_reg(s, 15, tmp);
8530     return true;
8531 }
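     /*
      * Illustrative note on op_tbranch() above: the table entry address
      * is Rn + Rm for TBB or Rn + 2 * Rm for TBH (Rm is doubled before
      * the add); the loaded byte or halfword is a halfword offset and so
      * is doubled again, then added to the PC value that branches are
      * relative to (via jmp_diff(s, 0)) and written to the PC.
      */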
8532 
8533 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8534 {
8535     return op_tbranch(s, a, false);
8536 }
8537 
8538 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8539 {
8540     return op_tbranch(s, a, true);
8541 }
8542 
8543 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8544 {
8545     TCGv_i32 tmp = load_reg(s, a->rn);
8546 
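         /*
          * The brcond below tests the inverse of the CBZ/CBNZ condition:
          * it branches to s->condlabel (the not-taken path, i.e. the next
          * insn) when the register fails the test, and otherwise falls
          * through to the taken-branch jump.
          */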
8547     arm_gen_condlabel(s);
8548     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8549                         tmp, 0, s->condlabel.label);
8550     gen_jmp(s, jmp_diff(s, a->imm));
8551     return true;
8552 }
8553 
8554 /*
8555  * Supervisor call - both T32 & A32 come here so we need to check
8556  * which mode we are in when checking for semihosting.
8557  */
8558 
8559 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8560 {
8561     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8562 
8563     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8564         semihosting_enabled(s->current_el == 0) &&
8565         (a->imm == semihost_imm)) {
8566         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8567     } else {
8568         if (s->fgt_svc) {
8569             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8570             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8571         } else {
8572             gen_update_pc(s, curr_insn_len(s));
8573             s->svc_imm = a->imm;
8574             s->base.is_jmp = DISAS_SWI;
8575         }
8576     }
8577     return true;
8578 }
8579 
8580 /*
8581  * Unconditional system instructions
8582  */
8583 
8584 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8585 {
8586     static const int8_t pre_offset[4] = {
8587         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8588     };
8589     static const int8_t post_offset[4] = {
8590         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8591     };
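         /*
          * Illustrative example: for the IA form with writeback,
          * pre_offset is 0 and post_offset is 4, so the return PC is
          * loaded from [Rn], the CPSR value from [Rn + 4], and Rn is
          * written back as Rn + 8 (the +4 between the two loads plus
          * the post_offset).
          */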
8592     TCGv_i32 addr, t1, t2;
8593 
8594     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8595         return false;
8596     }
8597     if (IS_USER(s)) {
8598         unallocated_encoding(s);
8599         return true;
8600     }
8601 
8602     addr = load_reg(s, a->rn);
8603     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8604 
8605     /* Load PC into t1 and CPSR into t2.  */
8606     t1 = tcg_temp_new_i32();
8607     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8608     tcg_gen_addi_i32(addr, addr, 4);
8609     t2 = tcg_temp_new_i32();
8610     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8611 
8612     if (a->w) {
8613         /* Base writeback.  */
8614         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8615         store_reg(s, a->rn, addr);
8616     }
8617     gen_rfe(s, t1, t2);
8618     return true;
8619 }
8620 
8621 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8622 {
8623     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8624         return false;
8625     }
8626     gen_srs(s, a->mode, a->pu, a->w);
8627     return true;
8628 }
8629 
8630 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8631 {
8632     uint32_t mask, val;
8633 
8634     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8635         return false;
8636     }
8637     if (IS_USER(s)) {
8638         /* Implemented as NOP in user mode.  */
8639         return true;
8640     }
8641     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8642 
8643     mask = val = 0;
8644     if (a->imod & 2) {
8645         if (a->A) {
8646             mask |= CPSR_A;
8647         }
8648         if (a->I) {
8649             mask |= CPSR_I;
8650         }
8651         if (a->F) {
8652             mask |= CPSR_F;
8653         }
8654         if (a->imod & 1) {
8655             val |= mask;
8656         }
8657     }
8658     if (a->M) {
8659         mask |= CPSR_M;
8660         val |= a->mode;
8661     }
8662     if (mask) {
8663         gen_set_psr_im(s, mask, 0, val);
8664     }
8665     return true;
8666 }
8667 
8668 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8669 {
8670     TCGv_i32 tmp, addr;
8671 
8672     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8673         return false;
8674     }
8675     if (IS_USER(s)) {
8676         /* Implemented as NOP in user mode.  */
8677         return true;
8678     }
8679 
8680     tmp = tcg_constant_i32(a->im);
8681     /* FAULTMASK */
8682     if (a->F) {
8683         addr = tcg_constant_i32(19);
8684         gen_helper_v7m_msr(tcg_env, addr, tmp);
8685     }
8686     /* PRIMASK */
8687     if (a->I) {
8688         addr = tcg_constant_i32(16);
8689         gen_helper_v7m_msr(tcg_env, addr, tmp);
8690     }
8691     gen_rebuild_hflags(s, false);
8692     gen_lookup_tb(s);
8693     return true;
8694 }
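     /*
      * Note on trans_CPS_v7m() above: a->im is the value written, and
      * the constants 19 and 16 passed to the v7m_msr helper select
      * FAULTMASK and PRIMASK respectively (per the comments above); the
      * hflags rebuild and TB lookup then keep the cached execution-state
      * flags in sync with the new mask values.
      */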
8695 
8696 /*
8697  * Clear-Exclusive, Barriers
8698  */
8699 
8700 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8701 {
8702     if (s->thumb
8703         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8704         : !ENABLE_ARCH_6K) {
8705         return false;
8706     }
8707     gen_clrex(s);
8708     return true;
8709 }
8710 
8711 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8712 {
8713     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8714         return false;
8715     }
8716     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8717     return true;
8718 }
8719 
8720 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8721 {
8722     return trans_DSB(s, NULL);
8723 }
8724 
8725 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8726 {
8727     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8728         return false;
8729     }
8730     /*
8731      * We need to break the TB after this insn to execute
8732      * self-modifying code correctly and also to take
8733      * any pending interrupts immediately.
8734      */
8735     s->base.is_jmp = DISAS_TOO_MANY;
8736     return true;
8737 }
8738 
8739 static bool trans_SB(DisasContext *s, arg_SB *a)
8740 {
8741     if (!dc_isar_feature(aa32_sb, s)) {
8742         return false;
8743     }
8744     /*
8745      * TODO: There is no speculation barrier opcode
8746      * for TCG; MB and end the TB instead.
8747      */
8748     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8749     s->base.is_jmp = DISAS_TOO_MANY;
8750     return true;
8751 }
8752 
8753 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8754 {
8755     if (!ENABLE_ARCH_6) {
8756         return false;
8757     }
8758     if (a->E != (s->be_data == MO_BE)) {
8759         gen_helper_setend(tcg_env);
8760         s->base.is_jmp = DISAS_UPDATE_EXIT;
8761     }
8762     return true;
8763 }
8764 
8765 /*
8766  * Preload instructions
8767  * All are nops, contingent on the appropriate arch level.
8768  */
8769 
8770 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8771 {
8772     return ENABLE_ARCH_5TE;
8773 }
8774 
8775 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8776 {
8777     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8778 }
8779 
8780 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8781 {
8782     return ENABLE_ARCH_7;
8783 }
8784 
8785 /*
8786  * If-then
8787  */
8788 
8789 static bool trans_IT(DisasContext *s, arg_IT *a)
8790 {
8791     int cond_mask = a->cond_mask;
8792 
8793     /*
8794      * No actual code generated for this insn, just setup state.
8795      *
8796      * Combinations of firstcond and mask which set up an 0b1111
8797      * condition are UNPREDICTABLE; we take the CONSTRAINED
8798      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8799      * i.e. both meaning "execute always".
8800      */
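         /*
          * The low bit of firstcond is kept in bit 4 of condexec_mask
          * rather than in condexec_cond: the advance step at the end of
          * thumb_tr_translate_insn() (which also runs for the IT insn
          * itself) shifts it into condexec_cond, and thereafter bit 4 of
          * the shifting mask supplies the low bit of the condition for
          * each insn in the block.
          */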
8801     s->condexec_cond = (cond_mask >> 4) & 0xe;
8802     s->condexec_mask = cond_mask & 0x1f;
8803     return true;
8804 }
8805 
8806 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8807 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8808 {
8809     TCGv_i32 rn, rm;
8810     DisasCompare c;
8811 
8812     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8813         return false;
8814     }
8815 
8816     if (a->rm == 13) {
8817         /* SEE "Related encodings" (MVE shifts) */
8818         return false;
8819     }
8820 
8821     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8822         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8823         return false;
8824     }
8825 
8826     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8827     rn = tcg_temp_new_i32();
8828     rm = tcg_temp_new_i32();
8829     if (a->rn == 15) {
8830         tcg_gen_movi_i32(rn, 0);
8831     } else {
8832         load_reg_var(s, rn, a->rn);
8833     }
8834     if (a->rm == 15) {
8835         tcg_gen_movi_i32(rm, 0);
8836     } else {
8837         load_reg_var(s, rm, a->rm);
8838     }
8839 
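         /*
          * The op field selects how the "condition failed" operand is
          * transformed before the conditional move below: CSEL uses Rm
          * unchanged, CSINC uses Rm + 1, CSINV uses ~Rm and CSNEG uses
          * -Rm; the movcond then picks Rn when the condition passes and
          * the transformed Rm otherwise.
          */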
8840     switch (a->op) {
8841     case 0: /* CSEL */
8842         break;
8843     case 1: /* CSINC */
8844         tcg_gen_addi_i32(rm, rm, 1);
8845         break;
8846     case 2: /* CSINV */
8847         tcg_gen_not_i32(rm, rm);
8848         break;
8849     case 3: /* CSNEG */
8850         tcg_gen_neg_i32(rm, rm);
8851         break;
8852     default:
8853         g_assert_not_reached();
8854     }
8855 
8856     arm_test_cc(&c, a->fcond);
8857     tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8858 
8859     store_reg(s, a->rd, rn);
8860     return true;
8861 }
8862 
8863 /*
8864  * Legacy decoder.
8865  */
8866 
8867 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8868 {
8869     unsigned int cond = insn >> 28;
8870 
8871     /* M variants do not implement ARM mode; this must raise the INVSTATE
8872      * UsageFault exception.
8873      */
8874     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8875         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8876         return;
8877     }
8878 
8879     if (s->pstate_il) {
8880         /*
8881          * Illegal execution state. This has priority over BTI
8882          * exceptions, but comes after instruction abort exceptions.
8883          */
8884         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8885         return;
8886     }
8887 
8888     if (cond == 0xf) {
8889         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8890          * choose to UNDEF. In ARMv5 and above the space is used
8891          * for miscellaneous unconditional instructions.
8892          */
8893         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8894             unallocated_encoding(s);
8895             return;
8896         }
8897 
8898         /* Unconditional instructions.  */
8899         /* TODO: Perhaps merge these into one decodetree output file.  */
8900         if (disas_a32_uncond(s, insn) ||
8901             disas_vfp_uncond(s, insn) ||
8902             disas_neon_dp(s, insn) ||
8903             disas_neon_ls(s, insn) ||
8904             disas_neon_shared(s, insn)) {
8905             return;
8906         }
8907         /* fall back to legacy decoder */
8908 
8909         if ((insn & 0x0e000f00) == 0x0c000100) {
8910             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8911                 /* iWMMXt register transfer.  */
8912                 if (extract32(s->c15_cpar, 1, 1)) {
8913                     if (!disas_iwmmxt_insn(s, insn)) {
8914                         return;
8915                     }
8916                 }
8917             }
8918         }
8919         goto illegal_op;
8920     }
8921     if (cond != 0xe) {
8922         /* If the condition is not "always execute", generate a
8923            conditional jump to the next instruction. */
8924         arm_skip_unless(s, cond);
8925     }
8926 
8927     /* TODO: Perhaps merge these into one decodetree output file.  */
8928     if (disas_a32(s, insn) ||
8929         disas_vfp(s, insn)) {
8930         return;
8931     }
8932     /* fall back to legacy decoder */
8933     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8934     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8935         if (((insn & 0x0c000e00) == 0x0c000000)
8936             && ((insn & 0x03000000) != 0x03000000)) {
8937             /* Coprocessor insn, coprocessor 0 or 1 */
8938             disas_xscale_insn(s, insn);
8939             return;
8940         }
8941     }
8942 
8943 illegal_op:
8944     unallocated_encoding(s);
8945 }
8946 
8947 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8948 {
8949     /*
8950      * Return true if this is a 16-bit instruction. We must be precise
8951      * about this (matching the decode).
8952      */
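         /*
          * 0x1d << 11 == 0xe800, so any first halfword below 0xe800 is
          * a complete 16-bit insn.
          */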
8953     if ((insn >> 11) < 0x1d) {
8954         /* Definitely a 16-bit instruction */
8955         return true;
8956     }
8957 
8958     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8959      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8960      * end up actually treating this as two 16-bit insns, though,
8961      * if it's half of a bl/blx pair that might span a page boundary.
8962      */
8963     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8964         arm_dc_feature(s, ARM_FEATURE_M)) {
8965         /* Thumb2 cores (including all M profile ones) always treat
8966          * 32-bit insns as 32-bit.
8967          */
8968         return false;
8969     }
8970 
8971     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8972         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8973          * is not on the next page; we merge this into a 32-bit
8974          * insn.
8975          */
8976         return false;
8977     }
8978     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8979      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8980      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8981      *  -- handle as single 16 bit insn
8982      */
8983     return true;
8984 }
8985 
8986 /* Translate a 32-bit thumb instruction. */
8987 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8988 {
8989     /*
8990      * ARMv6-M supports a limited subset of Thumb2 instructions.
8991      * Other Thumb1 architectures allow only 32-bit
8992      * combined BL/BLX prefix and suffix.
8993      */
8994     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8995         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8996         int i;
8997         bool found = false;
8998         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8999                                                0xf3b08040 /* dsb */,
9000                                                0xf3b08050 /* dmb */,
9001                                                0xf3b08060 /* isb */,
9002                                                0xf3e08000 /* mrs */,
9003                                                0xf000d000 /* bl */};
9004         static const uint32_t armv6m_mask[] = {0xffe0d000,
9005                                                0xfff0d0f0,
9006                                                0xfff0d0f0,
9007                                                0xfff0d0f0,
9008                                                0xffe0d000,
9009                                                0xf800d000};
9010 
9011         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9012             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9013                 found = true;
9014                 break;
9015             }
9016         }
9017         if (!found) {
9018             goto illegal_op;
9019         }
9020     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9021         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9022             unallocated_encoding(s);
9023             return;
9024         }
9025     }
9026 
9027     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9028         /*
9029          * NOCP takes precedence over any UNDEF for (almost) the
9030          * entire wide range of coprocessor-space encodings, so check
9031          * for it first before proceeding to actually decode eg VFP
9032          * insns. This decode also handles the few insns which are
9033          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9034          */
9035         if (disas_m_nocp(s, insn)) {
9036             return;
9037         }
9038     }
9039 
9040     if ((insn & 0xef000000) == 0xef000000) {
9041         /*
9042          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9043          * transform into
9044          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9045          */
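             /*
              * Illustratively, a T32 top byte of 0xef (bit 28 clear) maps
              * to an A32 top byte of 0xf2, and 0xff (bit 28 set) maps to
              * 0xf3; the low 24 bits are unchanged.
              */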
9046         uint32_t a32_insn = (insn & 0xe2ffffff) |
9047             ((insn & (1 << 28)) >> 4) | (1 << 28);
9048 
9049         if (disas_neon_dp(s, a32_insn)) {
9050             return;
9051         }
9052     }
9053 
9054     if ((insn & 0xff100000) == 0xf9000000) {
9055         /*
9056          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9057          * transform into
9058          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9059          */
9060         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9061 
9062         if (disas_neon_ls(s, a32_insn)) {
9063             return;
9064         }
9065     }
9066 
9067     /*
9068      * TODO: Perhaps merge these into one decodetree output file.
9069      * Note disas_vfp is written for a32 with cond field in the
9070      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9071      */
9072     if (disas_t32(s, insn) ||
9073         disas_vfp_uncond(s, insn) ||
9074         disas_neon_shared(s, insn) ||
9075         disas_mve(s, insn) ||
9076         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9077         return;
9078     }
9079 
9080 illegal_op:
9081     unallocated_encoding(s);
9082 }
9083 
9084 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9085 {
9086     if (!disas_t16(s, insn)) {
9087         unallocated_encoding(s);
9088     }
9089 }
9090 
9091 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9092 {
9093     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9094      * (False positives are OK, false negatives are not.)
9095      * We know this is a Thumb insn, and our caller ensures we are
9096      * only called if dc->base.pc_next is less than 4 bytes from the page
9097      * boundary, so we cross the page if the first 16 bits indicate
9098      * that this is a 32 bit insn.
9099      */
9100     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9101 
9102     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9103 }
9104 
9105 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9106 {
9107     DisasContext *dc = container_of(dcbase, DisasContext, base);
9108     CPUARMState *env = cpu_env(cs);
9109     ARMCPU *cpu = env_archcpu(env);
9110     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9111     uint32_t condexec, core_mmu_idx;
9112 
9113     dc->isar = &cpu->isar;
9114     dc->condjmp = 0;
9115     dc->pc_save = dc->base.pc_first;
9116     dc->aarch64 = false;
9117     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9118     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9119     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9120     /*
9121      * The CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9122      * is always the IT bits. On M-profile, some of the reserved encodings
9123      * of IT are used instead to indicate either ICI or ECI, which
9124      * indicate partial progress of a restartable insn that was interrupted
9125      * partway through by an exception:
9126      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9127      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9128      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9129      * insn, behave normally".
9130      */
9131     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9132     dc->eci_handled = false;
9133     if (condexec & 0xf) {
9134         dc->condexec_mask = (condexec & 0xf) << 1;
9135         dc->condexec_cond = condexec >> 4;
9136     } else {
9137         if (arm_feature(env, ARM_FEATURE_M)) {
9138             dc->eci = condexec >> 4;
9139         }
9140     }
9141 
9142     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9143     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9144     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9145 #if !defined(CONFIG_USER_ONLY)
9146     dc->user = (dc->current_el == 0);
9147 #endif
9148     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9149     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9150     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9151     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9152     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9153 
9154     if (arm_feature(env, ARM_FEATURE_M)) {
9155         dc->vfp_enabled = 1;
9156         dc->be_data = MO_TE;
9157         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9158         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9159         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9160         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9161         dc->v7m_new_fp_ctxt_needed =
9162             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9163         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9164         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9165     } else {
9166         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9167         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9168         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9169         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9170         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9171             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9172         } else {
9173             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9174             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9175         }
9176         dc->sme_trap_nonstreaming =
9177             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9178     }
9179     dc->lse2 = false; /* applies only to aarch64 */
9180     dc->cp_regs = cpu->cp_regs;
9181     dc->features = env->features;
9182 
9183     /* Single step state. The code-generation logic here is:
9184      *  SS_ACTIVE == 0:
9185      *   generate code with no special handling for single-stepping (except
9186      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9187      *   this happens anyway because those changes are all system register or
9188      *   PSTATE writes).
9189      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9190      *   emit code for one insn
9191      *   emit code to clear PSTATE.SS
9192      *   emit code to generate software step exception for completed step
9193      *   end TB (as usual for having generated an exception)
9194      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9195      *   emit code to generate a software step exception
9196      *   end the TB
9197      */
9198     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9199     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9200     dc->is_ldex = false;
9201 
9202     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9203 
9204     /* If architectural single step active, limit to 1.  */
9205     if (dc->ss_active) {
9206         dc->base.max_insns = 1;
9207     }
9208 
9209     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9210        to those left on the page.  */
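         /*
          * For example (illustrative): with 4 KiB pages and pc_first at
          * offset 0xf80 within its page, -(pc_first | TARGET_PAGE_MASK)
          * is 0x80, so the TB is bounded to at most 32 four-byte insns.
          */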
9211     if (!dc->thumb) {
9212         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9213         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9214     }
9215 
9216     cpu_V0 = tcg_temp_new_i64();
9217     cpu_V1 = tcg_temp_new_i64();
9218     cpu_M0 = tcg_temp_new_i64();
9219 }
9220 
9221 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9222 {
9223     DisasContext *dc = container_of(dcbase, DisasContext, base);
9224 
9225     /* A note on handling of the condexec (IT) bits:
9226      *
9227      * We want to avoid the overhead of having to write the updated condexec
9228      * bits back to the CPUARMState for every instruction in an IT block. So:
9229      * (1) if the condexec bits are not already zero then we write
9230      * zero back into the CPUARMState now. This avoids complications trying
9231      * to do it at the end of the block. (For example if we don't do this
9232      * it's hard to identify whether we can safely skip writing condexec
9233      * at the end of the TB, which we definitely want to do for the case
9234      * where a TB doesn't do anything with the IT state at all.)
9235      * (2) if we are going to leave the TB then we call gen_set_condexec()
9236      * which will write the correct value into CPUARMState if zero is wrong.
9237      * This is done both for leaving the TB at the end, and for leaving
9238      * it because of an exception we know will happen, which is done in
9239      * gen_exception_insn(). The latter is necessary because we need to
9240      * leave the TB with the PC/IT state just prior to execution of the
9241      * instruction which caused the exception.
9242      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9243      * then the CPUARMState will be wrong and we need to reset it.
9244      * This is handled in the same way as restoration of the
9245      * PC in these situations; we save the value of the condexec bits
9246      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9247      * then uses this to restore them after an exception.
9248      *
9249      * Note that there are no instructions which can read the condexec
9250      * bits, and none which can write non-static values to them, so
9251      * we don't need to care about whether CPUARMState is correct in the
9252      * middle of a TB.
9253      */
9254 
9255     /* Reset the conditional execution bits immediately. This avoids
9256        complications trying to do it at the end of the block.  */
9257     if (dc->condexec_mask || dc->condexec_cond) {
9258         store_cpu_field_constant(0, condexec_bits);
9259     }
9260 }
9261 
9262 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9263 {
9264     DisasContext *dc = container_of(dcbase, DisasContext, base);
9265     /*
9266      * The ECI/ICI bits share PSR bits with the IT bits, so we
9267      * need to reconstitute the bits from the split-out DisasContext
9268      * fields here.
9269      */
9270     uint32_t condexec_bits;
9271     target_ulong pc_arg = dc->base.pc_next;
9272 
9273     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9274         pc_arg &= ~TARGET_PAGE_MASK;
9275     }
9276     if (dc->eci) {
9277         condexec_bits = dc->eci << 4;
9278     } else {
9279         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9280     }
9281     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9282     dc->insn_start = tcg_last_op();
9283 }
9284 
9285 static bool arm_check_kernelpage(DisasContext *dc)
9286 {
9287 #ifdef CONFIG_USER_ONLY
9288     /* Intercept jump to the magic kernel page.  */
9289     if (dc->base.pc_next >= 0xffff0000) {
9290         /* We always get here via a jump, so we know we are not in a
9291            conditional execution block.  */
9292         gen_exception_internal(EXCP_KERNEL_TRAP);
9293         dc->base.is_jmp = DISAS_NORETURN;
9294         return true;
9295     }
9296 #endif
9297     return false;
9298 }
9299 
9300 static bool arm_check_ss_active(DisasContext *dc)
9301 {
9302     if (dc->ss_active && !dc->pstate_ss) {
9303         /* Singlestep state is Active-pending.
9304          * If we're in this state at the start of a TB then either
9305          *  a) we just took an exception to an EL which is being debugged
9306          *     and this is the first insn in the exception handler
9307          *  b) debug exceptions were masked and we just unmasked them
9308          *     without changing EL (eg by clearing PSTATE.D)
9309          * In either case we're going to take a swstep exception in the
9310          * "did not step an insn" case, and so the syndrome ISV and EX
9311          * bits should be zero.
9312          */
9313         assert(dc->base.num_insns == 1);
9314         gen_swstep_exception(dc, 0, 0);
9315         dc->base.is_jmp = DISAS_NORETURN;
9316         return true;
9317     }
9318 
9319     return false;
9320 }
9321 
9322 static void arm_post_translate_insn(DisasContext *dc)
9323 {
9324     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9325         if (dc->pc_save != dc->condlabel.pc_save) {
9326             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9327         }
9328         gen_set_label(dc->condlabel.label);
9329         dc->condjmp = 0;
9330     }
9331 }
9332 
9333 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9334 {
9335     DisasContext *dc = container_of(dcbase, DisasContext, base);
9336     CPUARMState *env = cpu_env(cpu);
9337     uint32_t pc = dc->base.pc_next;
9338     unsigned int insn;
9339 
9340     /* Singlestep exceptions have the highest priority. */
9341     if (arm_check_ss_active(dc)) {
9342         dc->base.pc_next = pc + 4;
9343         return;
9344     }
9345 
9346     if (pc & 3) {
9347         /*
9348          * PC alignment fault.  This has priority over the instruction abort
9349          * that we would receive from a translation fault via arm_ldl_code
9350          * (or the execution of the kernelpage entrypoint). This should only
9351          * be possible after an indirect branch, at the start of the TB.
9352          */
9353         assert(dc->base.num_insns == 1);
9354         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
9355         dc->base.is_jmp = DISAS_NORETURN;
9356         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9357         return;
9358     }
9359 
9360     if (arm_check_kernelpage(dc)) {
9361         dc->base.pc_next = pc + 4;
9362         return;
9363     }
9364 
9365     dc->pc_curr = pc;
9366     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9367     dc->insn = insn;
9368     dc->base.pc_next = pc + 4;
9369     disas_arm_insn(dc, insn);
9370 
9371     arm_post_translate_insn(dc);
9372 
9373     /* ARM is a fixed-length ISA.  We performed the cross-page check
9374        in init_disas_context by adjusting max_insns.  */
9375 }
9376 
9377 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9378 {
9379     /* Return true if this Thumb insn is always unconditional,
9380      * even inside an IT block. This is true of only a very few
9381      * instructions: BKPT, HLT, and SG.
9382      *
9383      * A larger class of instructions are UNPREDICTABLE if used
9384      * inside an IT block; we do not need to detect those here, because
9385      * what we do by default (perform the cc check and update the IT
9386      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9387      * choice for those situations.
9388      *
9389      * insn is either a 16-bit or a 32-bit instruction; the two are
9390      * distinguishable because for the 16-bit case the top 16 bits
9391      * are zeroes, and that isn't a valid 32-bit encoding.
9392      */
9393     if ((insn & 0xffffff00) == 0xbe00) {
9394         /* BKPT */
9395         return true;
9396     }
9397 
9398     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9399         !arm_dc_feature(s, ARM_FEATURE_M)) {
9400         /* HLT: v8A only. This is unconditional even when it is going to
9401          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9402          * For v7 cores this was a plain old undefined encoding and so
9403          * honours its cc check. (We might be using the encoding as
9404          * a semihosting trap, but we don't change the cc check behaviour
9405          * on that account, because a debugger connected to a real v7A
9406          * core and emulating semihosting traps by catching the UNDEF
9407          * exception would also only see cases where the cc check passed.
9408          * No guest code should be trying to do a HLT semihosting trap
9409          * in an IT block anyway.
9410          */
9411         return true;
9412     }
9413 
9414     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9415         arm_dc_feature(s, ARM_FEATURE_M)) {
9416         /* SG: v8M only */
9417         return true;
9418     }
9419 
9420     return false;
9421 }
9422 
9423 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9424 {
9425     DisasContext *dc = container_of(dcbase, DisasContext, base);
9426     CPUARMState *env = cpu_env(cpu);
9427     uint32_t pc = dc->base.pc_next;
9428     uint32_t insn;
9429     bool is_16bit;
9430     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9431     TCGOp *insn_eci_rewind = NULL;
9432     target_ulong insn_eci_pc_save = -1;
9433 
9434     /* Misaligned thumb PC is architecturally impossible. */
9435     assert((dc->base.pc_next & 1) == 0);
9436 
9437     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9438         dc->base.pc_next = pc + 2;
9439         return;
9440     }
9441 
9442     dc->pc_curr = pc;
9443     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9444     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9445     pc += 2;
9446     if (!is_16bit) {
9447         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9448         insn = insn << 16 | insn2;
9449         pc += 2;
9450     }
9451     dc->base.pc_next = pc;
9452     dc->insn = insn;
9453 
9454     if (dc->pstate_il) {
9455         /*
9456          * Illegal execution state. This has priority over BTI
9457          * exceptions, but comes after instruction abort exceptions.
9458          */
9459         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9460         return;
9461     }
9462 
9463     if (dc->eci) {
9464         /*
9465          * For M-profile continuable instructions, ECI/ICI handling
9466          * falls into these cases:
9467          *  - interrupt-continuable instructions
9468          *     These are the various load/store multiple insns (both
9469          *     integer and fp). The ICI bits indicate the register
9470          *     where the load/store can resume. We make the IMPDEF
9471          *     choice to always do "instruction restart", ie ignore
9472          *     the ICI value and always execute the ldm/stm from the
9473          *     start. So all we need to do is zero PSR.ICI if the
9474          *     insn executes.
9475          *  - MVE instructions subject to beat-wise execution
9476          *     Here the ECI bits indicate which beats have already been
9477          *     executed, and we must honour this. Each insn of this
9478          *     type will handle it correctly. We will update PSR.ECI
9479          *     in the helper function for the insn (some ECI values
9480          *     mean that the following insn also has been partially
9481          *     executed).
9482          *  - Special cases which don't advance ECI
9483          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9484          *     bits untouched.
9485          *  - all other insns (the common case)
9486          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9487          *     We place a rewind-marker here. Insns in the previous
9488          *     three categories will set a flag in the DisasContext.
9489          *     If the flag isn't set after we call disas_thumb_insn()
9490          *     or disas_thumb2_insn() then we know we have a "some other
9491          *     insn" case. We will rewind to the marker (ie throwing away
9492          *     all the generated code) and instead emit "take exception".
9493          */
9494         insn_eci_rewind = tcg_last_op();
9495         insn_eci_pc_save = dc->pc_save;
9496     }
9497 
9498     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9499         uint32_t cond = dc->condexec_cond;
9500 
9501         /*
9502          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9503          * "always"; 0xf is not "never".
9504          */
9505         if (cond < 0x0e) {
9506             arm_skip_unless(dc, cond);
9507         }
9508     }
9509 
9510     if (is_16bit) {
9511         disas_thumb_insn(dc, insn);
9512     } else {
9513         disas_thumb2_insn(dc, insn);
9514     }
9515 
9516     /* Advance the Thumb condexec condition.  */
9517     if (dc->condexec_mask) {
9518         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9519                              ((dc->condexec_mask >> 4) & 1));
9520         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9521         if (dc->condexec_mask == 0) {
9522             dc->condexec_cond = 0;
9523         }
9524     }
9525 
9526     if (dc->eci && !dc->eci_handled) {
9527         /*
9528          * Insn wasn't valid for ECI/ICI at all: undo what we
9529          * just generated and instead emit an exception
9530          */
9531         tcg_remove_ops_after(insn_eci_rewind);
9532         dc->pc_save = insn_eci_pc_save;
9533         dc->condjmp = 0;
9534         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9535     }
9536 
9537     arm_post_translate_insn(dc);
9538 
9539     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9540      * will touch a new page.  This ensures that prefetch aborts occur at
9541      * the right place.
9542      *
9543      * We want to stop the TB if the next insn starts in a new page,
9544      * or if it spans between this page and the next. This means that
9545      * if we're looking at the last halfword in the page we need to
9546      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9547      * or a 32-bit Thumb insn (which won't).
9548      * This is to avoid generating a silly TB with a single 16-bit insn
9549      * in it at the end of this page (which would execute correctly
9550      * but isn't very efficient).
9551      */
9552     if (dc->base.is_jmp == DISAS_NEXT
9553         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9554             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9555                 && insn_crosses_page(env, dc)))) {
9556         dc->base.is_jmp = DISAS_TOO_MANY;
9557     }
9558 }
9559 
9560 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9561 {
9562     DisasContext *dc = container_of(dcbase, DisasContext, base);
9563 
9564     /* At this stage dc->condjmp will only be set when the skipped
9565        instruction was a conditional branch or trap, and the PC has
9566        already been written.  */
9567     gen_set_condexec(dc);
9568     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9569         /* Exception return branches need some special case code at the
9570          * end of the TB, which is complex enough that it has to
9571          * handle the single-step vs not and the condition-failed
9572          * insn codepath itself.
9573          */
9574         gen_bx_excret_final_code(dc);
9575     } else if (unlikely(dc->ss_active)) {
9576         /* Unconditional and "condition passed" instruction codepath. */
9577         switch (dc->base.is_jmp) {
9578         case DISAS_SWI:
9579             gen_ss_advance(dc);
9580             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9581             break;
9582         case DISAS_HVC:
9583             gen_ss_advance(dc);
9584             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9585             break;
9586         case DISAS_SMC:
9587             gen_ss_advance(dc);
9588             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9589             break;
9590         case DISAS_NEXT:
9591         case DISAS_TOO_MANY:
9592         case DISAS_UPDATE_EXIT:
9593         case DISAS_UPDATE_NOCHAIN:
9594             gen_update_pc(dc, curr_insn_len(dc));
9595             /* fall through */
9596         default:
9597             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9598             gen_singlestep_exception(dc);
9599             break;
9600         case DISAS_NORETURN:
9601             break;
9602         }
9603     } else {
9604         /* While branches must always occur at the end of an IT block,
9605            there are a few other things that can cause us to terminate
9606            the TB in the middle of an IT block:
9607             - Exception generating instructions (bkpt, swi, undefined).
9608             - Page boundaries.
9609             - Hardware watchpoints.
9610            Hardware breakpoints have already been handled and skip this code.
9611          */
9612         switch (dc->base.is_jmp) {
9613         case DISAS_NEXT:
9614         case DISAS_TOO_MANY:
9615             gen_goto_tb(dc, 1, curr_insn_len(dc));
9616             break;
9617         case DISAS_UPDATE_NOCHAIN:
9618             gen_update_pc(dc, curr_insn_len(dc));
9619             /* fall through */
9620         case DISAS_JUMP:
9621             gen_goto_ptr();
9622             break;
9623         case DISAS_UPDATE_EXIT:
9624             gen_update_pc(dc, curr_insn_len(dc));
9625             /* fall through */
9626         default:
9627             /* indicate that the hash table must be used to find the next TB */
9628             tcg_gen_exit_tb(NULL, 0);
9629             break;
9630         case DISAS_NORETURN:
9631             /* nothing more to generate */
9632             break;
9633         case DISAS_WFI:
9634             gen_helper_wfi(tcg_env, tcg_constant_i32(curr_insn_len(dc)));
9635             /*
9636              * The helper doesn't necessarily throw an exception, but we
9637              * must go back to the main loop to check for interrupts anyway.
9638              */
9639             tcg_gen_exit_tb(NULL, 0);
9640             break;
9641         case DISAS_WFE:
9642             gen_helper_wfe(tcg_env);
9643             break;
9644         case DISAS_YIELD:
9645             gen_helper_yield(tcg_env);
9646             break;
9647         case DISAS_SWI:
9648             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9649             break;
9650         case DISAS_HVC:
9651             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9652             break;
9653         case DISAS_SMC:
9654             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9655             break;
9656         }
9657     }
9658 
9659     if (dc->condjmp) {
9660         /* "Condition failed" instruction codepath for the branch/trap insn */
9661         set_disas_label(dc, dc->condlabel);
9662         gen_set_condexec(dc);
9663         if (unlikely(dc->ss_active)) {
9664             gen_update_pc(dc, curr_insn_len(dc));
9665             gen_singlestep_exception(dc);
9666         } else {
9667             gen_goto_tb(dc, 1, curr_insn_len(dc));
9668         }
9669     }
9670 }
9671 
9672 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9673                              CPUState *cpu, FILE *logfile)
9674 {
9675     DisasContext *dc = container_of(dcbase, DisasContext, base);
9676 
9677     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9678     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9679 }
9680 
9681 static const TranslatorOps arm_translator_ops = {
9682     .init_disas_context = arm_tr_init_disas_context,
9683     .tb_start           = arm_tr_tb_start,
9684     .insn_start         = arm_tr_insn_start,
9685     .translate_insn     = arm_tr_translate_insn,
9686     .tb_stop            = arm_tr_tb_stop,
9687     .disas_log          = arm_tr_disas_log,
9688 };
9689 
9690 static const TranslatorOps thumb_translator_ops = {
9691     .init_disas_context = arm_tr_init_disas_context,
9692     .tb_start           = arm_tr_tb_start,
9693     .insn_start         = arm_tr_insn_start,
9694     .translate_insn     = thumb_tr_translate_insn,
9695     .tb_stop            = arm_tr_tb_stop,
9696     .disas_log          = arm_tr_disas_log,
9697 };
9698 
9699 /* generate intermediate code for basic block 'tb'.  */
9700 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9701                            vaddr pc, void *host_pc)
9702 {
9703     DisasContext dc = { };
9704     const TranslatorOps *ops = &arm_translator_ops;
9705     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9706 
9707     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9708         ops = &thumb_translator_ops;
9709     }
9710 #ifdef TARGET_AARCH64
9711     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9712         ops = &aarch64_translator_ops;
9713     }
9714 #endif
9715 
9716     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9717 }
9718