xref: /openbmc/qemu/target/arm/tcg/translate.c (revision 00f463b38aa7cfca0bc65e3af7f2c49e1b9da690)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "translate.h"
24 #include "translate-a32.h"
25 #include "qemu/log.h"
26 #include "disas/disas.h"
27 #include "arm_ldst.h"
28 #include "semihosting/semihost.h"
29 #include "cpregs.h"
30 #include "exec/helper-proto.h"
31 
32 #define HELPER_H "helper.h"
33 #include "exec/helper-info.c.inc"
34 #undef  HELPER_H
35 
/*
 * Architecture-level feature tests.  Every macro expands to an expression
 * that refers to a variable named 's', so they can only be used where a
 * translation context with that name is in scope.
 */
#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
/* Unlike the others, this one is an ISAR feature test, not a feature bit */
#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
/* Note: the 6T2 test is keyed on the THUMB2 feature bit */
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
46 
/* These are TCG temporaries used only by the legacy iwMMXt decoder */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
/*
 * These are TCG globals which alias CPUARMState fields.
 * They are created in arm_translate_init(): cpu_R[n] aliases regs[n],
 * the flag variables alias CF/NF/VF/ZF, and the exclusive_* variables
 * alias the fields of the same name.
 */
static TCGv_i32 cpu_R[16];
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;

/* Names attached to the cpu_R[] globals, indexed by register number */
static const char * const regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
58 
59 
60 /* initialize TCG globals.  */
61 void arm_translate_init(void)
62 {
63     int i;
64 
65     for (i = 0; i < 16; i++) {
66         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
67                                           offsetof(CPUARMState, regs[i]),
68                                           regnames[i]);
69     }
70     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
71     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
72     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
73     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
74 
75     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
76         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
77     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
78         offsetof(CPUARMState, exclusive_val), "exclusive_val");
79 
80     a64_translate_init();
81 }
82 
83 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
84 {
85     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
86     switch (cmode) {
87     case 0: case 1:
88         /* no-op */
89         break;
90     case 2: case 3:
91         imm <<= 8;
92         break;
93     case 4: case 5:
94         imm <<= 16;
95         break;
96     case 6: case 7:
97         imm <<= 24;
98         break;
99     case 8: case 9:
100         imm |= imm << 16;
101         break;
102     case 10: case 11:
103         imm = (imm << 8) | (imm << 24);
104         break;
105     case 12:
106         imm = (imm << 8) | 0xff;
107         break;
108     case 13:
109         imm = (imm << 16) | 0xffff;
110         break;
111     case 14:
112         if (op) {
113             /*
114              * This and cmode == 15 op == 1 are the only cases where
115              * the top and bottom 32 bits of the encoded constant differ.
116              */
117             uint64_t imm64 = 0;
118             int n;
119 
120             for (n = 0; n < 8; n++) {
121                 if (imm & (1 << n)) {
122                     imm64 |= (0xffULL << (n * 8));
123                 }
124             }
125             return imm64;
126         }
127         imm |= (imm << 8) | (imm << 16) | (imm << 24);
128         break;
129     case 15:
130         if (op) {
131             /* Reserved encoding for AArch32; valid for AArch64 */
132             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
133             if (imm & 0x80) {
134                 imm64 |= 0x8000000000000000ULL;
135             }
136             if (imm & 0x40) {
137                 imm64 |= 0x3fc0000000000000ULL;
138             } else {
139                 imm64 |= 0x4000000000000000ULL;
140             }
141             return imm64;
142         }
143         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
144             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
145         break;
146     }
147     if (op) {
148         imm = ~imm;
149     }
150     return dup_const(MO_32, imm);
151 }
152 
153 /* Generate a label used for skipping this instruction */
154 void arm_gen_condlabel(DisasContext *s)
155 {
156     if (!s->condjmp) {
157         s->condlabel = gen_disas_label(s);
158         s->condjmp = 1;
159     }
160 }
161 
/* Flags for the disas_set_da_iss info argument:
 * lower bits hold the Rt register number, higher bits are flags.
 */
typedef enum ISSInfo {
    ISSNone = 0,
    /* Mask extracting the Rt register number from the low bits */
    ISSRegMask = 0x1f,
    /* When set, disas_set_da_iss() records no syndrome at all */
    ISSInvalid = (1 << 5),
    /* The remaining flags are passed through to the syndrome builder */
    ISSIsAcqRel = (1 << 6),
    ISSIsWrite = (1 << 7),
    ISSIs16Bit = (1 << 8),
} ISSInfo;
173 
174 /*
175  * Store var into env + offset to a member with size bytes.
176  * Free var after use.
177  */
178 void store_cpu_offset(TCGv_i32 var, int offset, int size)
179 {
180     switch (size) {
181     case 1:
182         tcg_gen_st8_i32(var, cpu_env, offset);
183         break;
184     case 4:
185         tcg_gen_st_i32(var, cpu_env, offset);
186         break;
187     default:
188         g_assert_not_reached();
189     }
190 }
191 
192 /* Save the syndrome information for a Data Abort */
193 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
194 {
195     uint32_t syn;
196     int sas = memop & MO_SIZE;
197     bool sse = memop & MO_SIGN;
198     bool is_acqrel = issinfo & ISSIsAcqRel;
199     bool is_write = issinfo & ISSIsWrite;
200     bool is_16bit = issinfo & ISSIs16Bit;
201     int srt = issinfo & ISSRegMask;
202 
203     if (issinfo & ISSInvalid) {
204         /* Some callsites want to conditionally provide ISS info,
205          * eg "only if this was not a writeback"
206          */
207         return;
208     }
209 
210     if (srt == 15) {
211         /* For AArch32, insns where the src/dest is R15 never generate
212          * ISS information. Catching that here saves checking at all
213          * the call sites.
214          */
215         return;
216     }
217 
218     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
219                                   0, 0, 0, is_write, 0, is_16bit);
220     disas_set_insn_syndrome(s, syn);
221 }
222 
/*
 * Pick the core MMU index for an "unprivileged" access, based on the
 * translation regime currently in s->mmu_idx.  Any mmu_idx not listed
 * below is treated as a programming error.
 */
static inline int get_a32_user_mem_index(DisasContext *s)
{
    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
     * insns:
     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
     *  otherwise, access as if at PL0.
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_E3:
    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
    case ARMMMUIdx_E10_0:
    case ARMMMUIdx_E10_1:
    case ARMMMUIdx_E10_1_PAN:
        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
    /*
     * The M-profile indexes come in User/Priv pairs; each pair maps to
     * its User member (judging by the index names, the secure and
     * negative-priority flavour is kept).
     */
    case ARMMMUIdx_MUser:
    case ARMMMUIdx_MPriv:
        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
    case ARMMMUIdx_MUserNegPri:
    case ARMMMUIdx_MPrivNegPri:
        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
    case ARMMMUIdx_MSUser:
    case ARMMMUIdx_MSPriv:
        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
    case ARMMMUIdx_MSUserNegPri:
    case ARMMMUIdx_MSPrivNegPri:
        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
    default:
        g_assert_not_reached();
    }
}
253 
254 /* The pc_curr difference for an architectural jump. */
255 static target_long jmp_diff(DisasContext *s, target_long diff)
256 {
257     return diff + (s->thumb ? 4 : 8);
258 }
259 
260 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
261 {
262     assert(s->pc_save != -1);
263     if (tb_cflags(s->base.tb) & CF_PCREL) {
264         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
265     } else {
266         tcg_gen_movi_i32(var, s->pc_curr + diff);
267     }
268 }
269 
270 /* Set a variable to the value of a CPU register.  */
271 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
272 {
273     if (reg == 15) {
274         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
275     } else {
276         tcg_gen_mov_i32(var, cpu_R[reg]);
277     }
278 }
279 
280 /*
281  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
282  * This is used for load/store for which use of PC implies (literal),
283  * or ADD that implies ADR.
284  */
285 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
286 {
287     TCGv_i32 tmp = tcg_temp_new_i32();
288 
289     if (reg == 15) {
290         /*
291          * This address is computed from an aligned PC:
292          * subtract off the low bits.
293          */
294         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
295     } else {
296         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
297     }
298     return tmp;
299 }
300 
301 /* Set a CPU register.  The source must be a temporary and will be
302    marked as dead.  */
303 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
304 {
305     if (reg == 15) {
306         /* In Thumb mode, we must ignore bit 0.
307          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
308          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
309          * We choose to ignore [1:0] in ARM mode for all architecture versions.
310          */
311         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
312         s->base.is_jmp = DISAS_JUMP;
313         s->pc_save = -1;
314     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
315         /* For M-profile SP bits [1:0] are always zero */
316         tcg_gen_andi_i32(var, var, ~3);
317     }
318     tcg_gen_mov_i32(cpu_R[reg], var);
319 }
320 
/*
 * Variant of store_reg which applies v8M stack-limit checks before updating
 * SP. If the check fails this will result in an exception being taken.
 * We disable the stack checks for CONFIG_USER_ONLY because we have
 * no idea what the stack limits should be in that case.
 * If stack checking is not being done this just acts like store_reg().
 */
static void store_sp_checked(DisasContext *s, TCGv_i32 var)
{
#ifndef CONFIG_USER_ONLY
    /* The helper sees the proposed new SP value before it is committed */
    if (s->v8m_stackcheck) {
        gen_helper_v8m_stackcheck(cpu_env, var);
    }
#endif
    /* Register 13 is SP */
    store_reg(s, 13, var);
}
337 
/*
 * Value extensions.  Each macro operates in place on var.
 * The first four are zero (u) or sign (s) extensions of the low
 * 8 (b) or 16 (h) bits, done with inline TCG ops.
 */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

/* The "16" variants are implemented by out-of-line helpers */
#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
346 
347 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
348 {
349     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
350 }
351 
352 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
353 {
354     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
355 
356     if (new_el) {
357         if (m_profile) {
358             gen_helper_rebuild_hflags_m32_newel(cpu_env);
359         } else {
360             gen_helper_rebuild_hflags_a32_newel(cpu_env);
361         }
362     } else {
363         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
364         if (m_profile) {
365             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
366         } else {
367             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
368         }
369     }
370 }
371 
372 static void gen_exception_internal(int excp)
373 {
374     assert(excp_is_internal(excp));
375     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
376 }
377 
/*
 * Emit the code that ends a single-stepped instruction: advance the
 * step state, raise the software-step exception, and mark the TB as
 * not returning.
 */
static void gen_singlestep_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    /* NOTE(review): the 1 is presumably the ISV syndrome bit -- confirm */
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}
393 
394 void clear_eci_state(DisasContext *s)
395 {
396     /*
397      * Clear any ECI/ICI state: used when a load multiple/store
398      * multiple insn executes.
399      */
400     if (s->eci) {
401         store_cpu_field_constant(0, condexec_bits);
402         s->eci = 0;
403     }
404 }
405 
406 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
407 {
408     TCGv_i32 tmp1 = tcg_temp_new_i32();
409     TCGv_i32 tmp2 = tcg_temp_new_i32();
410     tcg_gen_ext16s_i32(tmp1, a);
411     tcg_gen_ext16s_i32(tmp2, b);
412     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
413     tcg_gen_sari_i32(a, a, 16);
414     tcg_gen_sari_i32(b, b, 16);
415     tcg_gen_mul_i32(b, b, a);
416     tcg_gen_mov_i32(a, tmp1);
417 }
418 
419 /* Byteswap each halfword.  */
420 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
421 {
422     TCGv_i32 tmp = tcg_temp_new_i32();
423     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
424     tcg_gen_shri_i32(tmp, var, 8);
425     tcg_gen_and_i32(tmp, tmp, mask);
426     tcg_gen_and_i32(var, var, mask);
427     tcg_gen_shli_i32(var, var, 8);
428     tcg_gen_or_i32(dest, var, tmp);
429 }
430 
431 /* Byteswap low halfword and sign extend.  */
432 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
433 {
434     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
435 }
436 
437 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
438     tmp = (t0 ^ t1) & 0x8000;
439     t0 &= ~0x8000;
440     t1 &= ~0x8000;
441     t0 = (t0 + t1) ^ tmp;
442  */
443 
444 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
445 {
446     TCGv_i32 tmp = tcg_temp_new_i32();
447     tcg_gen_xor_i32(tmp, t0, t1);
448     tcg_gen_andi_i32(tmp, tmp, 0x8000);
449     tcg_gen_andi_i32(t0, t0, ~0x8000);
450     tcg_gen_andi_i32(t1, t1, ~0x8000);
451     tcg_gen_add_i32(t0, t0, t1);
452     tcg_gen_xor_i32(dest, t0, tmp);
453 }
454 
/*
 * Set N and Z flags from var.
 * Both flag variables simply receive a copy of var: arm_test_cc() reads
 * N as the sign of cpu_NF and Z as "cpu_ZF == 0".
 */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}
461 
/* dest = T0 + T1 + CF.  The flags are read but not updated. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}
468 
/* dest = T0 - T1 + CF - 1.  The flags are read but not updated. */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}
476 
/* dest = T0 + T1. Compute C, N, V and Z flags */
static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, 0);
    /* Double-word add with zero high parts: sum in NF, carry out in CF */
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /*
     * VF = (result ^ t0) & ~(t0 ^ t1): the sign bit is set when the
     * operands had the same sign and the result's sign differs.
     */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    /* dest is written last, after all reads of t0 and t1 */
    tcg_gen_mov_i32(dest, cpu_NF);
}
489 
/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (TCG_TARGET_HAS_add2_i32) {
        /*
         * Two chained double-word adds: first t0 + CF, then add t1 to
         * that (sum, carry) pair.  The low word ends up in NF and the
         * carry out in CF.
         */
        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        /*
         * Without a host add2 op, do the sum in 64 bits on the
         * zero-extended inputs and split it: low half to NF, high
         * half (the carry) to CF.
         */
        TCGv_i64 q0 = tcg_temp_new_i64();
        TCGv_i64 q1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(q0, t0);
        tcg_gen_extu_i32_i64(q1, t1);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extu_i32_i64(q1, cpu_CF);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
    }
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* VF = (result ^ t0) & ~(t0 ^ t1), as for a plain add */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    /* dest is written last, after all reads of t0 and t1 */
    tcg_gen_mov_i32(dest, cpu_NF);
}
514 
515 /* dest = T0 - T1. Compute C, N, V and Z flags */
516 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
517 {
518     TCGv_i32 tmp;
519     tcg_gen_sub_i32(cpu_NF, t0, t1);
520     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
521     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
522     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
523     tmp = tcg_temp_new_i32();
524     tcg_gen_xor_i32(tmp, t0, t1);
525     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
526     tcg_gen_mov_i32(dest, cpu_NF);
527 }
528 
529 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
530 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
531 {
532     TCGv_i32 tmp = tcg_temp_new_i32();
533     tcg_gen_not_i32(tmp, t1);
534     gen_adc_CC(dest, t0, tmp);
535 }
536 
/*
 * Define gen_shl() and gen_shr(): shift t0 by the amount in the low
 * byte of t1, result in dest, flags untouched.
 * The TCG shift itself is done with the amount reduced to bits [4:0].
 * If any of bits [7:5] of t1 is set (an amount of 32..255) the movcond
 * selects zero instead.  Bits of t1 above bit 7 are ignored.
 */
#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmpd = tcg_temp_new_i32();                               \
    TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
    TCGv_i32 zero = tcg_constant_i32(0);                              \
    tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
    tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
    tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
    tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT
551 
552 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
553 {
554     TCGv_i32 tmp1 = tcg_temp_new_i32();
555 
556     tcg_gen_andi_i32(tmp1, t1, 0xff);
557     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
558     tcg_gen_sar_i32(dest, t0, tmp1);
559 }
560 
/*
 * Set CF to bit number 'shift' of var (the shifter carry-out for an
 * immediate shift); var itself is not changed.
 */
static void shifter_out_im(TCGv_i32 var, int shift)
{
    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
}
565 
566 /* Shift by immediate.  Includes special handling for shift == 0.  */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568                                     int shift, int flags)
569 {
570     switch (shiftop) {
571     case 0: /* LSL */
572         if (shift != 0) {
573             if (flags)
574                 shifter_out_im(var, 32 - shift);
575             tcg_gen_shli_i32(var, var, shift);
576         }
577         break;
578     case 1: /* LSR */
579         if (shift == 0) {
580             if (flags) {
581                 tcg_gen_shri_i32(cpu_CF, var, 31);
582             }
583             tcg_gen_movi_i32(var, 0);
584         } else {
585             if (flags)
586                 shifter_out_im(var, shift - 1);
587             tcg_gen_shri_i32(var, var, shift);
588         }
589         break;
590     case 2: /* ASR */
591         if (shift == 0)
592             shift = 32;
593         if (flags)
594             shifter_out_im(var, shift - 1);
595         if (shift == 32)
596           shift = 31;
597         tcg_gen_sari_i32(var, var, shift);
598         break;
599     case 3: /* ROR/RRX */
600         if (shift != 0) {
601             if (flags)
602                 shifter_out_im(var, shift - 1);
603             tcg_gen_rotri_i32(var, var, shift); break;
604         } else {
605             TCGv_i32 tmp = tcg_temp_new_i32();
606             tcg_gen_shli_i32(tmp, cpu_CF, 31);
607             if (flags)
608                 shifter_out_im(var, 0);
609             tcg_gen_shri_i32(var, var, 1);
610             tcg_gen_or_i32(var, var, tmp);
611         }
612     }
613 };
614 
615 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616                                      TCGv_i32 shift, int flags)
617 {
618     if (flags) {
619         switch (shiftop) {
620         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
621         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
622         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
623         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
624         }
625     } else {
626         switch (shiftop) {
627         case 0:
628             gen_shl(var, var, shift);
629             break;
630         case 1:
631             gen_shr(var, var, shift);
632             break;
633         case 2:
634             gen_sar(var, var, shift);
635             break;
636         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
637                 tcg_gen_rotr_i32(var, var, shift); break;
638         }
639     }
640 }
641 
/*
 * Generate a conditional based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 *
 * On return cmp->value and cmp->cond describe the test: the condition
 * passes when "value <cond> 0" holds (see arm_jump_cc()).  Condition
 * codes come in even/odd pairs; the odd member is the inverse of the
 * even one and is handled by inverting the TCG condition at the end.
 * Codes 14 and 15 both yield TCG_COND_ALWAYS.  Any cc outside 0..15
 * aborts.
 */
void arm_test_cc(DisasCompare *cmp, int cc)
{
    TCGv_i32 value;
    TCGCond cond;

    switch (cc) {
    case 0: /* eq: Z */
    case 1: /* ne: !Z */
        cond = TCG_COND_EQ;
        value = cpu_ZF;
        break;

    case 2: /* cs: C */
    case 3: /* cc: !C */
        cond = TCG_COND_NE;
        value = cpu_CF;
        break;

    case 4: /* mi: N */
    case 5: /* pl: !N */
        cond = TCG_COND_LT;
        value = cpu_NF;
        break;

    case 6: /* vs: V */
    case 7: /* vc: !V */
        cond = TCG_COND_LT;
        value = cpu_VF;
        break;

    case 8: /* hi: C && !Z */
    case 9: /* ls: !C || Z -> !(C && !Z) */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
           ZF is non-zero for !Z; so AND the two subexpressions.  */
        tcg_gen_neg_i32(value, cpu_CF);
        tcg_gen_and_i32(value, value, cpu_ZF);
        break;

    case 10: /* ge: N == V -> N ^ V == 0 */
    case 11: /* lt: N != V -> N ^ V != 0 */
        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
        cond = TCG_COND_GE;
        value = tcg_temp_new_i32();
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        break;

    case 12: /* gt: !Z && N == V */
    case 13: /* le: Z || N != V */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
         * the sign bit then AND with ZF to yield the result.  */
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        tcg_gen_sari_i32(value, value, 31);
        tcg_gen_andc_i32(value, cpu_ZF, value);
        break;

    case 14: /* always */
    case 15: /* always */
        /* Use the ALWAYS condition, which will fold early.
         * It doesn't matter what we use for the value.  */
        cond = TCG_COND_ALWAYS;
        value = cpu_ZF;
        /* cc 15 is odd, but "always" must not be inverted */
        goto no_invert;

    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }

    /* Odd condition codes are the inverse of their even neighbour */
    if (cc & 1) {
        cond = tcg_invert_cond(cond);
    }

 no_invert:
    cmp->cond = cond;
    cmp->value = value;
}
726 
/*
 * Emit a branch to label that is taken when the comparison described
 * by cmp holds, i.e. when cmp->value satisfies cmp->cond against zero.
 */
void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
{
    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
}
731 
/*
 * Emit a branch to label that is taken when ARM condition code cc
 * passes: arm_test_cc() followed by arm_jump_cc().
 */
void arm_gen_test_cc(int cc, TCGLabel *label)
{
    DisasCompare cmp;
    arm_test_cc(&cmp, cc);
    arm_jump_cc(&cmp, label);
}
738 
739 void gen_set_condexec(DisasContext *s)
740 {
741     if (s->condexec_mask) {
742         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
743 
744         store_cpu_field_constant(val, condexec_bits);
745     }
746 }
747 
/*
 * Set the PC register (cpu_R[15]) to pc_curr + diff, and record in
 * s->pc_save the value that the register now holds.
 */
void gen_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_R[15], diff);
    s->pc_save = s->pc_curr + diff;
}
753 
/*
 * Set PC and Thumb state from var: bit 0 of var becomes the thumb
 * field and the remaining bits become the new PC.
 * var is modified (it is reduced to its bit 0), so the caller must not
 * use it afterwards.  The TB is ended with DISAS_JUMP.
 */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->base.is_jmp = DISAS_JUMP;
    /* The PC must be written before var is overwritten below */
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
    /* The PC is no longer known at translate time */
    s->pc_save = -1;
}
763 
764 /*
765  * Set PC and Thumb state from var. var is marked as dead.
766  * For M-profile CPUs, include logic to detect exception-return
767  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
768  * and BX reg, and no others, and happens only for code in Handler mode.
769  * The Security Extension also requires us to check for the FNC_RETURN
770  * which signals a function return from non-secure state; this can happen
771  * in both Handler and Thread mode.
772  * To avoid having to do multiple comparisons in inline generated code,
773  * we make the check we do here loose, so it will match for EXC_RETURN
774  * in Thread mode. For system emulation do_v7m_exception_exit() checks
775  * for these spurious cases and returns without doing anything (giving
776  * the same behaviour as for a branch to a non-magic address).
777  *
778  * In linux-user mode it is unclear what the right behaviour for an
779  * attempted FNC_RETURN should be, because in real hardware this will go
780  * directly to Secure code (ie not the Linux kernel) which will then treat
781  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
782  * attempt behave the way it would on a CPU without the security extension,
783  * which is to say "like a normal branch". That means we can simply treat
784  * all branches as normal with no magic address behaviour.
785  */
static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
{
    /* Generate the same code here as for a simple bx, but flag via
     * s->base.is_jmp that we need to do the rest of the work later.
     */
    gen_bx(s, var);
#ifndef CONFIG_USER_ONLY
    /*
     * The magic-address check is requested when the CPU has the
     * M-profile Security Extension, or when this is M-profile code
     * running in Handler mode.  It overrides the DISAS_JUMP that
     * gen_bx() has just set.
     */
    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
        s->base.is_jmp = DISAS_BX_EXCRET;
    }
#endif
}
799 
800 static inline void gen_bx_excret_final_code(DisasContext *s)
801 {
802     /* Generate the code to finish possible exception return and end the TB */
803     DisasLabel excret_label = gen_disas_label(s);
804     uint32_t min_magic;
805 
806     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
807         /* Covers FNC_RETURN and EXC_RETURN magic */
808         min_magic = FNC_RETURN_MIN_MAGIC;
809     } else {
810         /* EXC_RETURN magic only */
811         min_magic = EXC_RETURN_MIN_MAGIC;
812     }
813 
814     /* Is the new PC value in the magic range indicating exception return? */
815     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
816     /* No: end the TB as we would for a DISAS_JMP */
817     if (s->ss_active) {
818         gen_singlestep_exception(s);
819     } else {
820         tcg_gen_exit_tb(NULL, 0);
821     }
822     set_disas_label(s, excret_label);
823     /* Yes: this is an exception return.
824      * At this point in runtime env->regs[15] and env->thumb will hold
825      * the exception-return magic number, which do_v7m_exception_exit()
826      * will read. Nothing else will be able to see those values because
827      * the cpu-exec main loop guarantees that we will always go straight
828      * from raising the exception to the exception-handling code.
829      *
830      * gen_ss_advance(s) does nothing on M profile currently but
831      * calling it is conceptually the right thing as we have executed
832      * this instruction (compare SWI, HVC, SMC handling).
833      */
834     gen_ss_advance(s);
835     gen_exception_internal(EXCP_EXCEPTION_EXIT);
836 }
837 
/*
 * Generate code for BXNS: pass the value of register rm to the
 * v7m_bxns helper and end the TB with DISAS_EXIT.
 */
static inline void gen_bxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
     * we need to sync state before calling it, but:
     *  - we don't need to do gen_update_pc() because the bxns helper will
     *    always set the PC itself
     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
     *    unless it's outside an IT block or the last insn in an IT block,
     *    so we know that condexec == 0 (already set at the top of the TB)
     *    is correct in the non-UNPREDICTABLE cases, and we can choose
     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
     */
    gen_helper_v7m_bxns(cpu_env, var);
    s->base.is_jmp = DISAS_EXIT;
}
855 
/*
 * Generate code for BLXNS: set the PC to the address following this
 * insn, pass the value of register rm to the v7m_blxns helper, and end
 * the TB with DISAS_EXIT.
 */
static inline void gen_blxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* We don't need to sync condexec state, for the same reason as bxns.
     * We do however need to set the PC, because the blxns helper reads it.
     * The blxns helper may throw an exception.
     */
    gen_update_pc(s, curr_insn_len(s));
    gen_helper_v7m_blxns(cpu_env, var);
    s->base.is_jmp = DISAS_EXIT;
}
868 
869 /* Variant of store_reg which uses branch&exchange logic when storing
870    to r15 in ARM architecture v7 and above. The source must be a temporary
871    and will be marked as dead. */
872 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
873 {
874     if (reg == 15 && ENABLE_ARCH_7) {
875         gen_bx(s, var);
876     } else {
877         store_reg(s, reg, var);
878     }
879 }
880 
881 /* Variant of store_reg which uses branch&exchange logic when storing
882  * to r15 in ARM architecture v5T and above. This is used for storing
883  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
884  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
885 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
886 {
887     if (reg == 15 && ENABLE_ARCH_5) {
888         gen_bx_excret(s, var);
889     } else {
890         store_reg(s, reg, var);
891     }
892 }
893 
/*
 * IS_USER_ONLY is 1 when building with CONFIG_USER_ONLY and 0 otherwise,
 * so that the configuration can be tested in an ordinary C expression
 * instead of with #ifdef.
 */
#ifdef CONFIG_USER_ONLY
#define IS_USER_ONLY 1
#else
#define IS_USER_ONLY 0
#endif
899 
900 MemOp pow2_align(unsigned i)
901 {
902     static const MemOp mop_align[] = {
903         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
904         /*
905          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
906          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
907          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
908          */
909         MO_ALIGN_16
910     };
911     g_assert(i < ARRAY_SIZE(mop_align));
912     return mop_align[i];
913 }
914 
915 /*
916  * Abstractions of "generate code to do a guest load/store for
917  * AArch32", where a vaddr is always 32 bits (and is zero
918  * extended if we're a 64 bit core) and  data is also
919  * 32 bits unless specifically doing a 64 bit access.
920  * These functions work like tcg_gen_qemu_{ld,st}* except
921  * that the address argument is TCGv_i32 rather than TCGv.
922  */
923 
924 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926     TCGv addr = tcg_temp_new();
927     tcg_gen_extu_i32_tl(addr, a32);
928 
929     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
930     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932     }
933     return addr;
934 }
935 
936 /*
937  * Internal routines are used for NEON cases where the endianness
938  * and/or alignment has already been taken into account and manipulated.
939  */
940 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
941                               TCGv_i32 a32, int index, MemOp opc)
942 {
943     TCGv addr = gen_aa32_addr(s, a32, opc);
944     tcg_gen_qemu_ld_i32(val, addr, index, opc);
945 }
946 
947 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
948                               TCGv_i32 a32, int index, MemOp opc)
949 {
950     TCGv addr = gen_aa32_addr(s, a32, opc);
951     tcg_gen_qemu_st_i32(val, addr, index, opc);
952 }
953 
954 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
955                               TCGv_i32 a32, int index, MemOp opc)
956 {
957     TCGv addr = gen_aa32_addr(s, a32, opc);
958 
959     tcg_gen_qemu_ld_i64(val, addr, index, opc);
960 
961     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
962     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
963         tcg_gen_rotri_i64(val, val, 32);
964     }
965 }
966 
967 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
968                               TCGv_i32 a32, int index, MemOp opc)
969 {
970     TCGv addr = gen_aa32_addr(s, a32, opc);
971 
972     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
973     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
974         TCGv_i64 tmp = tcg_temp_new_i64();
975         tcg_gen_rotri_i64(tmp, val, 32);
976         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
977     } else {
978         tcg_gen_qemu_st_i64(val, addr, index, opc);
979     }
980 }
981 
982 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
983                      int index, MemOp opc)
984 {
985     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
986 }
987 
988 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
989                      int index, MemOp opc)
990 {
991     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
992 }
993 
994 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
995                      int index, MemOp opc)
996 {
997     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
998 }
999 
1000 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1001                      int index, MemOp opc)
1002 {
1003     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1004 }
1005 
/*
 * Define gen_aa32_ld<SUFFIX>(s, val, a32, index): a wrapper around
 * gen_aa32_ld_i32() that always passes the MemOp MEMOP.
 */
#define DO_GEN_LD(SUFFIX, MEMOP)                                          \
    static inline void gen_aa32_ld##SUFFIX(DisasContext *s, TCGv_i32 val, \
                                           TCGv_i32 a32, int index)       \
    {                                                                     \
        gen_aa32_ld_i32(s, val, a32, index, MEMOP);                       \
    }
1012 
/*
 * Define gen_aa32_st<SUFFIX>(s, val, a32, index): a wrapper around
 * gen_aa32_st_i32() that always passes the MemOp MEMOP.
 */
#define DO_GEN_ST(SUFFIX, MEMOP)                                          \
    static inline void gen_aa32_st##SUFFIX(DisasContext *s, TCGv_i32 val, \
                                           TCGv_i32 a32, int index)       \
    {                                                                     \
        gen_aa32_st_i32(s, val, a32, index, MEMOP);                       \
    }
1019 
1020 static inline void gen_hvc(DisasContext *s, int imm16)
1021 {
1022     /* The pre HVC helper handles cases when HVC gets trapped
1023      * as an undefined insn by runtime configuration (ie before
1024      * the insn really executes).
1025      */
1026     gen_update_pc(s, 0);
1027     gen_helper_pre_hvc(cpu_env);
1028     /* Otherwise we will treat this as a real exception which
1029      * happens after execution of the insn. (The distinction matters
1030      * for the PC value reported to the exception handler and also
1031      * for single stepping.)
1032      */
1033     s->svc_imm = imm16;
1034     gen_update_pc(s, curr_insn_len(s));
1035     s->base.is_jmp = DISAS_HVC;
1036 }
1037 
1038 static inline void gen_smc(DisasContext *s)
1039 {
1040     /* As with HVC, we may take an exception either before or after
1041      * the insn executes.
1042      */
1043     gen_update_pc(s, 0);
1044     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1045     gen_update_pc(s, curr_insn_len(s));
1046     s->base.is_jmp = DISAS_SMC;
1047 }
1048 
1049 static void gen_exception_internal_insn(DisasContext *s, int excp)
1050 {
1051     gen_set_condexec(s);
1052     gen_update_pc(s, 0);
1053     gen_exception_internal(excp);
1054     s->base.is_jmp = DISAS_NORETURN;
1055 }
1056 
1057 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1058 {
1059     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1060                                           tcg_constant_i32(syndrome), tcg_el);
1061 }
1062 
1063 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1064 {
1065     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1066 }
1067 
1068 static void gen_exception(int excp, uint32_t syndrome)
1069 {
1070     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1071                                        tcg_constant_i32(syndrome));
1072 }
1073 
1074 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1075                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1076 {
1077     if (s->aarch64) {
1078         gen_a64_update_pc(s, pc_diff);
1079     } else {
1080         gen_set_condexec(s);
1081         gen_update_pc(s, pc_diff);
1082     }
1083     gen_exception_el_v(excp, syn, tcg_el);
1084     s->base.is_jmp = DISAS_NORETURN;
1085 }
1086 
1087 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1088                            uint32_t syn, uint32_t target_el)
1089 {
1090     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1091                             tcg_constant_i32(target_el));
1092 }
1093 
1094 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1095                         int excp, uint32_t syn)
1096 {
1097     if (s->aarch64) {
1098         gen_a64_update_pc(s, pc_diff);
1099     } else {
1100         gen_set_condexec(s);
1101         gen_update_pc(s, pc_diff);
1102     }
1103     gen_exception(excp, syn);
1104     s->base.is_jmp = DISAS_NORETURN;
1105 }
1106 
1107 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1108 {
1109     gen_set_condexec(s);
1110     gen_update_pc(s, 0);
1111     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 void unallocated_encoding(DisasContext *s)
1116 {
1117     /* Unallocated and reserved encodings are uncategorized */
1118     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1119 }
1120 
1121 /* Force a TB lookup after an instruction that changes the CPU state.  */
1122 void gen_lookup_tb(DisasContext *s)
1123 {
1124     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1125     s->base.is_jmp = DISAS_EXIT;
1126 }
1127 
1128 static inline void gen_hlt(DisasContext *s, int imm)
1129 {
1130     /* HLT. This has two purposes.
1131      * Architecturally, it is an external halting debug instruction.
1132      * Since QEMU doesn't implement external debug, we treat this as
1133      * it is required for halting debug disabled: it will UNDEF.
1134      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1135      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1136      * must trigger semihosting even for ARMv7 and earlier, where
1137      * HLT was an undefined encoding.
1138      * In system mode, we don't allow userspace access to
1139      * semihosting, to provide some semblance of security
1140      * (and for consistency with our 32-bit semihosting).
1141      */
1142     if (semihosting_enabled(s->current_el == 0) &&
1143         (imm == (s->thumb ? 0x3c : 0xf000))) {
1144         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1145         return;
1146     }
1147 
1148     unallocated_encoding(s);
1149 }
1150 
1151 /*
1152  * Return the offset of a "full" NEON Dreg.
1153  */
1154 long neon_full_reg_offset(unsigned reg)
1155 {
1156     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1157 }
1158 
1159 /*
1160  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1161  * where 0 is the least significant end of the register.
1162  */
1163 long neon_element_offset(int reg, int element, MemOp memop)
1164 {
1165     int element_size = 1 << (memop & MO_SIZE);
1166     int ofs = element * element_size;
1167 #if HOST_BIG_ENDIAN
1168     /*
1169      * Calculate the offset assuming fully little-endian,
1170      * then XOR to account for the order of the 8-byte units.
1171      */
1172     if (element_size < 8) {
1173         ofs ^= 8 - element_size;
1174     }
1175 #endif
1176     return neon_full_reg_offset(reg) + ofs;
1177 }
1178 
1179 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1180 long vfp_reg_offset(bool dp, unsigned reg)
1181 {
1182     if (dp) {
1183         return neon_element_offset(reg, 0, MO_64);
1184     } else {
1185         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1186     }
1187 }
1188 
1189 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1190 {
1191     long off = neon_element_offset(reg, ele, memop);
1192 
1193     switch (memop) {
1194     case MO_SB:
1195         tcg_gen_ld8s_i32(dest, cpu_env, off);
1196         break;
1197     case MO_UB:
1198         tcg_gen_ld8u_i32(dest, cpu_env, off);
1199         break;
1200     case MO_SW:
1201         tcg_gen_ld16s_i32(dest, cpu_env, off);
1202         break;
1203     case MO_UW:
1204         tcg_gen_ld16u_i32(dest, cpu_env, off);
1205         break;
1206     case MO_UL:
1207     case MO_SL:
1208         tcg_gen_ld_i32(dest, cpu_env, off);
1209         break;
1210     default:
1211         g_assert_not_reached();
1212     }
1213 }
1214 
1215 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1216 {
1217     long off = neon_element_offset(reg, ele, memop);
1218 
1219     switch (memop) {
1220     case MO_SL:
1221         tcg_gen_ld32s_i64(dest, cpu_env, off);
1222         break;
1223     case MO_UL:
1224         tcg_gen_ld32u_i64(dest, cpu_env, off);
1225         break;
1226     case MO_UQ:
1227         tcg_gen_ld_i64(dest, cpu_env, off);
1228         break;
1229     default:
1230         g_assert_not_reached();
1231     }
1232 }
1233 
1234 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1235 {
1236     long off = neon_element_offset(reg, ele, memop);
1237 
1238     switch (memop) {
1239     case MO_8:
1240         tcg_gen_st8_i32(src, cpu_env, off);
1241         break;
1242     case MO_16:
1243         tcg_gen_st16_i32(src, cpu_env, off);
1244         break;
1245     case MO_32:
1246         tcg_gen_st_i32(src, cpu_env, off);
1247         break;
1248     default:
1249         g_assert_not_reached();
1250     }
1251 }
1252 
1253 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1254 {
1255     long off = neon_element_offset(reg, ele, memop);
1256 
1257     switch (memop) {
1258     case MO_32:
1259         tcg_gen_st32_i64(src, cpu_env, off);
1260         break;
1261     case MO_64:
1262         tcg_gen_st_i64(src, cpu_env, off);
1263         break;
1264     default:
1265         g_assert_not_reached();
1266     }
1267 }
1268 
/*
 * Bit 20 of a coprocessor insn. The iwMMXt decoder tests it to tell the
 * forms that read from the coprocessor or memory (TMRRC, WLDR*: bit set)
 * from those that write (TMCRR, WSTR*: bit clear).
 */
#define ARM_CP_RW_BIT   (1 << 20)
1270 
1271 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1272 {
1273     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1274 }
1275 
1276 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1277 {
1278     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1279 }
1280 
1281 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1282 {
1283     TCGv_i32 var = tcg_temp_new_i32();
1284     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285     return var;
1286 }
1287 
1288 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1289 {
1290     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1291 }
1292 
1293 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1294 {
1295     iwmmxt_store_reg(cpu_M0, rn);
1296 }
1297 
1298 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1299 {
1300     iwmmxt_load_reg(cpu_M0, rn);
1301 }
1302 
1303 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1304 {
1305     iwmmxt_load_reg(cpu_V1, rn);
1306     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1307 }
1308 
1309 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1310 {
1311     iwmmxt_load_reg(cpu_V1, rn);
1312     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1313 }
1314 
1315 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1316 {
1317     iwmmxt_load_reg(cpu_V1, rn);
1318     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1319 }
1320 
1321 #define IWMMXT_OP(name) \
1322 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1323 { \
1324     iwmmxt_load_reg(cpu_V1, rn); \
1325     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1326 }
1327 
1328 #define IWMMXT_OP_ENV(name) \
1329 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1330 { \
1331     iwmmxt_load_reg(cpu_V1, rn); \
1332     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1333 }
1334 
1335 #define IWMMXT_OP_ENV_SIZE(name) \
1336 IWMMXT_OP_ENV(name##b) \
1337 IWMMXT_OP_ENV(name##w) \
1338 IWMMXT_OP_ENV(name##l)
1339 
1340 #define IWMMXT_OP_ENV1(name) \
1341 static inline void gen_op_iwmmxt_##name##_M0(void) \
1342 { \
1343     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1344 }
1345 
1346 IWMMXT_OP(maddsq)
1347 IWMMXT_OP(madduq)
1348 IWMMXT_OP(sadb)
1349 IWMMXT_OP(sadw)
1350 IWMMXT_OP(mulslw)
1351 IWMMXT_OP(mulshw)
1352 IWMMXT_OP(mululw)
1353 IWMMXT_OP(muluhw)
1354 IWMMXT_OP(macsw)
1355 IWMMXT_OP(macuw)
1356 
1357 IWMMXT_OP_ENV_SIZE(unpackl)
1358 IWMMXT_OP_ENV_SIZE(unpackh)
1359 
1360 IWMMXT_OP_ENV1(unpacklub)
1361 IWMMXT_OP_ENV1(unpackluw)
1362 IWMMXT_OP_ENV1(unpacklul)
1363 IWMMXT_OP_ENV1(unpackhub)
1364 IWMMXT_OP_ENV1(unpackhuw)
1365 IWMMXT_OP_ENV1(unpackhul)
1366 IWMMXT_OP_ENV1(unpacklsb)
1367 IWMMXT_OP_ENV1(unpacklsw)
1368 IWMMXT_OP_ENV1(unpacklsl)
1369 IWMMXT_OP_ENV1(unpackhsb)
1370 IWMMXT_OP_ENV1(unpackhsw)
1371 IWMMXT_OP_ENV1(unpackhsl)
1372 
1373 IWMMXT_OP_ENV_SIZE(cmpeq)
1374 IWMMXT_OP_ENV_SIZE(cmpgtu)
1375 IWMMXT_OP_ENV_SIZE(cmpgts)
1376 
1377 IWMMXT_OP_ENV_SIZE(mins)
1378 IWMMXT_OP_ENV_SIZE(minu)
1379 IWMMXT_OP_ENV_SIZE(maxs)
1380 IWMMXT_OP_ENV_SIZE(maxu)
1381 
1382 IWMMXT_OP_ENV_SIZE(subn)
1383 IWMMXT_OP_ENV_SIZE(addn)
1384 IWMMXT_OP_ENV_SIZE(subu)
1385 IWMMXT_OP_ENV_SIZE(addu)
1386 IWMMXT_OP_ENV_SIZE(subs)
1387 IWMMXT_OP_ENV_SIZE(adds)
1388 
1389 IWMMXT_OP_ENV(avgb0)
1390 IWMMXT_OP_ENV(avgb1)
1391 IWMMXT_OP_ENV(avgw0)
1392 IWMMXT_OP_ENV(avgw1)
1393 
1394 IWMMXT_OP_ENV(packuw)
1395 IWMMXT_OP_ENV(packul)
1396 IWMMXT_OP_ENV(packuq)
1397 IWMMXT_OP_ENV(packsw)
1398 IWMMXT_OP_ENV(packsl)
1399 IWMMXT_OP_ENV(packsq)
1400 
1401 static void gen_op_iwmmxt_set_mup(void)
1402 {
1403     TCGv_i32 tmp;
1404     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1405     tcg_gen_ori_i32(tmp, tmp, 2);
1406     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1407 }
1408 
1409 static void gen_op_iwmmxt_set_cup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 1);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_setpsr_nz(void)
1418 {
1419     TCGv_i32 tmp = tcg_temp_new_i32();
1420     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1421     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1422 }
1423 
1424 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1425 {
1426     iwmmxt_load_reg(cpu_V1, rn);
1427     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1428     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1429 }
1430 
1431 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1432                                      TCGv_i32 dest)
1433 {
1434     int rd;
1435     uint32_t offset;
1436     TCGv_i32 tmp;
1437 
1438     rd = (insn >> 16) & 0xf;
1439     tmp = load_reg(s, rd);
1440 
1441     offset = (insn & 0xff) << ((insn >> 7) & 2);
1442     if (insn & (1 << 24)) {
1443         /* Pre indexed */
1444         if (insn & (1 << 23))
1445             tcg_gen_addi_i32(tmp, tmp, offset);
1446         else
1447             tcg_gen_addi_i32(tmp, tmp, -offset);
1448         tcg_gen_mov_i32(dest, tmp);
1449         if (insn & (1 << 21)) {
1450             store_reg(s, rd, tmp);
1451         }
1452     } else if (insn & (1 << 21)) {
1453         /* Post indexed */
1454         tcg_gen_mov_i32(dest, tmp);
1455         if (insn & (1 << 23))
1456             tcg_gen_addi_i32(tmp, tmp, offset);
1457         else
1458             tcg_gen_addi_i32(tmp, tmp, -offset);
1459         store_reg(s, rd, tmp);
1460     } else if (!(insn & (1 << 23)))
1461         return 1;
1462     return 0;
1463 }
1464 
1465 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1466 {
1467     int rd = (insn >> 0) & 0xf;
1468     TCGv_i32 tmp;
1469 
1470     if (insn & (1 << 8)) {
1471         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1472             return 1;
1473         } else {
1474             tmp = iwmmxt_load_creg(rd);
1475         }
1476     } else {
1477         tmp = tcg_temp_new_i32();
1478         iwmmxt_load_reg(cpu_V0, rd);
1479         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1480     }
1481     tcg_gen_andi_i32(tmp, tmp, mask);
1482     tcg_gen_mov_i32(dest, tmp);
1483     return 0;
1484 }
1485 
1486 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1487    (ie. an undefined instruction).  */
1488 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1489 {
1490     int rd, wrd;
1491     int rdhi, rdlo, rd0, rd1, i;
1492     TCGv_i32 addr;
1493     TCGv_i32 tmp, tmp2, tmp3;
1494 
1495     if ((insn & 0x0e000e00) == 0x0c000000) {
1496         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1497             wrd = insn & 0xf;
1498             rdlo = (insn >> 12) & 0xf;
1499             rdhi = (insn >> 16) & 0xf;
1500             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1501                 iwmmxt_load_reg(cpu_V0, wrd);
1502                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1503                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1504             } else {                                    /* TMCRR */
1505                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1506                 iwmmxt_store_reg(cpu_V0, wrd);
1507                 gen_op_iwmmxt_set_mup();
1508             }
1509             return 0;
1510         }
1511 
1512         wrd = (insn >> 12) & 0xf;
1513         addr = tcg_temp_new_i32();
1514         if (gen_iwmmxt_address(s, insn, addr)) {
1515             return 1;
1516         }
1517         if (insn & ARM_CP_RW_BIT) {
1518             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1519                 tmp = tcg_temp_new_i32();
1520                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1521                 iwmmxt_store_creg(wrd, tmp);
1522             } else {
1523                 i = 1;
1524                 if (insn & (1 << 8)) {
1525                     if (insn & (1 << 22)) {             /* WLDRD */
1526                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1527                         i = 0;
1528                     } else {                            /* WLDRW wRd */
1529                         tmp = tcg_temp_new_i32();
1530                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1531                     }
1532                 } else {
1533                     tmp = tcg_temp_new_i32();
1534                     if (insn & (1 << 22)) {             /* WLDRH */
1535                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1536                     } else {                            /* WLDRB */
1537                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1538                     }
1539                 }
1540                 if (i) {
1541                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1542                 }
1543                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1544             }
1545         } else {
1546             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1547                 tmp = iwmmxt_load_creg(wrd);
1548                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1549             } else {
1550                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1551                 tmp = tcg_temp_new_i32();
1552                 if (insn & (1 << 8)) {
1553                     if (insn & (1 << 22)) {             /* WSTRD */
1554                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1555                     } else {                            /* WSTRW wRd */
1556                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1557                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1558                     }
1559                 } else {
1560                     if (insn & (1 << 22)) {             /* WSTRH */
1561                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1562                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1563                     } else {                            /* WSTRB */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 }
1568             }
1569         }
1570         return 0;
1571     }
1572 
1573     if ((insn & 0x0f000000) != 0x0e000000)
1574         return 1;
1575 
1576     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1577     case 0x000:                                                 /* WOR */
1578         wrd = (insn >> 12) & 0xf;
1579         rd0 = (insn >> 0) & 0xf;
1580         rd1 = (insn >> 16) & 0xf;
1581         gen_op_iwmmxt_movq_M0_wRn(rd0);
1582         gen_op_iwmmxt_orq_M0_wRn(rd1);
1583         gen_op_iwmmxt_setpsr_nz();
1584         gen_op_iwmmxt_movq_wRn_M0(wrd);
1585         gen_op_iwmmxt_set_mup();
1586         gen_op_iwmmxt_set_cup();
1587         break;
1588     case 0x011:                                                 /* TMCR */
1589         if (insn & 0xf)
1590             return 1;
1591         rd = (insn >> 12) & 0xf;
1592         wrd = (insn >> 16) & 0xf;
1593         switch (wrd) {
1594         case ARM_IWMMXT_wCID:
1595         case ARM_IWMMXT_wCASF:
1596             break;
1597         case ARM_IWMMXT_wCon:
1598             gen_op_iwmmxt_set_cup();
1599             /* Fall through.  */
1600         case ARM_IWMMXT_wCSSF:
1601             tmp = iwmmxt_load_creg(wrd);
1602             tmp2 = load_reg(s, rd);
1603             tcg_gen_andc_i32(tmp, tmp, tmp2);
1604             iwmmxt_store_creg(wrd, tmp);
1605             break;
1606         case ARM_IWMMXT_wCGR0:
1607         case ARM_IWMMXT_wCGR1:
1608         case ARM_IWMMXT_wCGR2:
1609         case ARM_IWMMXT_wCGR3:
1610             gen_op_iwmmxt_set_cup();
1611             tmp = load_reg(s, rd);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         default:
1615             return 1;
1616         }
1617         break;
1618     case 0x100:                                                 /* WXOR */
1619         wrd = (insn >> 12) & 0xf;
1620         rd0 = (insn >> 0) & 0xf;
1621         rd1 = (insn >> 16) & 0xf;
1622         gen_op_iwmmxt_movq_M0_wRn(rd0);
1623         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1624         gen_op_iwmmxt_setpsr_nz();
1625         gen_op_iwmmxt_movq_wRn_M0(wrd);
1626         gen_op_iwmmxt_set_mup();
1627         gen_op_iwmmxt_set_cup();
1628         break;
1629     case 0x111:                                                 /* TMRC */
1630         if (insn & 0xf)
1631             return 1;
1632         rd = (insn >> 12) & 0xf;
1633         wrd = (insn >> 16) & 0xf;
1634         tmp = iwmmxt_load_creg(wrd);
1635         store_reg(s, rd, tmp);
1636         break;
1637     case 0x300:                                                 /* WANDN */
1638         wrd = (insn >> 12) & 0xf;
1639         rd0 = (insn >> 0) & 0xf;
1640         rd1 = (insn >> 16) & 0xf;
1641         gen_op_iwmmxt_movq_M0_wRn(rd0);
1642         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1643         gen_op_iwmmxt_andq_M0_wRn(rd1);
1644         gen_op_iwmmxt_setpsr_nz();
1645         gen_op_iwmmxt_movq_wRn_M0(wrd);
1646         gen_op_iwmmxt_set_mup();
1647         gen_op_iwmmxt_set_cup();
1648         break;
1649     case 0x200:                                                 /* WAND */
1650         wrd = (insn >> 12) & 0xf;
1651         rd0 = (insn >> 0) & 0xf;
1652         rd1 = (insn >> 16) & 0xf;
1653         gen_op_iwmmxt_movq_M0_wRn(rd0);
1654         gen_op_iwmmxt_andq_M0_wRn(rd1);
1655         gen_op_iwmmxt_setpsr_nz();
1656         gen_op_iwmmxt_movq_wRn_M0(wrd);
1657         gen_op_iwmmxt_set_mup();
1658         gen_op_iwmmxt_set_cup();
1659         break;
1660     case 0x810: case 0xa10:                             /* WMADD */
1661         wrd = (insn >> 12) & 0xf;
1662         rd0 = (insn >> 0) & 0xf;
1663         rd1 = (insn >> 16) & 0xf;
1664         gen_op_iwmmxt_movq_M0_wRn(rd0);
1665         if (insn & (1 << 21))
1666             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1667         else
1668             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1669         gen_op_iwmmxt_movq_wRn_M0(wrd);
1670         gen_op_iwmmxt_set_mup();
1671         break;
1672     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1673         wrd = (insn >> 12) & 0xf;
1674         rd0 = (insn >> 16) & 0xf;
1675         rd1 = (insn >> 0) & 0xf;
1676         gen_op_iwmmxt_movq_M0_wRn(rd0);
1677         switch ((insn >> 22) & 3) {
1678         case 0:
1679             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1680             break;
1681         case 1:
1682             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1683             break;
1684         case 2:
1685             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1686             break;
1687         case 3:
1688             return 1;
1689         }
1690         gen_op_iwmmxt_movq_wRn_M0(wrd);
1691         gen_op_iwmmxt_set_mup();
1692         gen_op_iwmmxt_set_cup();
1693         break;
1694     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1695         wrd = (insn >> 12) & 0xf;
1696         rd0 = (insn >> 16) & 0xf;
1697         rd1 = (insn >> 0) & 0xf;
1698         gen_op_iwmmxt_movq_M0_wRn(rd0);
1699         switch ((insn >> 22) & 3) {
1700         case 0:
1701             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1702             break;
1703         case 1:
1704             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1705             break;
1706         case 2:
1707             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1708             break;
1709         case 3:
1710             return 1;
1711         }
1712         gen_op_iwmmxt_movq_wRn_M0(wrd);
1713         gen_op_iwmmxt_set_mup();
1714         gen_op_iwmmxt_set_cup();
1715         break;
1716     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1717         wrd = (insn >> 12) & 0xf;
1718         rd0 = (insn >> 16) & 0xf;
1719         rd1 = (insn >> 0) & 0xf;
1720         gen_op_iwmmxt_movq_M0_wRn(rd0);
1721         if (insn & (1 << 22))
1722             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1723         else
1724             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1725         if (!(insn & (1 << 20)))
1726             gen_op_iwmmxt_addl_M0_wRn(wrd);
1727         gen_op_iwmmxt_movq_wRn_M0(wrd);
1728         gen_op_iwmmxt_set_mup();
1729         break;
1730     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1731         wrd = (insn >> 12) & 0xf;
1732         rd0 = (insn >> 16) & 0xf;
1733         rd1 = (insn >> 0) & 0xf;
1734         gen_op_iwmmxt_movq_M0_wRn(rd0);
1735         if (insn & (1 << 21)) {
1736             if (insn & (1 << 20))
1737                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1738             else
1739                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1740         } else {
1741             if (insn & (1 << 20))
1742                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1743             else
1744                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1745         }
1746         gen_op_iwmmxt_movq_wRn_M0(wrd);
1747         gen_op_iwmmxt_set_mup();
1748         break;
1749     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1750         wrd = (insn >> 12) & 0xf;
1751         rd0 = (insn >> 16) & 0xf;
1752         rd1 = (insn >> 0) & 0xf;
1753         gen_op_iwmmxt_movq_M0_wRn(rd0);
1754         if (insn & (1 << 21))
1755             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1756         else
1757             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1758         if (!(insn & (1 << 20))) {
1759             iwmmxt_load_reg(cpu_V1, wrd);
1760             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1761         }
1762         gen_op_iwmmxt_movq_wRn_M0(wrd);
1763         gen_op_iwmmxt_set_mup();
1764         break;
1765     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1766         wrd = (insn >> 12) & 0xf;
1767         rd0 = (insn >> 16) & 0xf;
1768         rd1 = (insn >> 0) & 0xf;
1769         gen_op_iwmmxt_movq_M0_wRn(rd0);
1770         switch ((insn >> 22) & 3) {
1771         case 0:
1772             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1773             break;
1774         case 1:
1775             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1776             break;
1777         case 2:
1778             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1779             break;
1780         case 3:
1781             return 1;
1782         }
1783         gen_op_iwmmxt_movq_wRn_M0(wrd);
1784         gen_op_iwmmxt_set_mup();
1785         gen_op_iwmmxt_set_cup();
1786         break;
1787     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1788         wrd = (insn >> 12) & 0xf;
1789         rd0 = (insn >> 16) & 0xf;
1790         rd1 = (insn >> 0) & 0xf;
1791         gen_op_iwmmxt_movq_M0_wRn(rd0);
1792         if (insn & (1 << 22)) {
1793             if (insn & (1 << 20))
1794                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1795             else
1796                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1797         } else {
1798             if (insn & (1 << 20))
1799                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1800             else
1801                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1802         }
1803         gen_op_iwmmxt_movq_wRn_M0(wrd);
1804         gen_op_iwmmxt_set_mup();
1805         gen_op_iwmmxt_set_cup();
1806         break;
1807     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1808         wrd = (insn >> 12) & 0xf;
1809         rd0 = (insn >> 16) & 0xf;
1810         rd1 = (insn >> 0) & 0xf;
1811         gen_op_iwmmxt_movq_M0_wRn(rd0);
1812         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1813         tcg_gen_andi_i32(tmp, tmp, 7);
1814         iwmmxt_load_reg(cpu_V1, rd1);
1815         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1816         gen_op_iwmmxt_movq_wRn_M0(wrd);
1817         gen_op_iwmmxt_set_mup();
1818         break;
1819     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1820         if (((insn >> 6) & 3) == 3)
1821             return 1;
1822         rd = (insn >> 12) & 0xf;
1823         wrd = (insn >> 16) & 0xf;
1824         tmp = load_reg(s, rd);
1825         gen_op_iwmmxt_movq_M0_wRn(wrd);
1826         switch ((insn >> 6) & 3) {
1827         case 0:
1828             tmp2 = tcg_constant_i32(0xff);
1829             tmp3 = tcg_constant_i32((insn & 7) << 3);
1830             break;
1831         case 1:
1832             tmp2 = tcg_constant_i32(0xffff);
1833             tmp3 = tcg_constant_i32((insn & 3) << 4);
1834             break;
1835         case 2:
1836             tmp2 = tcg_constant_i32(0xffffffff);
1837             tmp3 = tcg_constant_i32((insn & 1) << 5);
1838             break;
1839         default:
1840             g_assert_not_reached();
1841         }
1842         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1843         gen_op_iwmmxt_movq_wRn_M0(wrd);
1844         gen_op_iwmmxt_set_mup();
1845         break;
1846     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1847         rd = (insn >> 12) & 0xf;
1848         wrd = (insn >> 16) & 0xf;
1849         if (rd == 15 || ((insn >> 22) & 3) == 3)
1850             return 1;
1851         gen_op_iwmmxt_movq_M0_wRn(wrd);
1852         tmp = tcg_temp_new_i32();
1853         switch ((insn >> 22) & 3) {
1854         case 0:
1855             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1856             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1857             if (insn & 8) {
1858                 tcg_gen_ext8s_i32(tmp, tmp);
1859             } else {
1860                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1861             }
1862             break;
1863         case 1:
1864             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1865             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1866             if (insn & 8) {
1867                 tcg_gen_ext16s_i32(tmp, tmp);
1868             } else {
1869                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1870             }
1871             break;
1872         case 2:
1873             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1874             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1875             break;
1876         }
1877         store_reg(s, rd, tmp);
1878         break;
1879     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1880         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1881             return 1;
1882         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1883         switch ((insn >> 22) & 3) {
1884         case 0:
1885             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1886             break;
1887         case 1:
1888             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1889             break;
1890         case 2:
1891             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1892             break;
1893         }
1894         tcg_gen_shli_i32(tmp, tmp, 28);
1895         gen_set_nzcv(tmp);
1896         break;
1897     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1898         if (((insn >> 6) & 3) == 3)
1899             return 1;
1900         rd = (insn >> 12) & 0xf;
1901         wrd = (insn >> 16) & 0xf;
1902         tmp = load_reg(s, rd);
1903         switch ((insn >> 6) & 3) {
1904         case 0:
1905             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1906             break;
1907         case 1:
1908             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1909             break;
1910         case 2:
1911             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1912             break;
1913         }
1914         gen_op_iwmmxt_movq_wRn_M0(wrd);
1915         gen_op_iwmmxt_set_mup();
1916         break;
1917     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1918         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1919             return 1;
1920         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1921         tmp2 = tcg_temp_new_i32();
1922         tcg_gen_mov_i32(tmp2, tmp);
1923         switch ((insn >> 22) & 3) {
1924         case 0:
1925             for (i = 0; i < 7; i ++) {
1926                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1927                 tcg_gen_and_i32(tmp, tmp, tmp2);
1928             }
1929             break;
1930         case 1:
1931             for (i = 0; i < 3; i ++) {
1932                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1933                 tcg_gen_and_i32(tmp, tmp, tmp2);
1934             }
1935             break;
1936         case 2:
1937             tcg_gen_shli_i32(tmp2, tmp2, 16);
1938             tcg_gen_and_i32(tmp, tmp, tmp2);
1939             break;
1940         }
1941         gen_set_nzcv(tmp);
1942         break;
1943     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1944         wrd = (insn >> 12) & 0xf;
1945         rd0 = (insn >> 16) & 0xf;
1946         gen_op_iwmmxt_movq_M0_wRn(rd0);
1947         switch ((insn >> 22) & 3) {
1948         case 0:
1949             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1950             break;
1951         case 1:
1952             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1953             break;
1954         case 2:
1955             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1956             break;
1957         case 3:
1958             return 1;
1959         }
1960         gen_op_iwmmxt_movq_wRn_M0(wrd);
1961         gen_op_iwmmxt_set_mup();
1962         break;
1963     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1964         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1965             return 1;
1966         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1967         tmp2 = tcg_temp_new_i32();
1968         tcg_gen_mov_i32(tmp2, tmp);
1969         switch ((insn >> 22) & 3) {
1970         case 0:
1971             for (i = 0; i < 7; i ++) {
1972                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1973                 tcg_gen_or_i32(tmp, tmp, tmp2);
1974             }
1975             break;
1976         case 1:
1977             for (i = 0; i < 3; i ++) {
1978                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1979                 tcg_gen_or_i32(tmp, tmp, tmp2);
1980             }
1981             break;
1982         case 2:
1983             tcg_gen_shli_i32(tmp2, tmp2, 16);
1984             tcg_gen_or_i32(tmp, tmp, tmp2);
1985             break;
1986         }
1987         gen_set_nzcv(tmp);
1988         break;
1989     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1990         rd = (insn >> 12) & 0xf;
1991         rd0 = (insn >> 16) & 0xf;
1992         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1993             return 1;
1994         gen_op_iwmmxt_movq_M0_wRn(rd0);
1995         tmp = tcg_temp_new_i32();
1996         switch ((insn >> 22) & 3) {
1997         case 0:
1998             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1999             break;
2000         case 1:
2001             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2002             break;
2003         case 2:
2004             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2005             break;
2006         }
2007         store_reg(s, rd, tmp);
2008         break;
2009     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2010     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2011         wrd = (insn >> 12) & 0xf;
2012         rd0 = (insn >> 16) & 0xf;
2013         rd1 = (insn >> 0) & 0xf;
2014         gen_op_iwmmxt_movq_M0_wRn(rd0);
2015         switch ((insn >> 22) & 3) {
2016         case 0:
2017             if (insn & (1 << 21))
2018                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2019             else
2020                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2021             break;
2022         case 1:
2023             if (insn & (1 << 21))
2024                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2025             else
2026                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2027             break;
2028         case 2:
2029             if (insn & (1 << 21))
2030                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2031             else
2032                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2033             break;
2034         case 3:
2035             return 1;
2036         }
2037         gen_op_iwmmxt_movq_wRn_M0(wrd);
2038         gen_op_iwmmxt_set_mup();
2039         gen_op_iwmmxt_set_cup();
2040         break;
2041     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2042     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2043         wrd = (insn >> 12) & 0xf;
2044         rd0 = (insn >> 16) & 0xf;
2045         gen_op_iwmmxt_movq_M0_wRn(rd0);
2046         switch ((insn >> 22) & 3) {
2047         case 0:
2048             if (insn & (1 << 21))
2049                 gen_op_iwmmxt_unpacklsb_M0();
2050             else
2051                 gen_op_iwmmxt_unpacklub_M0();
2052             break;
2053         case 1:
2054             if (insn & (1 << 21))
2055                 gen_op_iwmmxt_unpacklsw_M0();
2056             else
2057                 gen_op_iwmmxt_unpackluw_M0();
2058             break;
2059         case 2:
2060             if (insn & (1 << 21))
2061                 gen_op_iwmmxt_unpacklsl_M0();
2062             else
2063                 gen_op_iwmmxt_unpacklul_M0();
2064             break;
2065         case 3:
2066             return 1;
2067         }
2068         gen_op_iwmmxt_movq_wRn_M0(wrd);
2069         gen_op_iwmmxt_set_mup();
2070         gen_op_iwmmxt_set_cup();
2071         break;
2072     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2073     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2074         wrd = (insn >> 12) & 0xf;
2075         rd0 = (insn >> 16) & 0xf;
2076         gen_op_iwmmxt_movq_M0_wRn(rd0);
2077         switch ((insn >> 22) & 3) {
2078         case 0:
2079             if (insn & (1 << 21))
2080                 gen_op_iwmmxt_unpackhsb_M0();
2081             else
2082                 gen_op_iwmmxt_unpackhub_M0();
2083             break;
2084         case 1:
2085             if (insn & (1 << 21))
2086                 gen_op_iwmmxt_unpackhsw_M0();
2087             else
2088                 gen_op_iwmmxt_unpackhuw_M0();
2089             break;
2090         case 2:
2091             if (insn & (1 << 21))
2092                 gen_op_iwmmxt_unpackhsl_M0();
2093             else
2094                 gen_op_iwmmxt_unpackhul_M0();
2095             break;
2096         case 3:
2097             return 1;
2098         }
2099         gen_op_iwmmxt_movq_wRn_M0(wrd);
2100         gen_op_iwmmxt_set_mup();
2101         gen_op_iwmmxt_set_cup();
2102         break;
2103     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2104     case 0x214: case 0x614: case 0xa14: case 0xe14:
2105         if (((insn >> 22) & 3) == 0)
2106             return 1;
2107         wrd = (insn >> 12) & 0xf;
2108         rd0 = (insn >> 16) & 0xf;
2109         gen_op_iwmmxt_movq_M0_wRn(rd0);
2110         tmp = tcg_temp_new_i32();
2111         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2112             return 1;
2113         }
2114         switch ((insn >> 22) & 3) {
2115         case 1:
2116             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2117             break;
2118         case 2:
2119             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2120             break;
2121         case 3:
2122             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2123             break;
2124         }
2125         gen_op_iwmmxt_movq_wRn_M0(wrd);
2126         gen_op_iwmmxt_set_mup();
2127         gen_op_iwmmxt_set_cup();
2128         break;
2129     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2130     case 0x014: case 0x414: case 0x814: case 0xc14:
2131         if (((insn >> 22) & 3) == 0)
2132             return 1;
2133         wrd = (insn >> 12) & 0xf;
2134         rd0 = (insn >> 16) & 0xf;
2135         gen_op_iwmmxt_movq_M0_wRn(rd0);
2136         tmp = tcg_temp_new_i32();
2137         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2138             return 1;
2139         }
2140         switch ((insn >> 22) & 3) {
2141         case 1:
2142             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2143             break;
2144         case 2:
2145             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2146             break;
2147         case 3:
2148             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2149             break;
2150         }
2151         gen_op_iwmmxt_movq_wRn_M0(wrd);
2152         gen_op_iwmmxt_set_mup();
2153         gen_op_iwmmxt_set_cup();
2154         break;
2155     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2156     case 0x114: case 0x514: case 0x914: case 0xd14:
2157         if (((insn >> 22) & 3) == 0)
2158             return 1;
2159         wrd = (insn >> 12) & 0xf;
2160         rd0 = (insn >> 16) & 0xf;
2161         gen_op_iwmmxt_movq_M0_wRn(rd0);
2162         tmp = tcg_temp_new_i32();
2163         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2164             return 1;
2165         }
2166         switch ((insn >> 22) & 3) {
2167         case 1:
2168             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2169             break;
2170         case 2:
2171             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2172             break;
2173         case 3:
2174             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2175             break;
2176         }
2177         gen_op_iwmmxt_movq_wRn_M0(wrd);
2178         gen_op_iwmmxt_set_mup();
2179         gen_op_iwmmxt_set_cup();
2180         break;
2181     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2182     case 0x314: case 0x714: case 0xb14: case 0xf14:
2183         if (((insn >> 22) & 3) == 0)
2184             return 1;
2185         wrd = (insn >> 12) & 0xf;
2186         rd0 = (insn >> 16) & 0xf;
2187         gen_op_iwmmxt_movq_M0_wRn(rd0);
2188         tmp = tcg_temp_new_i32();
2189         switch ((insn >> 22) & 3) {
2190         case 1:
2191             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2192                 return 1;
2193             }
2194             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2195             break;
2196         case 2:
2197             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2198                 return 1;
2199             }
2200             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2201             break;
2202         case 3:
2203             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2204                 return 1;
2205             }
2206             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2207             break;
2208         }
2209         gen_op_iwmmxt_movq_wRn_M0(wrd);
2210         gen_op_iwmmxt_set_mup();
2211         gen_op_iwmmxt_set_cup();
2212         break;
2213     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2214     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2215         wrd = (insn >> 12) & 0xf;
2216         rd0 = (insn >> 16) & 0xf;
2217         rd1 = (insn >> 0) & 0xf;
2218         gen_op_iwmmxt_movq_M0_wRn(rd0);
2219         switch ((insn >> 22) & 3) {
2220         case 0:
2221             if (insn & (1 << 21))
2222                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2223             else
2224                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2225             break;
2226         case 1:
2227             if (insn & (1 << 21))
2228                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2229             else
2230                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2231             break;
2232         case 2:
2233             if (insn & (1 << 21))
2234                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2235             else
2236                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2237             break;
2238         case 3:
2239             return 1;
2240         }
2241         gen_op_iwmmxt_movq_wRn_M0(wrd);
2242         gen_op_iwmmxt_set_mup();
2243         break;
2244     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2245     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2246         wrd = (insn >> 12) & 0xf;
2247         rd0 = (insn >> 16) & 0xf;
2248         rd1 = (insn >> 0) & 0xf;
2249         gen_op_iwmmxt_movq_M0_wRn(rd0);
2250         switch ((insn >> 22) & 3) {
2251         case 0:
2252             if (insn & (1 << 21))
2253                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2254             else
2255                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2256             break;
2257         case 1:
2258             if (insn & (1 << 21))
2259                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2260             else
2261                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2262             break;
2263         case 2:
2264             if (insn & (1 << 21))
2265                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2266             else
2267                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2268             break;
2269         case 3:
2270             return 1;
2271         }
2272         gen_op_iwmmxt_movq_wRn_M0(wrd);
2273         gen_op_iwmmxt_set_mup();
2274         break;
2275     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2276     case 0x402: case 0x502: case 0x602: case 0x702:
2277         wrd = (insn >> 12) & 0xf;
2278         rd0 = (insn >> 16) & 0xf;
2279         rd1 = (insn >> 0) & 0xf;
2280         gen_op_iwmmxt_movq_M0_wRn(rd0);
2281         iwmmxt_load_reg(cpu_V1, rd1);
2282         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2283                                 tcg_constant_i32((insn >> 20) & 3));
2284         gen_op_iwmmxt_movq_wRn_M0(wrd);
2285         gen_op_iwmmxt_set_mup();
2286         break;
2287     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2288     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2289     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2290     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2291         wrd = (insn >> 12) & 0xf;
2292         rd0 = (insn >> 16) & 0xf;
2293         rd1 = (insn >> 0) & 0xf;
2294         gen_op_iwmmxt_movq_M0_wRn(rd0);
2295         switch ((insn >> 20) & 0xf) {
2296         case 0x0:
2297             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2298             break;
2299         case 0x1:
2300             gen_op_iwmmxt_subub_M0_wRn(rd1);
2301             break;
2302         case 0x3:
2303             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2304             break;
2305         case 0x4:
2306             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2307             break;
2308         case 0x5:
2309             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2310             break;
2311         case 0x7:
2312             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2313             break;
2314         case 0x8:
2315             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2316             break;
2317         case 0x9:
2318             gen_op_iwmmxt_subul_M0_wRn(rd1);
2319             break;
2320         case 0xb:
2321             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2322             break;
2323         default:
2324             return 1;
2325         }
2326         gen_op_iwmmxt_movq_wRn_M0(wrd);
2327         gen_op_iwmmxt_set_mup();
2328         gen_op_iwmmxt_set_cup();
2329         break;
2330     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2331     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2332     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2333     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2334         wrd = (insn >> 12) & 0xf;
2335         rd0 = (insn >> 16) & 0xf;
2336         gen_op_iwmmxt_movq_M0_wRn(rd0);
2337         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2338         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2339         gen_op_iwmmxt_movq_wRn_M0(wrd);
2340         gen_op_iwmmxt_set_mup();
2341         gen_op_iwmmxt_set_cup();
2342         break;
2343     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2344     case 0x418: case 0x518: case 0x618: case 0x718:
2345     case 0x818: case 0x918: case 0xa18: case 0xb18:
2346     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2347         wrd = (insn >> 12) & 0xf;
2348         rd0 = (insn >> 16) & 0xf;
2349         rd1 = (insn >> 0) & 0xf;
2350         gen_op_iwmmxt_movq_M0_wRn(rd0);
2351         switch ((insn >> 20) & 0xf) {
2352         case 0x0:
2353             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2354             break;
2355         case 0x1:
2356             gen_op_iwmmxt_addub_M0_wRn(rd1);
2357             break;
2358         case 0x3:
2359             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2360             break;
2361         case 0x4:
2362             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2363             break;
2364         case 0x5:
2365             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2366             break;
2367         case 0x7:
2368             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2369             break;
2370         case 0x8:
2371             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2372             break;
2373         case 0x9:
2374             gen_op_iwmmxt_addul_M0_wRn(rd1);
2375             break;
2376         case 0xb:
2377             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2378             break;
2379         default:
2380             return 1;
2381         }
2382         gen_op_iwmmxt_movq_wRn_M0(wrd);
2383         gen_op_iwmmxt_set_mup();
2384         gen_op_iwmmxt_set_cup();
2385         break;
2386     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2387     case 0x408: case 0x508: case 0x608: case 0x708:
2388     case 0x808: case 0x908: case 0xa08: case 0xb08:
2389     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2390         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2391             return 1;
2392         wrd = (insn >> 12) & 0xf;
2393         rd0 = (insn >> 16) & 0xf;
2394         rd1 = (insn >> 0) & 0xf;
2395         gen_op_iwmmxt_movq_M0_wRn(rd0);
2396         switch ((insn >> 22) & 3) {
2397         case 1:
2398             if (insn & (1 << 21))
2399                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2400             else
2401                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2402             break;
2403         case 2:
2404             if (insn & (1 << 21))
2405                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2406             else
2407                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2408             break;
2409         case 3:
2410             if (insn & (1 << 21))
2411                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2412             else
2413                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2414             break;
2415         }
2416         gen_op_iwmmxt_movq_wRn_M0(wrd);
2417         gen_op_iwmmxt_set_mup();
2418         gen_op_iwmmxt_set_cup();
2419         break;
2420     case 0x201: case 0x203: case 0x205: case 0x207:
2421     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2422     case 0x211: case 0x213: case 0x215: case 0x217:
2423     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2424         wrd = (insn >> 5) & 0xf;
2425         rd0 = (insn >> 12) & 0xf;
2426         rd1 = (insn >> 0) & 0xf;
2427         if (rd0 == 0xf || rd1 == 0xf)
2428             return 1;
2429         gen_op_iwmmxt_movq_M0_wRn(wrd);
2430         tmp = load_reg(s, rd0);
2431         tmp2 = load_reg(s, rd1);
2432         switch ((insn >> 16) & 0xf) {
2433         case 0x0:                                       /* TMIA */
2434             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2435             break;
2436         case 0x8:                                       /* TMIAPH */
2437             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2438             break;
2439         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2440             if (insn & (1 << 16))
2441                 tcg_gen_shri_i32(tmp, tmp, 16);
2442             if (insn & (1 << 17))
2443                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2444             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2445             break;
2446         default:
2447             return 1;
2448         }
2449         gen_op_iwmmxt_movq_wRn_M0(wrd);
2450         gen_op_iwmmxt_set_mup();
2451         break;
2452     default:
2453         return 1;
2454     }
2455 
2456     return 0;
2457 }
2458 
2459 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2460    (ie. an undefined instruction).  */
2461 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2462 {
2463     int acc, rd0, rd1, rdhi, rdlo;
2464     TCGv_i32 tmp, tmp2;
2465 
2466     if ((insn & 0x0ff00f10) == 0x0e200010) {
2467         /* Multiply with Internal Accumulate Format */
2468         rd0 = (insn >> 12) & 0xf;
2469         rd1 = insn & 0xf;
2470         acc = (insn >> 5) & 7;
2471 
2472         if (acc != 0)
2473             return 1;
2474 
2475         tmp = load_reg(s, rd0);
2476         tmp2 = load_reg(s, rd1);
2477         switch ((insn >> 16) & 0xf) {
2478         case 0x0:                                       /* MIA */
2479             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2480             break;
2481         case 0x8:                                       /* MIAPH */
2482             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2483             break;
2484         case 0xc:                                       /* MIABB */
2485         case 0xd:                                       /* MIABT */
2486         case 0xe:                                       /* MIATB */
2487         case 0xf:                                       /* MIATT */
2488             if (insn & (1 << 16))
2489                 tcg_gen_shri_i32(tmp, tmp, 16);
2490             if (insn & (1 << 17))
2491                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2492             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2493             break;
2494         default:
2495             return 1;
2496         }
2497 
2498         gen_op_iwmmxt_movq_wRn_M0(acc);
2499         return 0;
2500     }
2501 
2502     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2503         /* Internal Accumulator Access Format */
2504         rdhi = (insn >> 16) & 0xf;
2505         rdlo = (insn >> 12) & 0xf;
2506         acc = insn & 7;
2507 
2508         if (acc != 0)
2509             return 1;
2510 
2511         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2512             iwmmxt_load_reg(cpu_V0, acc);
2513             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2514             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2515             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2516         } else {                                        /* MAR */
2517             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2518             iwmmxt_store_reg(cpu_V0, acc);
2519         }
2520         return 0;
2521     }
2522 
2523     return 1;
2524 }
2525 
/* Thin wrapper that emits tcg_gen_lookup_and_goto_ptr(). */
static void gen_goto_ptr(void)
{
    tcg_gen_lookup_and_goto_ptr();
}
2530 
2531 /* This will end the TB but doesn't guarantee we'll return to
2532  * cpu_loop_exec. Any live exit_requests will be processed as we
2533  * enter the next TB.
2534  */
2535 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2536 {
2537     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2538         /*
2539          * For pcrel, the pc must always be up-to-date on entry to
2540          * the linked TB, so that it can use simple additions for all
2541          * further adjustments.  For !pcrel, the linked TB is compiled
2542          * to know its full virtual address, so we can delay the
2543          * update to pc to the unlinked path.  A long chain of links
2544          * can thus avoid many updates to the PC.
2545          */
2546         if (tb_cflags(s->base.tb) & CF_PCREL) {
2547             gen_update_pc(s, diff);
2548             tcg_gen_goto_tb(n);
2549         } else {
2550             tcg_gen_goto_tb(n);
2551             gen_update_pc(s, diff);
2552         }
2553         tcg_gen_exit_tb(s->base.tb, n);
2554     } else {
2555         gen_update_pc(s, diff);
2556         gen_goto_ptr();
2557     }
2558     s->base.is_jmp = DISAS_NORETURN;
2559 }
2560 
2561 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2562 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2563 {
2564     if (unlikely(s->ss_active)) {
2565         /* An indirect jump so that we still trigger the debug exception.  */
2566         gen_update_pc(s, diff);
2567         s->base.is_jmp = DISAS_JUMP;
2568         return;
2569     }
2570     switch (s->base.is_jmp) {
2571     case DISAS_NEXT:
2572     case DISAS_TOO_MANY:
2573     case DISAS_NORETURN:
2574         /*
2575          * The normal case: just go to the destination TB.
2576          * NB: NORETURN happens if we generate code like
2577          *    gen_brcondi(l);
2578          *    gen_jmp();
2579          *    gen_set_label(l);
2580          *    gen_jmp();
2581          * on the second call to gen_jmp().
2582          */
2583         gen_goto_tb(s, tbno, diff);
2584         break;
2585     case DISAS_UPDATE_NOCHAIN:
2586     case DISAS_UPDATE_EXIT:
2587         /*
2588          * We already decided we're leaving the TB for some other reason.
2589          * Avoid using goto_tb so we really do exit back to the main loop
2590          * and don't chain to another TB.
2591          */
2592         gen_update_pc(s, diff);
2593         gen_goto_ptr();
2594         s->base.is_jmp = DISAS_NORETURN;
2595         break;
2596     default:
2597         /*
2598          * We shouldn't be emitting code for a jump and also have
2599          * is_jmp set to one of the special cases like DISAS_SWI.
2600          */
2601         g_assert_not_reached();
2602     }
2603 }
2604 
2605 static inline void gen_jmp(DisasContext *s, target_long diff)
2606 {
2607     gen_jmp_tb(s, diff, 0);
2608 }
2609 
2610 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2611 {
2612     if (x)
2613         tcg_gen_sari_i32(t0, t0, 16);
2614     else
2615         gen_sxth(t0);
2616     if (y)
2617         tcg_gen_sari_i32(t1, t1, 16);
2618     else
2619         gen_sxth(t1);
2620     tcg_gen_mul_i32(t0, t0, t1);
2621 }
2622 
2623 /* Return the mask of PSR bits set by a MSR instruction.  */
2624 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2625 {
2626     uint32_t mask = 0;
2627 
2628     if (flags & (1 << 0)) {
2629         mask |= 0xff;
2630     }
2631     if (flags & (1 << 1)) {
2632         mask |= 0xff00;
2633     }
2634     if (flags & (1 << 2)) {
2635         mask |= 0xff0000;
2636     }
2637     if (flags & (1 << 3)) {
2638         mask |= 0xff000000;
2639     }
2640 
2641     /* Mask out undefined and reserved bits.  */
2642     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2643 
2644     /* Mask out execution state.  */
2645     if (!spsr) {
2646         mask &= ~CPSR_EXEC;
2647     }
2648 
2649     /* Mask out privileged bits.  */
2650     if (IS_USER(s)) {
2651         mask &= CPSR_USER;
2652     }
2653     return mask;
2654 }
2655 
2656 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2657 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2658 {
2659     TCGv_i32 tmp;
2660     if (spsr) {
2661         /* ??? This is also undefined in system mode.  */
2662         if (IS_USER(s))
2663             return 1;
2664 
2665         tmp = load_cpu_field(spsr);
2666         tcg_gen_andi_i32(tmp, tmp, ~mask);
2667         tcg_gen_andi_i32(t0, t0, mask);
2668         tcg_gen_or_i32(tmp, tmp, t0);
2669         store_cpu_field(tmp, spsr);
2670     } else {
2671         gen_set_cpsr(t0, mask);
2672     }
2673     gen_lookup_tb(s);
2674     return 0;
2675 }
2676 
2677 /* Returns nonzero if access to the PSR is not permitted.  */
2678 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2679 {
2680     TCGv_i32 tmp;
2681     tmp = tcg_temp_new_i32();
2682     tcg_gen_movi_i32(tmp, val);
2683     return gen_set_psr(s, mask, spsr, tmp);
2684 }
2685 
2686 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2687                                      int *tgtmode, int *regno)
2688 {
2689     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2690      * the target mode and register number, and identify the various
2691      * unpredictable cases.
2692      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2693      *  + executed in user mode
2694      *  + using R15 as the src/dest register
2695      *  + accessing an unimplemented register
2696      *  + accessing a register that's inaccessible at current PL/security state*
2697      *  + accessing a register that you could access with a different insn
2698      * We choose to UNDEF in all these cases.
2699      * Since we don't know which of the various AArch32 modes we are in
2700      * we have to defer some checks to runtime.
2701      * Accesses to Monitor mode registers from Secure EL1 (which implies
2702      * that EL3 is AArch64) must trap to EL3.
2703      *
2704      * If the access checks fail this function will emit code to take
2705      * an exception and return false. Otherwise it will return true,
2706      * and set *tgtmode and *regno appropriately.
2707      */
2708     /* These instructions are present only in ARMv8, or in ARMv7 with the
2709      * Virtualization Extensions.
2710      */
2711     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2712         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2713         goto undef;
2714     }
2715 
2716     if (IS_USER(s) || rn == 15) {
2717         goto undef;
2718     }
2719 
2720     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2721      * of registers into (r, sysm).
2722      */
2723     if (r) {
2724         /* SPSRs for other modes */
2725         switch (sysm) {
2726         case 0xe: /* SPSR_fiq */
2727             *tgtmode = ARM_CPU_MODE_FIQ;
2728             break;
2729         case 0x10: /* SPSR_irq */
2730             *tgtmode = ARM_CPU_MODE_IRQ;
2731             break;
2732         case 0x12: /* SPSR_svc */
2733             *tgtmode = ARM_CPU_MODE_SVC;
2734             break;
2735         case 0x14: /* SPSR_abt */
2736             *tgtmode = ARM_CPU_MODE_ABT;
2737             break;
2738         case 0x16: /* SPSR_und */
2739             *tgtmode = ARM_CPU_MODE_UND;
2740             break;
2741         case 0x1c: /* SPSR_mon */
2742             *tgtmode = ARM_CPU_MODE_MON;
2743             break;
2744         case 0x1e: /* SPSR_hyp */
2745             *tgtmode = ARM_CPU_MODE_HYP;
2746             break;
2747         default: /* unallocated */
2748             goto undef;
2749         }
2750         /* We arbitrarily assign SPSR a register number of 16. */
2751         *regno = 16;
2752     } else {
2753         /* general purpose registers for other modes */
2754         switch (sysm) {
2755         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2756             *tgtmode = ARM_CPU_MODE_USR;
2757             *regno = sysm + 8;
2758             break;
2759         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2760             *tgtmode = ARM_CPU_MODE_FIQ;
2761             *regno = sysm;
2762             break;
2763         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2764             *tgtmode = ARM_CPU_MODE_IRQ;
2765             *regno = sysm & 1 ? 13 : 14;
2766             break;
2767         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2768             *tgtmode = ARM_CPU_MODE_SVC;
2769             *regno = sysm & 1 ? 13 : 14;
2770             break;
2771         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2772             *tgtmode = ARM_CPU_MODE_ABT;
2773             *regno = sysm & 1 ? 13 : 14;
2774             break;
2775         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2776             *tgtmode = ARM_CPU_MODE_UND;
2777             *regno = sysm & 1 ? 13 : 14;
2778             break;
2779         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2780             *tgtmode = ARM_CPU_MODE_MON;
2781             *regno = sysm & 1 ? 13 : 14;
2782             break;
2783         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2784             *tgtmode = ARM_CPU_MODE_HYP;
2785             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2786             *regno = sysm & 1 ? 13 : 17;
2787             break;
2788         default: /* unallocated */
2789             goto undef;
2790         }
2791     }
2792 
2793     /* Catch the 'accessing inaccessible register' cases we can detect
2794      * at translate time.
2795      */
2796     switch (*tgtmode) {
2797     case ARM_CPU_MODE_MON:
2798         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2799             goto undef;
2800         }
2801         if (s->current_el == 1) {
2802             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2803              * then accesses to Mon registers trap to Secure EL2, if it exists,
2804              * otherwise EL3.
2805              */
2806             TCGv_i32 tcg_el;
2807 
2808             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2809                 dc_isar_feature(aa64_sel2, s)) {
2810                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2811                 tcg_el = load_cpu_field_low32(cp15.scr_el3);
2812                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2813                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2814             } else {
2815                 tcg_el = tcg_constant_i32(3);
2816             }
2817 
2818             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2819                                     syn_uncategorized(), tcg_el);
2820             return false;
2821         }
2822         break;
2823     case ARM_CPU_MODE_HYP:
2824         /*
2825          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2826          * (and so we can forbid accesses from EL2 or below). elr_hyp
2827          * can be accessed also from Hyp mode, so forbid accesses from
2828          * EL0 or EL1.
2829          */
2830         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2831             (s->current_el < 3 && *regno != 17)) {
2832             goto undef;
2833         }
2834         break;
2835     default:
2836         break;
2837     }
2838 
2839     return true;
2840 
2841 undef:
2842     /* If we get here then some access check did not pass */
2843     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2844     return false;
2845 }
2846 
2847 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2848 {
2849     TCGv_i32 tcg_reg;
2850     int tgtmode = 0, regno = 0;
2851 
2852     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2853         return;
2854     }
2855 
2856     /* Sync state because msr_banked() can raise exceptions */
2857     gen_set_condexec(s);
2858     gen_update_pc(s, 0);
2859     tcg_reg = load_reg(s, rn);
2860     gen_helper_msr_banked(cpu_env, tcg_reg,
2861                           tcg_constant_i32(tgtmode),
2862                           tcg_constant_i32(regno));
2863     s->base.is_jmp = DISAS_UPDATE_EXIT;
2864 }
2865 
2866 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2867 {
2868     TCGv_i32 tcg_reg;
2869     int tgtmode = 0, regno = 0;
2870 
2871     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2872         return;
2873     }
2874 
2875     /* Sync state because mrs_banked() can raise exceptions */
2876     gen_set_condexec(s);
2877     gen_update_pc(s, 0);
2878     tcg_reg = tcg_temp_new_i32();
2879     gen_helper_mrs_banked(tcg_reg, cpu_env,
2880                           tcg_constant_i32(tgtmode),
2881                           tcg_constant_i32(regno));
2882     store_reg(s, rn, tcg_reg);
2883     s->base.is_jmp = DISAS_UPDATE_EXIT;
2884 }
2885 
2886 /* Store value to PC as for an exception return (ie don't
2887  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2888  * will do the masking based on the new value of the Thumb bit.
2889  */
2890 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2891 {
2892     tcg_gen_mov_i32(cpu_R[15], pc);
2893 }
2894 
2895 /* Generate a v6 exception return.  Marks both values as dead.  */
2896 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2897 {
2898     store_pc_exc_ret(s, pc);
2899     /* The cpsr_write_eret helper will mask the low bits of PC
2900      * appropriately depending on the new Thumb bit, so it must
2901      * be called after storing the new PC.
2902      */
2903     translator_io_start(&s->base);
2904     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2905     /* Must exit loop to check un-masked IRQs */
2906     s->base.is_jmp = DISAS_EXIT;
2907 }
2908 
2909 /* Generate an old-style exception return. Marks pc as dead. */
2910 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2911 {
2912     gen_rfe(s, pc, load_cpu_field(spsr));
2913 }
2914 
2915 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2916                             uint32_t opr_sz, uint32_t max_sz,
2917                             gen_helper_gvec_3_ptr *fn)
2918 {
2919     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2920 
2921     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2922     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2923                        opr_sz, max_sz, 0, fn);
2924 }
2925 
2926 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2927                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2928 {
2929     static gen_helper_gvec_3_ptr * const fns[2] = {
2930         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2931     };
2932     tcg_debug_assert(vece >= 1 && vece <= 2);
2933     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2934 }
2935 
2936 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
2946 #define GEN_CMP0(NAME, COND)                                            \
2947     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2948     {                                                                   \
2949         tcg_gen_negsetcond_i32(COND, d, a, tcg_constant_i32(0));        \
2950     }                                                                   \
2951     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2952     {                                                                   \
2953         tcg_gen_negsetcond_i64(COND, d, a, tcg_constant_i64(0));        \
2954     }                                                                   \
2955     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2956     {                                                                   \
2957         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
2958         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
2959     }                                                                   \
2960     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
2961                             uint32_t opr_sz, uint32_t max_sz)           \
2962     {                                                                   \
2963         const GVecGen2 op[4] = {                                        \
2964             { .fno = gen_helper_gvec_##NAME##0_b,                       \
2965               .fniv = gen_##NAME##0_vec,                                \
2966               .opt_opc = vecop_list_cmp,                                \
2967               .vece = MO_8 },                                           \
2968             { .fno = gen_helper_gvec_##NAME##0_h,                       \
2969               .fniv = gen_##NAME##0_vec,                                \
2970               .opt_opc = vecop_list_cmp,                                \
2971               .vece = MO_16 },                                          \
2972             { .fni4 = gen_##NAME##0_i32,                                \
2973               .fniv = gen_##NAME##0_vec,                                \
2974               .opt_opc = vecop_list_cmp,                                \
2975               .vece = MO_32 },                                          \
2976             { .fni8 = gen_##NAME##0_i64,                                \
2977               .fniv = gen_##NAME##0_vec,                                \
2978               .opt_opc = vecop_list_cmp,                                \
2979               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
2980               .vece = MO_64 },                                          \
2981         };                                                              \
2982         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
2983     }
2984 
2985 static const TCGOpcode vecop_list_cmp[] = {
2986     INDEX_op_cmp_vec, 0
2987 };
2988 
2989 GEN_CMP0(ceq, TCG_COND_EQ)
2990 GEN_CMP0(cle, TCG_COND_LE)
2991 GEN_CMP0(cge, TCG_COND_GE)
2992 GEN_CMP0(clt, TCG_COND_LT)
2993 GEN_CMP0(cgt, TCG_COND_GT)
2994 
2995 #undef GEN_CMP0
2996 
2997 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2998 {
2999     tcg_gen_vec_sar8i_i64(a, a, shift);
3000     tcg_gen_vec_add8_i64(d, d, a);
3001 }
3002 
3003 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3004 {
3005     tcg_gen_vec_sar16i_i64(a, a, shift);
3006     tcg_gen_vec_add16_i64(d, d, a);
3007 }
3008 
3009 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3010 {
3011     tcg_gen_sari_i32(a, a, shift);
3012     tcg_gen_add_i32(d, d, a);
3013 }
3014 
3015 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3016 {
3017     tcg_gen_sari_i64(a, a, shift);
3018     tcg_gen_add_i64(d, d, a);
3019 }
3020 
3021 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3022 {
3023     tcg_gen_sari_vec(vece, a, a, sh);
3024     tcg_gen_add_vec(vece, d, d, a);
3025 }
3026 
3027 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3028                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3029 {
3030     static const TCGOpcode vecop_list[] = {
3031         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3032     };
3033     static const GVecGen2i ops[4] = {
3034         { .fni8 = gen_ssra8_i64,
3035           .fniv = gen_ssra_vec,
3036           .fno = gen_helper_gvec_ssra_b,
3037           .load_dest = true,
3038           .opt_opc = vecop_list,
3039           .vece = MO_8 },
3040         { .fni8 = gen_ssra16_i64,
3041           .fniv = gen_ssra_vec,
3042           .fno = gen_helper_gvec_ssra_h,
3043           .load_dest = true,
3044           .opt_opc = vecop_list,
3045           .vece = MO_16 },
3046         { .fni4 = gen_ssra32_i32,
3047           .fniv = gen_ssra_vec,
3048           .fno = gen_helper_gvec_ssra_s,
3049           .load_dest = true,
3050           .opt_opc = vecop_list,
3051           .vece = MO_32 },
3052         { .fni8 = gen_ssra64_i64,
3053           .fniv = gen_ssra_vec,
3054           .fno = gen_helper_gvec_ssra_d,
3055           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3056           .opt_opc = vecop_list,
3057           .load_dest = true,
3058           .vece = MO_64 },
3059     };
3060 
3061     /* tszimm encoding produces immediates in the range [1..esize]. */
3062     tcg_debug_assert(shift > 0);
3063     tcg_debug_assert(shift <= (8 << vece));
3064 
3065     /*
3066      * Shifts larger than the element size are architecturally valid.
3067      * Signed results in all sign bits.
3068      */
3069     shift = MIN(shift, (8 << vece) - 1);
3070     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3071 }
3072 
3073 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3074 {
3075     tcg_gen_vec_shr8i_i64(a, a, shift);
3076     tcg_gen_vec_add8_i64(d, d, a);
3077 }
3078 
3079 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3080 {
3081     tcg_gen_vec_shr16i_i64(a, a, shift);
3082     tcg_gen_vec_add16_i64(d, d, a);
3083 }
3084 
3085 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3086 {
3087     tcg_gen_shri_i32(a, a, shift);
3088     tcg_gen_add_i32(d, d, a);
3089 }
3090 
3091 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3092 {
3093     tcg_gen_shri_i64(a, a, shift);
3094     tcg_gen_add_i64(d, d, a);
3095 }
3096 
3097 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3098 {
3099     tcg_gen_shri_vec(vece, a, a, sh);
3100     tcg_gen_add_vec(vece, d, d, a);
3101 }
3102 
3103 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3104                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3105 {
3106     static const TCGOpcode vecop_list[] = {
3107         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3108     };
3109     static const GVecGen2i ops[4] = {
3110         { .fni8 = gen_usra8_i64,
3111           .fniv = gen_usra_vec,
3112           .fno = gen_helper_gvec_usra_b,
3113           .load_dest = true,
3114           .opt_opc = vecop_list,
3115           .vece = MO_8, },
3116         { .fni8 = gen_usra16_i64,
3117           .fniv = gen_usra_vec,
3118           .fno = gen_helper_gvec_usra_h,
3119           .load_dest = true,
3120           .opt_opc = vecop_list,
3121           .vece = MO_16, },
3122         { .fni4 = gen_usra32_i32,
3123           .fniv = gen_usra_vec,
3124           .fno = gen_helper_gvec_usra_s,
3125           .load_dest = true,
3126           .opt_opc = vecop_list,
3127           .vece = MO_32, },
3128         { .fni8 = gen_usra64_i64,
3129           .fniv = gen_usra_vec,
3130           .fno = gen_helper_gvec_usra_d,
3131           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3132           .load_dest = true,
3133           .opt_opc = vecop_list,
3134           .vece = MO_64, },
3135     };
3136 
3137     /* tszimm encoding produces immediates in the range [1..esize]. */
3138     tcg_debug_assert(shift > 0);
3139     tcg_debug_assert(shift <= (8 << vece));
3140 
3141     /*
3142      * Shifts larger than the element size are architecturally valid.
3143      * Unsigned results in all zeros as input to accumulate: nop.
3144      */
3145     if (shift < (8 << vece)) {
3146         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3147     } else {
3148         /* Nop, but we do need to clear the tail. */
3149         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3150     }
3151 }
3152 
3153 /*
3154  * Shift one less than the requested amount, and the low bit is
3155  * the rounding bit.  For the 8 and 16-bit operations, because we
3156  * mask the low bit, we can perform a normal integer shift instead
3157  * of a vector shift.
3158  */
3159 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3160 {
3161     TCGv_i64 t = tcg_temp_new_i64();
3162 
3163     tcg_gen_shri_i64(t, a, sh - 1);
3164     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3165     tcg_gen_vec_sar8i_i64(d, a, sh);
3166     tcg_gen_vec_add8_i64(d, d, t);
3167 }
3168 
3169 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3170 {
3171     TCGv_i64 t = tcg_temp_new_i64();
3172 
3173     tcg_gen_shri_i64(t, a, sh - 1);
3174     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3175     tcg_gen_vec_sar16i_i64(d, a, sh);
3176     tcg_gen_vec_add16_i64(d, d, t);
3177 }
3178 
3179 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3180 {
3181     TCGv_i32 t;
3182 
3183     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3184     if (sh == 32) {
3185         tcg_gen_movi_i32(d, 0);
3186         return;
3187     }
3188     t = tcg_temp_new_i32();
3189     tcg_gen_extract_i32(t, a, sh - 1, 1);
3190     tcg_gen_sari_i32(d, a, sh);
3191     tcg_gen_add_i32(d, d, t);
3192 }
3193 
3194 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3195 {
3196     TCGv_i64 t = tcg_temp_new_i64();
3197 
3198     tcg_gen_extract_i64(t, a, sh - 1, 1);
3199     tcg_gen_sari_i64(d, a, sh);
3200     tcg_gen_add_i64(d, d, t);
3201 }
3202 
3203 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3204 {
3205     TCGv_vec t = tcg_temp_new_vec_matching(d);
3206     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3207 
3208     tcg_gen_shri_vec(vece, t, a, sh - 1);
3209     tcg_gen_dupi_vec(vece, ones, 1);
3210     tcg_gen_and_vec(vece, t, t, ones);
3211     tcg_gen_sari_vec(vece, d, a, sh);
3212     tcg_gen_add_vec(vece, d, d, t);
3213 }
3214 
3215 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3216                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3217 {
3218     static const TCGOpcode vecop_list[] = {
3219         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3220     };
3221     static const GVecGen2i ops[4] = {
3222         { .fni8 = gen_srshr8_i64,
3223           .fniv = gen_srshr_vec,
3224           .fno = gen_helper_gvec_srshr_b,
3225           .opt_opc = vecop_list,
3226           .vece = MO_8 },
3227         { .fni8 = gen_srshr16_i64,
3228           .fniv = gen_srshr_vec,
3229           .fno = gen_helper_gvec_srshr_h,
3230           .opt_opc = vecop_list,
3231           .vece = MO_16 },
3232         { .fni4 = gen_srshr32_i32,
3233           .fniv = gen_srshr_vec,
3234           .fno = gen_helper_gvec_srshr_s,
3235           .opt_opc = vecop_list,
3236           .vece = MO_32 },
3237         { .fni8 = gen_srshr64_i64,
3238           .fniv = gen_srshr_vec,
3239           .fno = gen_helper_gvec_srshr_d,
3240           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3241           .opt_opc = vecop_list,
3242           .vece = MO_64 },
3243     };
3244 
3245     /* tszimm encoding produces immediates in the range [1..esize] */
3246     tcg_debug_assert(shift > 0);
3247     tcg_debug_assert(shift <= (8 << vece));
3248 
3249     if (shift == (8 << vece)) {
3250         /*
3251          * Shifts larger than the element size are architecturally valid.
3252          * Signed results in all sign bits.  With rounding, this produces
3253          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3254          * I.e. always zero.
3255          */
3256         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3257     } else {
3258         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3259     }
3260 }
3261 
3262 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3263 {
3264     TCGv_i64 t = tcg_temp_new_i64();
3265 
3266     gen_srshr8_i64(t, a, sh);
3267     tcg_gen_vec_add8_i64(d, d, t);
3268 }
3269 
3270 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3271 {
3272     TCGv_i64 t = tcg_temp_new_i64();
3273 
3274     gen_srshr16_i64(t, a, sh);
3275     tcg_gen_vec_add16_i64(d, d, t);
3276 }
3277 
3278 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3279 {
3280     TCGv_i32 t = tcg_temp_new_i32();
3281 
3282     gen_srshr32_i32(t, a, sh);
3283     tcg_gen_add_i32(d, d, t);
3284 }
3285 
3286 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3287 {
3288     TCGv_i64 t = tcg_temp_new_i64();
3289 
3290     gen_srshr64_i64(t, a, sh);
3291     tcg_gen_add_i64(d, d, t);
3292 }
3293 
3294 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3295 {
3296     TCGv_vec t = tcg_temp_new_vec_matching(d);
3297 
3298     gen_srshr_vec(vece, t, a, sh);
3299     tcg_gen_add_vec(vece, d, d, t);
3300 }
3301 
3302 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3303                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3304 {
3305     static const TCGOpcode vecop_list[] = {
3306         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3307     };
3308     static const GVecGen2i ops[4] = {
3309         { .fni8 = gen_srsra8_i64,
3310           .fniv = gen_srsra_vec,
3311           .fno = gen_helper_gvec_srsra_b,
3312           .opt_opc = vecop_list,
3313           .load_dest = true,
3314           .vece = MO_8 },
3315         { .fni8 = gen_srsra16_i64,
3316           .fniv = gen_srsra_vec,
3317           .fno = gen_helper_gvec_srsra_h,
3318           .opt_opc = vecop_list,
3319           .load_dest = true,
3320           .vece = MO_16 },
3321         { .fni4 = gen_srsra32_i32,
3322           .fniv = gen_srsra_vec,
3323           .fno = gen_helper_gvec_srsra_s,
3324           .opt_opc = vecop_list,
3325           .load_dest = true,
3326           .vece = MO_32 },
3327         { .fni8 = gen_srsra64_i64,
3328           .fniv = gen_srsra_vec,
3329           .fno = gen_helper_gvec_srsra_d,
3330           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3331           .opt_opc = vecop_list,
3332           .load_dest = true,
3333           .vece = MO_64 },
3334     };
3335 
3336     /* tszimm encoding produces immediates in the range [1..esize] */
3337     tcg_debug_assert(shift > 0);
3338     tcg_debug_assert(shift <= (8 << vece));
3339 
3340     /*
3341      * Shifts larger than the element size are architecturally valid.
3342      * Signed results in all sign bits.  With rounding, this produces
3343      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3344      * I.e. always zero.  With accumulation, this leaves D unchanged.
3345      */
3346     if (shift == (8 << vece)) {
3347         /* Nop, but we do need to clear the tail. */
3348         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3349     } else {
3350         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3351     }
3352 }
3353 
3354 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3355 {
3356     TCGv_i64 t = tcg_temp_new_i64();
3357 
3358     tcg_gen_shri_i64(t, a, sh - 1);
3359     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3360     tcg_gen_vec_shr8i_i64(d, a, sh);
3361     tcg_gen_vec_add8_i64(d, d, t);
3362 }
3363 
3364 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3365 {
3366     TCGv_i64 t = tcg_temp_new_i64();
3367 
3368     tcg_gen_shri_i64(t, a, sh - 1);
3369     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3370     tcg_gen_vec_shr16i_i64(d, a, sh);
3371     tcg_gen_vec_add16_i64(d, d, t);
3372 }
3373 
3374 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3375 {
3376     TCGv_i32 t;
3377 
3378     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3379     if (sh == 32) {
3380         tcg_gen_extract_i32(d, a, sh - 1, 1);
3381         return;
3382     }
3383     t = tcg_temp_new_i32();
3384     tcg_gen_extract_i32(t, a, sh - 1, 1);
3385     tcg_gen_shri_i32(d, a, sh);
3386     tcg_gen_add_i32(d, d, t);
3387 }
3388 
3389 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3390 {
3391     TCGv_i64 t = tcg_temp_new_i64();
3392 
3393     tcg_gen_extract_i64(t, a, sh - 1, 1);
3394     tcg_gen_shri_i64(d, a, sh);
3395     tcg_gen_add_i64(d, d, t);
3396 }
3397 
3398 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3399 {
3400     TCGv_vec t = tcg_temp_new_vec_matching(d);
3401     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3402 
3403     tcg_gen_shri_vec(vece, t, a, shift - 1);
3404     tcg_gen_dupi_vec(vece, ones, 1);
3405     tcg_gen_and_vec(vece, t, t, ones);
3406     tcg_gen_shri_vec(vece, d, a, shift);
3407     tcg_gen_add_vec(vece, d, d, t);
3408 }
3409 
3410 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3411                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3412 {
3413     static const TCGOpcode vecop_list[] = {
3414         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3415     };
3416     static const GVecGen2i ops[4] = {
3417         { .fni8 = gen_urshr8_i64,
3418           .fniv = gen_urshr_vec,
3419           .fno = gen_helper_gvec_urshr_b,
3420           .opt_opc = vecop_list,
3421           .vece = MO_8 },
3422         { .fni8 = gen_urshr16_i64,
3423           .fniv = gen_urshr_vec,
3424           .fno = gen_helper_gvec_urshr_h,
3425           .opt_opc = vecop_list,
3426           .vece = MO_16 },
3427         { .fni4 = gen_urshr32_i32,
3428           .fniv = gen_urshr_vec,
3429           .fno = gen_helper_gvec_urshr_s,
3430           .opt_opc = vecop_list,
3431           .vece = MO_32 },
3432         { .fni8 = gen_urshr64_i64,
3433           .fniv = gen_urshr_vec,
3434           .fno = gen_helper_gvec_urshr_d,
3435           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3436           .opt_opc = vecop_list,
3437           .vece = MO_64 },
3438     };
3439 
3440     /* tszimm encoding produces immediates in the range [1..esize] */
3441     tcg_debug_assert(shift > 0);
3442     tcg_debug_assert(shift <= (8 << vece));
3443 
3444     if (shift == (8 << vece)) {
3445         /*
3446          * Shifts larger than the element size are architecturally valid.
3447          * Unsigned results in zero.  With rounding, this produces a
3448          * copy of the most significant bit.
3449          */
3450         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3451     } else {
3452         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3453     }
3454 }
3455 
3456 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3457 {
3458     TCGv_i64 t = tcg_temp_new_i64();
3459 
3460     if (sh == 8) {
3461         tcg_gen_vec_shr8i_i64(t, a, 7);
3462     } else {
3463         gen_urshr8_i64(t, a, sh);
3464     }
3465     tcg_gen_vec_add8_i64(d, d, t);
3466 }
3467 
3468 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3469 {
3470     TCGv_i64 t = tcg_temp_new_i64();
3471 
3472     if (sh == 16) {
3473         tcg_gen_vec_shr16i_i64(t, a, 15);
3474     } else {
3475         gen_urshr16_i64(t, a, sh);
3476     }
3477     tcg_gen_vec_add16_i64(d, d, t);
3478 }
3479 
3480 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3481 {
3482     TCGv_i32 t = tcg_temp_new_i32();
3483 
3484     if (sh == 32) {
3485         tcg_gen_shri_i32(t, a, 31);
3486     } else {
3487         gen_urshr32_i32(t, a, sh);
3488     }
3489     tcg_gen_add_i32(d, d, t);
3490 }
3491 
3492 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3493 {
3494     TCGv_i64 t = tcg_temp_new_i64();
3495 
3496     if (sh == 64) {
3497         tcg_gen_shri_i64(t, a, 63);
3498     } else {
3499         gen_urshr64_i64(t, a, sh);
3500     }
3501     tcg_gen_add_i64(d, d, t);
3502 }
3503 
3504 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3505 {
3506     TCGv_vec t = tcg_temp_new_vec_matching(d);
3507 
3508     if (sh == (8 << vece)) {
3509         tcg_gen_shri_vec(vece, t, a, sh - 1);
3510     } else {
3511         gen_urshr_vec(vece, t, a, sh);
3512     }
3513     tcg_gen_add_vec(vece, d, d, t);
3514 }
3515 
3516 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3517                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3518 {
3519     static const TCGOpcode vecop_list[] = {
3520         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3521     };
3522     static const GVecGen2i ops[4] = {
3523         { .fni8 = gen_ursra8_i64,
3524           .fniv = gen_ursra_vec,
3525           .fno = gen_helper_gvec_ursra_b,
3526           .opt_opc = vecop_list,
3527           .load_dest = true,
3528           .vece = MO_8 },
3529         { .fni8 = gen_ursra16_i64,
3530           .fniv = gen_ursra_vec,
3531           .fno = gen_helper_gvec_ursra_h,
3532           .opt_opc = vecop_list,
3533           .load_dest = true,
3534           .vece = MO_16 },
3535         { .fni4 = gen_ursra32_i32,
3536           .fniv = gen_ursra_vec,
3537           .fno = gen_helper_gvec_ursra_s,
3538           .opt_opc = vecop_list,
3539           .load_dest = true,
3540           .vece = MO_32 },
3541         { .fni8 = gen_ursra64_i64,
3542           .fniv = gen_ursra_vec,
3543           .fno = gen_helper_gvec_ursra_d,
3544           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3545           .opt_opc = vecop_list,
3546           .load_dest = true,
3547           .vece = MO_64 },
3548     };
3549 
3550     /* tszimm encoding produces immediates in the range [1..esize] */
3551     tcg_debug_assert(shift > 0);
3552     tcg_debug_assert(shift <= (8 << vece));
3553 
3554     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3555 }
3556 
3557 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3558 {
3559     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3560     TCGv_i64 t = tcg_temp_new_i64();
3561 
3562     tcg_gen_shri_i64(t, a, shift);
3563     tcg_gen_andi_i64(t, t, mask);
3564     tcg_gen_andi_i64(d, d, ~mask);
3565     tcg_gen_or_i64(d, d, t);
3566 }
3567 
3568 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3569 {
3570     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3571     TCGv_i64 t = tcg_temp_new_i64();
3572 
3573     tcg_gen_shri_i64(t, a, shift);
3574     tcg_gen_andi_i64(t, t, mask);
3575     tcg_gen_andi_i64(d, d, ~mask);
3576     tcg_gen_or_i64(d, d, t);
3577 }
3578 
3579 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3580 {
3581     tcg_gen_shri_i32(a, a, shift);
3582     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3583 }
3584 
3585 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3586 {
3587     tcg_gen_shri_i64(a, a, shift);
3588     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3589 }
3590 
3591 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3592 {
3593     TCGv_vec t = tcg_temp_new_vec_matching(d);
3594     TCGv_vec m = tcg_temp_new_vec_matching(d);
3595 
3596     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3597     tcg_gen_shri_vec(vece, t, a, sh);
3598     tcg_gen_and_vec(vece, d, d, m);
3599     tcg_gen_or_vec(vece, d, d, t);
3600 }
3601 
3602 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3603                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3604 {
3605     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3606     const GVecGen2i ops[4] = {
3607         { .fni8 = gen_shr8_ins_i64,
3608           .fniv = gen_shr_ins_vec,
3609           .fno = gen_helper_gvec_sri_b,
3610           .load_dest = true,
3611           .opt_opc = vecop_list,
3612           .vece = MO_8 },
3613         { .fni8 = gen_shr16_ins_i64,
3614           .fniv = gen_shr_ins_vec,
3615           .fno = gen_helper_gvec_sri_h,
3616           .load_dest = true,
3617           .opt_opc = vecop_list,
3618           .vece = MO_16 },
3619         { .fni4 = gen_shr32_ins_i32,
3620           .fniv = gen_shr_ins_vec,
3621           .fno = gen_helper_gvec_sri_s,
3622           .load_dest = true,
3623           .opt_opc = vecop_list,
3624           .vece = MO_32 },
3625         { .fni8 = gen_shr64_ins_i64,
3626           .fniv = gen_shr_ins_vec,
3627           .fno = gen_helper_gvec_sri_d,
3628           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3629           .load_dest = true,
3630           .opt_opc = vecop_list,
3631           .vece = MO_64 },
3632     };
3633 
3634     /* tszimm encoding produces immediates in the range [1..esize]. */
3635     tcg_debug_assert(shift > 0);
3636     tcg_debug_assert(shift <= (8 << vece));
3637 
3638     /* Shift of esize leaves destination unchanged. */
3639     if (shift < (8 << vece)) {
3640         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3641     } else {
3642         /* Nop, but we do need to clear the tail. */
3643         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3644     }
3645 }
3646 
3647 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3648 {
3649     uint64_t mask = dup_const(MO_8, 0xff << shift);
3650     TCGv_i64 t = tcg_temp_new_i64();
3651 
3652     tcg_gen_shli_i64(t, a, shift);
3653     tcg_gen_andi_i64(t, t, mask);
3654     tcg_gen_andi_i64(d, d, ~mask);
3655     tcg_gen_or_i64(d, d, t);
3656 }
3657 
3658 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3659 {
3660     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3661     TCGv_i64 t = tcg_temp_new_i64();
3662 
3663     tcg_gen_shli_i64(t, a, shift);
3664     tcg_gen_andi_i64(t, t, mask);
3665     tcg_gen_andi_i64(d, d, ~mask);
3666     tcg_gen_or_i64(d, d, t);
3667 }
3668 
3669 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3670 {
3671     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3672 }
3673 
3674 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3675 {
3676     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3677 }
3678 
3679 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3680 {
3681     TCGv_vec t = tcg_temp_new_vec_matching(d);
3682     TCGv_vec m = tcg_temp_new_vec_matching(d);
3683 
3684     tcg_gen_shli_vec(vece, t, a, sh);
3685     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3686     tcg_gen_and_vec(vece, d, d, m);
3687     tcg_gen_or_vec(vece, d, d, t);
3688 }
3689 
3690 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3691                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3692 {
3693     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3694     const GVecGen2i ops[4] = {
3695         { .fni8 = gen_shl8_ins_i64,
3696           .fniv = gen_shl_ins_vec,
3697           .fno = gen_helper_gvec_sli_b,
3698           .load_dest = true,
3699           .opt_opc = vecop_list,
3700           .vece = MO_8 },
3701         { .fni8 = gen_shl16_ins_i64,
3702           .fniv = gen_shl_ins_vec,
3703           .fno = gen_helper_gvec_sli_h,
3704           .load_dest = true,
3705           .opt_opc = vecop_list,
3706           .vece = MO_16 },
3707         { .fni4 = gen_shl32_ins_i32,
3708           .fniv = gen_shl_ins_vec,
3709           .fno = gen_helper_gvec_sli_s,
3710           .load_dest = true,
3711           .opt_opc = vecop_list,
3712           .vece = MO_32 },
3713         { .fni8 = gen_shl64_ins_i64,
3714           .fniv = gen_shl_ins_vec,
3715           .fno = gen_helper_gvec_sli_d,
3716           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3717           .load_dest = true,
3718           .opt_opc = vecop_list,
3719           .vece = MO_64 },
3720     };
3721 
3722     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3723     tcg_debug_assert(shift >= 0);
3724     tcg_debug_assert(shift < (8 << vece));
3725 
3726     if (shift == 0) {
3727         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3728     } else {
3729         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3730     }
3731 }
3732 
3733 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3734 {
3735     gen_helper_neon_mul_u8(a, a, b);
3736     gen_helper_neon_add_u8(d, d, a);
3737 }
3738 
3739 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3740 {
3741     gen_helper_neon_mul_u8(a, a, b);
3742     gen_helper_neon_sub_u8(d, d, a);
3743 }
3744 
3745 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3746 {
3747     gen_helper_neon_mul_u16(a, a, b);
3748     gen_helper_neon_add_u16(d, d, a);
3749 }
3750 
3751 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3752 {
3753     gen_helper_neon_mul_u16(a, a, b);
3754     gen_helper_neon_sub_u16(d, d, a);
3755 }
3756 
3757 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3758 {
3759     tcg_gen_mul_i32(a, a, b);
3760     tcg_gen_add_i32(d, d, a);
3761 }
3762 
3763 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3764 {
3765     tcg_gen_mul_i32(a, a, b);
3766     tcg_gen_sub_i32(d, d, a);
3767 }
3768 
3769 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3770 {
3771     tcg_gen_mul_i64(a, a, b);
3772     tcg_gen_add_i64(d, d, a);
3773 }
3774 
3775 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3776 {
3777     tcg_gen_mul_i64(a, a, b);
3778     tcg_gen_sub_i64(d, d, a);
3779 }
3780 
3781 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3782 {
3783     tcg_gen_mul_vec(vece, a, a, b);
3784     tcg_gen_add_vec(vece, d, d, a);
3785 }
3786 
3787 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3788 {
3789     tcg_gen_mul_vec(vece, a, a, b);
3790     tcg_gen_sub_vec(vece, d, d, a);
3791 }
3792 
3793 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3794  * these tables are shared with AArch64 which does support them.
3795  */
3796 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3797                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3798 {
3799     static const TCGOpcode vecop_list[] = {
3800         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3801     };
3802     static const GVecGen3 ops[4] = {
3803         { .fni4 = gen_mla8_i32,
3804           .fniv = gen_mla_vec,
3805           .load_dest = true,
3806           .opt_opc = vecop_list,
3807           .vece = MO_8 },
3808         { .fni4 = gen_mla16_i32,
3809           .fniv = gen_mla_vec,
3810           .load_dest = true,
3811           .opt_opc = vecop_list,
3812           .vece = MO_16 },
3813         { .fni4 = gen_mla32_i32,
3814           .fniv = gen_mla_vec,
3815           .load_dest = true,
3816           .opt_opc = vecop_list,
3817           .vece = MO_32 },
3818         { .fni8 = gen_mla64_i64,
3819           .fniv = gen_mla_vec,
3820           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3821           .load_dest = true,
3822           .opt_opc = vecop_list,
3823           .vece = MO_64 },
3824     };
3825     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3826 }
3827 
3828 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3829                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3830 {
3831     static const TCGOpcode vecop_list[] = {
3832         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3833     };
3834     static const GVecGen3 ops[4] = {
3835         { .fni4 = gen_mls8_i32,
3836           .fniv = gen_mls_vec,
3837           .load_dest = true,
3838           .opt_opc = vecop_list,
3839           .vece = MO_8 },
3840         { .fni4 = gen_mls16_i32,
3841           .fniv = gen_mls_vec,
3842           .load_dest = true,
3843           .opt_opc = vecop_list,
3844           .vece = MO_16 },
3845         { .fni4 = gen_mls32_i32,
3846           .fniv = gen_mls_vec,
3847           .load_dest = true,
3848           .opt_opc = vecop_list,
3849           .vece = MO_32 },
3850         { .fni8 = gen_mls64_i64,
3851           .fniv = gen_mls_vec,
3852           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3853           .load_dest = true,
3854           .opt_opc = vecop_list,
3855           .vece = MO_64 },
3856     };
3857     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3858 }
3859 
/* CMTST : test is "if ((X & Y) != 0)". */
3861 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3862 {
3863     tcg_gen_and_i32(d, a, b);
3864     tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
3865 }
3866 
3867 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3868 {
3869     tcg_gen_and_i64(d, a, b);
3870     tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
3871 }
3872 
3873 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3874 {
3875     tcg_gen_and_vec(vece, d, a, b);
3876     tcg_gen_dupi_vec(vece, a, 0);
3877     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3878 }
3879 
3880 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3881                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3882 {
3883     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3884     static const GVecGen3 ops[4] = {
3885         { .fni4 = gen_helper_neon_tst_u8,
3886           .fniv = gen_cmtst_vec,
3887           .opt_opc = vecop_list,
3888           .vece = MO_8 },
3889         { .fni4 = gen_helper_neon_tst_u16,
3890           .fniv = gen_cmtst_vec,
3891           .opt_opc = vecop_list,
3892           .vece = MO_16 },
3893         { .fni4 = gen_cmtst_i32,
3894           .fniv = gen_cmtst_vec,
3895           .opt_opc = vecop_list,
3896           .vece = MO_32 },
3897         { .fni8 = gen_cmtst_i64,
3898           .fniv = gen_cmtst_vec,
3899           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3900           .opt_opc = vecop_list,
3901           .vece = MO_64 },
3902     };
3903     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3904 }
3905 
3906 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3907 {
3908     TCGv_i32 lval = tcg_temp_new_i32();
3909     TCGv_i32 rval = tcg_temp_new_i32();
3910     TCGv_i32 lsh = tcg_temp_new_i32();
3911     TCGv_i32 rsh = tcg_temp_new_i32();
3912     TCGv_i32 zero = tcg_constant_i32(0);
3913     TCGv_i32 max = tcg_constant_i32(32);
3914 
3915     /*
3916      * Rely on the TCG guarantee that out of range shifts produce
3917      * unspecified results, not undefined behaviour (i.e. no trap).
3918      * Discard out-of-range results after the fact.
3919      */
3920     tcg_gen_ext8s_i32(lsh, shift);
3921     tcg_gen_neg_i32(rsh, lsh);
3922     tcg_gen_shl_i32(lval, src, lsh);
3923     tcg_gen_shr_i32(rval, src, rsh);
3924     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3925     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3926 }
3927 
3928 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3929 {
3930     TCGv_i64 lval = tcg_temp_new_i64();
3931     TCGv_i64 rval = tcg_temp_new_i64();
3932     TCGv_i64 lsh = tcg_temp_new_i64();
3933     TCGv_i64 rsh = tcg_temp_new_i64();
3934     TCGv_i64 zero = tcg_constant_i64(0);
3935     TCGv_i64 max = tcg_constant_i64(64);
3936 
3937     /*
3938      * Rely on the TCG guarantee that out of range shifts produce
3939      * unspecified results, not undefined behaviour (i.e. no trap).
3940      * Discard out-of-range results after the fact.
3941      */
3942     tcg_gen_ext8s_i64(lsh, shift);
3943     tcg_gen_neg_i64(rsh, lsh);
3944     tcg_gen_shl_i64(lval, src, lsh);
3945     tcg_gen_shr_i64(rval, src, rsh);
3946     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3947     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3948 }
3949 
3950 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3951                          TCGv_vec src, TCGv_vec shift)
3952 {
3953     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3954     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3955     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3956     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3957     TCGv_vec msk, max;
3958 
3959     tcg_gen_neg_vec(vece, rsh, shift);
3960     if (vece == MO_8) {
3961         tcg_gen_mov_vec(lsh, shift);
3962     } else {
3963         msk = tcg_temp_new_vec_matching(dst);
3964         tcg_gen_dupi_vec(vece, msk, 0xff);
3965         tcg_gen_and_vec(vece, lsh, shift, msk);
3966         tcg_gen_and_vec(vece, rsh, rsh, msk);
3967     }
3968 
3969     /*
3970      * Rely on the TCG guarantee that out of range shifts produce
3971      * unspecified results, not undefined behaviour (i.e. no trap).
3972      * Discard out-of-range results after the fact.
3973      */
3974     tcg_gen_shlv_vec(vece, lval, src, lsh);
3975     tcg_gen_shrv_vec(vece, rval, src, rsh);
3976 
3977     max = tcg_temp_new_vec_matching(dst);
3978     tcg_gen_dupi_vec(vece, max, 8 << vece);
3979 
3980     /*
3981      * The choice of LT (signed) and GEU (unsigned) are biased toward
3982      * the instructions of the x86_64 host.  For MO_8, the whole byte
3983      * is significant so we must use an unsigned compare; otherwise we
3984      * have already masked to a byte and so a signed compare works.
3985      * Other tcg hosts have a full set of comparisons and do not care.
3986      */
3987     if (vece == MO_8) {
3988         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3989         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3990         tcg_gen_andc_vec(vece, lval, lval, lsh);
3991         tcg_gen_andc_vec(vece, rval, rval, rsh);
3992     } else {
3993         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3994         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3995         tcg_gen_and_vec(vece, lval, lval, lsh);
3996         tcg_gen_and_vec(vece, rval, rval, rsh);
3997     }
3998     tcg_gen_or_vec(vece, dst, lval, rval);
3999 }
4000 
4001 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4002                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4003 {
4004     static const TCGOpcode vecop_list[] = {
4005         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4006         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4007     };
4008     static const GVecGen3 ops[4] = {
4009         { .fniv = gen_ushl_vec,
4010           .fno = gen_helper_gvec_ushl_b,
4011           .opt_opc = vecop_list,
4012           .vece = MO_8 },
4013         { .fniv = gen_ushl_vec,
4014           .fno = gen_helper_gvec_ushl_h,
4015           .opt_opc = vecop_list,
4016           .vece = MO_16 },
4017         { .fni4 = gen_ushl_i32,
4018           .fniv = gen_ushl_vec,
4019           .opt_opc = vecop_list,
4020           .vece = MO_32 },
4021         { .fni8 = gen_ushl_i64,
4022           .fniv = gen_ushl_vec,
4023           .opt_opc = vecop_list,
4024           .vece = MO_64 },
4025     };
4026     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4027 }
4028 
4029 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4030 {
4031     TCGv_i32 lval = tcg_temp_new_i32();
4032     TCGv_i32 rval = tcg_temp_new_i32();
4033     TCGv_i32 lsh = tcg_temp_new_i32();
4034     TCGv_i32 rsh = tcg_temp_new_i32();
4035     TCGv_i32 zero = tcg_constant_i32(0);
4036     TCGv_i32 max = tcg_constant_i32(31);
4037 
4038     /*
4039      * Rely on the TCG guarantee that out of range shifts produce
4040      * unspecified results, not undefined behaviour (i.e. no trap).
4041      * Discard out-of-range results after the fact.
4042      */
4043     tcg_gen_ext8s_i32(lsh, shift);
4044     tcg_gen_neg_i32(rsh, lsh);
4045     tcg_gen_shl_i32(lval, src, lsh);
4046     tcg_gen_umin_i32(rsh, rsh, max);
4047     tcg_gen_sar_i32(rval, src, rsh);
4048     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4049     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4050 }
4051 
4052 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4053 {
4054     TCGv_i64 lval = tcg_temp_new_i64();
4055     TCGv_i64 rval = tcg_temp_new_i64();
4056     TCGv_i64 lsh = tcg_temp_new_i64();
4057     TCGv_i64 rsh = tcg_temp_new_i64();
4058     TCGv_i64 zero = tcg_constant_i64(0);
4059     TCGv_i64 max = tcg_constant_i64(63);
4060 
4061     /*
4062      * Rely on the TCG guarantee that out of range shifts produce
4063      * unspecified results, not undefined behaviour (i.e. no trap).
4064      * Discard out-of-range results after the fact.
4065      */
4066     tcg_gen_ext8s_i64(lsh, shift);
4067     tcg_gen_neg_i64(rsh, lsh);
4068     tcg_gen_shl_i64(lval, src, lsh);
4069     tcg_gen_umin_i64(rsh, rsh, max);
4070     tcg_gen_sar_i64(rval, src, rsh);
4071     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4072     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4073 }
4074 
4075 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4076                          TCGv_vec src, TCGv_vec shift)
4077 {
4078     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4079     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4080     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4081     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4082     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4083 
4084     /*
4085      * Rely on the TCG guarantee that out of range shifts produce
4086      * unspecified results, not undefined behaviour (i.e. no trap).
4087      * Discard out-of-range results after the fact.
4088      */
4089     tcg_gen_neg_vec(vece, rsh, shift);
4090     if (vece == MO_8) {
4091         tcg_gen_mov_vec(lsh, shift);
4092     } else {
4093         tcg_gen_dupi_vec(vece, tmp, 0xff);
4094         tcg_gen_and_vec(vece, lsh, shift, tmp);
4095         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4096     }
4097 
4098     /* Bound rsh so out of bound right shift gets -1.  */
4099     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4100     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4101     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4102 
4103     tcg_gen_shlv_vec(vece, lval, src, lsh);
4104     tcg_gen_sarv_vec(vece, rval, src, rsh);
4105 
4106     /* Select in-bound left shift.  */
4107     tcg_gen_andc_vec(vece, lval, lval, tmp);
4108 
4109     /* Select between left and right shift.  */
4110     if (vece == MO_8) {
4111         tcg_gen_dupi_vec(vece, tmp, 0);
4112         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4113     } else {
4114         tcg_gen_dupi_vec(vece, tmp, 0x80);
4115         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4116     }
4117 }
4118 
4119 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4120                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4121 {
4122     static const TCGOpcode vecop_list[] = {
4123         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4124         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4125     };
4126     static const GVecGen3 ops[4] = {
4127         { .fniv = gen_sshl_vec,
4128           .fno = gen_helper_gvec_sshl_b,
4129           .opt_opc = vecop_list,
4130           .vece = MO_8 },
4131         { .fniv = gen_sshl_vec,
4132           .fno = gen_helper_gvec_sshl_h,
4133           .opt_opc = vecop_list,
4134           .vece = MO_16 },
4135         { .fni4 = gen_sshl_i32,
4136           .fniv = gen_sshl_vec,
4137           .opt_opc = vecop_list,
4138           .vece = MO_32 },
4139         { .fni8 = gen_sshl_i64,
4140           .fniv = gen_sshl_vec,
4141           .opt_opc = vecop_list,
4142           .vece = MO_64 },
4143     };
4144     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4145 }
4146 
4147 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4148                           TCGv_vec a, TCGv_vec b)
4149 {
4150     TCGv_vec x = tcg_temp_new_vec_matching(t);
4151     tcg_gen_add_vec(vece, x, a, b);
4152     tcg_gen_usadd_vec(vece, t, a, b);
4153     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4154     tcg_gen_or_vec(vece, sat, sat, x);
4155 }
4156 
4157 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4158                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4159 {
4160     static const TCGOpcode vecop_list[] = {
4161         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4162     };
4163     static const GVecGen4 ops[4] = {
4164         { .fniv = gen_uqadd_vec,
4165           .fno = gen_helper_gvec_uqadd_b,
4166           .write_aofs = true,
4167           .opt_opc = vecop_list,
4168           .vece = MO_8 },
4169         { .fniv = gen_uqadd_vec,
4170           .fno = gen_helper_gvec_uqadd_h,
4171           .write_aofs = true,
4172           .opt_opc = vecop_list,
4173           .vece = MO_16 },
4174         { .fniv = gen_uqadd_vec,
4175           .fno = gen_helper_gvec_uqadd_s,
4176           .write_aofs = true,
4177           .opt_opc = vecop_list,
4178           .vece = MO_32 },
4179         { .fniv = gen_uqadd_vec,
4180           .fno = gen_helper_gvec_uqadd_d,
4181           .write_aofs = true,
4182           .opt_opc = vecop_list,
4183           .vece = MO_64 },
4184     };
4185     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4186                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4187 }
4188 
4189 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4190                           TCGv_vec a, TCGv_vec b)
4191 {
4192     TCGv_vec x = tcg_temp_new_vec_matching(t);
4193     tcg_gen_add_vec(vece, x, a, b);
4194     tcg_gen_ssadd_vec(vece, t, a, b);
4195     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4196     tcg_gen_or_vec(vece, sat, sat, x);
4197 }
4198 
4199 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4200                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4201 {
4202     static const TCGOpcode vecop_list[] = {
4203         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4204     };
4205     static const GVecGen4 ops[4] = {
4206         { .fniv = gen_sqadd_vec,
4207           .fno = gen_helper_gvec_sqadd_b,
4208           .opt_opc = vecop_list,
4209           .write_aofs = true,
4210           .vece = MO_8 },
4211         { .fniv = gen_sqadd_vec,
4212           .fno = gen_helper_gvec_sqadd_h,
4213           .opt_opc = vecop_list,
4214           .write_aofs = true,
4215           .vece = MO_16 },
4216         { .fniv = gen_sqadd_vec,
4217           .fno = gen_helper_gvec_sqadd_s,
4218           .opt_opc = vecop_list,
4219           .write_aofs = true,
4220           .vece = MO_32 },
4221         { .fniv = gen_sqadd_vec,
4222           .fno = gen_helper_gvec_sqadd_d,
4223           .opt_opc = vecop_list,
4224           .write_aofs = true,
4225           .vece = MO_64 },
4226     };
4227     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4228                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4229 }
4230 
4231 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4232                           TCGv_vec a, TCGv_vec b)
4233 {
4234     TCGv_vec x = tcg_temp_new_vec_matching(t);
4235     tcg_gen_sub_vec(vece, x, a, b);
4236     tcg_gen_ussub_vec(vece, t, a, b);
4237     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4238     tcg_gen_or_vec(vece, sat, sat, x);
4239 }
4240 
4241 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4242                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4243 {
4244     static const TCGOpcode vecop_list[] = {
4245         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4246     };
4247     static const GVecGen4 ops[4] = {
4248         { .fniv = gen_uqsub_vec,
4249           .fno = gen_helper_gvec_uqsub_b,
4250           .opt_opc = vecop_list,
4251           .write_aofs = true,
4252           .vece = MO_8 },
4253         { .fniv = gen_uqsub_vec,
4254           .fno = gen_helper_gvec_uqsub_h,
4255           .opt_opc = vecop_list,
4256           .write_aofs = true,
4257           .vece = MO_16 },
4258         { .fniv = gen_uqsub_vec,
4259           .fno = gen_helper_gvec_uqsub_s,
4260           .opt_opc = vecop_list,
4261           .write_aofs = true,
4262           .vece = MO_32 },
4263         { .fniv = gen_uqsub_vec,
4264           .fno = gen_helper_gvec_uqsub_d,
4265           .opt_opc = vecop_list,
4266           .write_aofs = true,
4267           .vece = MO_64 },
4268     };
4269     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4270                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4271 }
4272 
4273 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4274                           TCGv_vec a, TCGv_vec b)
4275 {
4276     TCGv_vec x = tcg_temp_new_vec_matching(t);
4277     tcg_gen_sub_vec(vece, x, a, b);
4278     tcg_gen_sssub_vec(vece, t, a, b);
4279     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4280     tcg_gen_or_vec(vece, sat, sat, x);
4281 }
4282 
4283 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4284                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4285 {
4286     static const TCGOpcode vecop_list[] = {
4287         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4288     };
4289     static const GVecGen4 ops[4] = {
4290         { .fniv = gen_sqsub_vec,
4291           .fno = gen_helper_gvec_sqsub_b,
4292           .opt_opc = vecop_list,
4293           .write_aofs = true,
4294           .vece = MO_8 },
4295         { .fniv = gen_sqsub_vec,
4296           .fno = gen_helper_gvec_sqsub_h,
4297           .opt_opc = vecop_list,
4298           .write_aofs = true,
4299           .vece = MO_16 },
4300         { .fniv = gen_sqsub_vec,
4301           .fno = gen_helper_gvec_sqsub_s,
4302           .opt_opc = vecop_list,
4303           .write_aofs = true,
4304           .vece = MO_32 },
4305         { .fniv = gen_sqsub_vec,
4306           .fno = gen_helper_gvec_sqsub_d,
4307           .opt_opc = vecop_list,
4308           .write_aofs = true,
4309           .vece = MO_64 },
4310     };
4311     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4312                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4313 }
4314 
4315 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4316 {
4317     TCGv_i32 t = tcg_temp_new_i32();
4318 
4319     tcg_gen_sub_i32(t, a, b);
4320     tcg_gen_sub_i32(d, b, a);
4321     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4322 }
4323 
4324 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4325 {
4326     TCGv_i64 t = tcg_temp_new_i64();
4327 
4328     tcg_gen_sub_i64(t, a, b);
4329     tcg_gen_sub_i64(d, b, a);
4330     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4331 }
4332 
4333 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4334 {
4335     TCGv_vec t = tcg_temp_new_vec_matching(d);
4336 
4337     tcg_gen_smin_vec(vece, t, a, b);
4338     tcg_gen_smax_vec(vece, d, a, b);
4339     tcg_gen_sub_vec(vece, d, d, t);
4340 }
4341 
4342 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4343                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4344 {
4345     static const TCGOpcode vecop_list[] = {
4346         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4347     };
4348     static const GVecGen3 ops[4] = {
4349         { .fniv = gen_sabd_vec,
4350           .fno = gen_helper_gvec_sabd_b,
4351           .opt_opc = vecop_list,
4352           .vece = MO_8 },
4353         { .fniv = gen_sabd_vec,
4354           .fno = gen_helper_gvec_sabd_h,
4355           .opt_opc = vecop_list,
4356           .vece = MO_16 },
4357         { .fni4 = gen_sabd_i32,
4358           .fniv = gen_sabd_vec,
4359           .fno = gen_helper_gvec_sabd_s,
4360           .opt_opc = vecop_list,
4361           .vece = MO_32 },
4362         { .fni8 = gen_sabd_i64,
4363           .fniv = gen_sabd_vec,
4364           .fno = gen_helper_gvec_sabd_d,
4365           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4366           .opt_opc = vecop_list,
4367           .vece = MO_64 },
4368     };
4369     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4370 }
4371 
4372 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4373 {
4374     TCGv_i32 t = tcg_temp_new_i32();
4375 
4376     tcg_gen_sub_i32(t, a, b);
4377     tcg_gen_sub_i32(d, b, a);
4378     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4379 }
4380 
4381 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4382 {
4383     TCGv_i64 t = tcg_temp_new_i64();
4384 
4385     tcg_gen_sub_i64(t, a, b);
4386     tcg_gen_sub_i64(d, b, a);
4387     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4388 }
4389 
4390 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4391 {
4392     TCGv_vec t = tcg_temp_new_vec_matching(d);
4393 
4394     tcg_gen_umin_vec(vece, t, a, b);
4395     tcg_gen_umax_vec(vece, d, a, b);
4396     tcg_gen_sub_vec(vece, d, d, t);
4397 }
4398 
4399 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4400                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4401 {
4402     static const TCGOpcode vecop_list[] = {
4403         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4404     };
4405     static const GVecGen3 ops[4] = {
4406         { .fniv = gen_uabd_vec,
4407           .fno = gen_helper_gvec_uabd_b,
4408           .opt_opc = vecop_list,
4409           .vece = MO_8 },
4410         { .fniv = gen_uabd_vec,
4411           .fno = gen_helper_gvec_uabd_h,
4412           .opt_opc = vecop_list,
4413           .vece = MO_16 },
4414         { .fni4 = gen_uabd_i32,
4415           .fniv = gen_uabd_vec,
4416           .fno = gen_helper_gvec_uabd_s,
4417           .opt_opc = vecop_list,
4418           .vece = MO_32 },
4419         { .fni8 = gen_uabd_i64,
4420           .fniv = gen_uabd_vec,
4421           .fno = gen_helper_gvec_uabd_d,
4422           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4423           .opt_opc = vecop_list,
4424           .vece = MO_64 },
4425     };
4426     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4427 }
4428 
4429 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4430 {
4431     TCGv_i32 t = tcg_temp_new_i32();
4432     gen_sabd_i32(t, a, b);
4433     tcg_gen_add_i32(d, d, t);
4434 }
4435 
4436 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4437 {
4438     TCGv_i64 t = tcg_temp_new_i64();
4439     gen_sabd_i64(t, a, b);
4440     tcg_gen_add_i64(d, d, t);
4441 }
4442 
4443 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4444 {
4445     TCGv_vec t = tcg_temp_new_vec_matching(d);
4446     gen_sabd_vec(vece, t, a, b);
4447     tcg_gen_add_vec(vece, d, d, t);
4448 }
4449 
4450 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4451                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4452 {
4453     static const TCGOpcode vecop_list[] = {
4454         INDEX_op_sub_vec, INDEX_op_add_vec,
4455         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4456     };
4457     static const GVecGen3 ops[4] = {
4458         { .fniv = gen_saba_vec,
4459           .fno = gen_helper_gvec_saba_b,
4460           .opt_opc = vecop_list,
4461           .load_dest = true,
4462           .vece = MO_8 },
4463         { .fniv = gen_saba_vec,
4464           .fno = gen_helper_gvec_saba_h,
4465           .opt_opc = vecop_list,
4466           .load_dest = true,
4467           .vece = MO_16 },
4468         { .fni4 = gen_saba_i32,
4469           .fniv = gen_saba_vec,
4470           .fno = gen_helper_gvec_saba_s,
4471           .opt_opc = vecop_list,
4472           .load_dest = true,
4473           .vece = MO_32 },
4474         { .fni8 = gen_saba_i64,
4475           .fniv = gen_saba_vec,
4476           .fno = gen_helper_gvec_saba_d,
4477           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4478           .opt_opc = vecop_list,
4479           .load_dest = true,
4480           .vece = MO_64 },
4481     };
4482     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4483 }
4484 
4485 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4486 {
4487     TCGv_i32 t = tcg_temp_new_i32();
4488     gen_uabd_i32(t, a, b);
4489     tcg_gen_add_i32(d, d, t);
4490 }
4491 
4492 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4493 {
4494     TCGv_i64 t = tcg_temp_new_i64();
4495     gen_uabd_i64(t, a, b);
4496     tcg_gen_add_i64(d, d, t);
4497 }
4498 
4499 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4500 {
4501     TCGv_vec t = tcg_temp_new_vec_matching(d);
4502     gen_uabd_vec(vece, t, a, b);
4503     tcg_gen_add_vec(vece, d, d, t);
4504 }
4505 
4506 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4507                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4508 {
4509     static const TCGOpcode vecop_list[] = {
4510         INDEX_op_sub_vec, INDEX_op_add_vec,
4511         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4512     };
4513     static const GVecGen3 ops[4] = {
4514         { .fniv = gen_uaba_vec,
4515           .fno = gen_helper_gvec_uaba_b,
4516           .opt_opc = vecop_list,
4517           .load_dest = true,
4518           .vece = MO_8 },
4519         { .fniv = gen_uaba_vec,
4520           .fno = gen_helper_gvec_uaba_h,
4521           .opt_opc = vecop_list,
4522           .load_dest = true,
4523           .vece = MO_16 },
4524         { .fni4 = gen_uaba_i32,
4525           .fniv = gen_uaba_vec,
4526           .fno = gen_helper_gvec_uaba_s,
4527           .opt_opc = vecop_list,
4528           .load_dest = true,
4529           .vece = MO_32 },
4530         { .fni8 = gen_uaba_i64,
4531           .fniv = gen_uaba_vec,
4532           .fno = gen_helper_gvec_uaba_d,
4533           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4534           .opt_opc = vecop_list,
4535           .load_dest = true,
4536           .vece = MO_64 },
4537     };
4538     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4539 }
4540 
/*
 * Return true if the cp15 encoding (crn, crm) lies in the space
 * described by the per-CRn bitmaps below: one 16-bit mask for each of
 * crn 9, 10 and 11, in which bit N set means "crm == N is in the
 * IMPLEMENTATION DEFINED space".
 *
 * Any crn outside 9..11 returns false.  crm is a 4-bit field in the
 * instruction encodings, but the parameter type admits larger values;
 * those are rejected explicitly so that the shift count below can never
 * reach the width of the promoted operand (which would be undefined
 * behaviour).
 */
static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
{
    static const uint16_t mask[3] = {
        0b0000000111100111,  /* crn ==  9, crm == {c0-c2, c5-c8}   */
        0b0000000100010011,  /* crn == 10, crm == {c0, c1, c4, c8} */
        0b1000000111111111,  /* crn == 11, crm == {c0-c8, c15}     */
    };

    if (crn < 9 || crn > 11 || crm > 15) {
        return false;
    }
    return (mask[crn - 9] >> crm) & 1;
}
4554 
4555 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4556                            int opc1, int crn, int crm, int opc2,
4557                            bool isread, int rt, int rt2)
4558 {
4559     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4560     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4561     TCGv_ptr tcg_ri = NULL;
4562     bool need_exit_tb = false;
4563     uint32_t syndrome;
4564 
4565     /*
4566      * Note that since we are an implementation which takes an
4567      * exception on a trapped conditional instruction only if the
4568      * instruction passes its condition code check, we can take
4569      * advantage of the clause in the ARM ARM that allows us to set
4570      * the COND field in the instruction to 0xE in all cases.
4571      * We could fish the actual condition out of the insn (ARM)
4572      * or the condexec bits (Thumb) but it isn't necessary.
4573      */
4574     switch (cpnum) {
4575     case 14:
4576         if (is64) {
4577             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4578                                          isread, false);
4579         } else {
4580             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4581                                         rt, isread, false);
4582         }
4583         break;
4584     case 15:
4585         if (is64) {
4586             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4587                                          isread, false);
4588         } else {
4589             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4590                                         rt, isread, false);
4591         }
4592         break;
4593     default:
4594         /*
4595          * ARMv8 defines that only coprocessors 14 and 15 exist,
4596          * so this can only happen if this is an ARMv7 or earlier CPU,
4597          * in which case the syndrome information won't actually be
4598          * guest visible.
4599          */
4600         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4601         syndrome = syn_uncategorized();
4602         break;
4603     }
4604 
4605     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4606         /*
4607          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4608          * over the UNDEF for "no such register" or the UNDEF for "access
4609          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4610          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4611          * access_check_cp_reg(), after the checks for whether the access
4612          * configurably trapped to EL1.
4613          */
4614         uint32_t maskbit = is64 ? crm : crn;
4615 
4616         if (maskbit != 4 && maskbit != 14) {
4617             /* T4 and T14 are RES0 so never cause traps */
4618             TCGv_i32 t;
4619             DisasLabel over = gen_disas_label(s);
4620 
4621             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4622             tcg_gen_andi_i32(t, t, 1u << maskbit);
4623             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4624 
4625             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4626             /*
4627              * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
4628              * but since we're conditionally branching over it, we want
4629              * to assume continue-to-next-instruction.
4630              */
4631             s->base.is_jmp = DISAS_NEXT;
4632             set_disas_label(s, over);
4633         }
4634     }
4635 
4636     if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) {
4637         /*
4638          * Check for TIDCP trap, which must take precedence over the UNDEF
4639          * for "no such register" etc.  It shares precedence with HSTR,
4640          * but raises the same exception, so order doesn't matter.
4641          */
4642         switch (s->current_el) {
4643         case 0:
4644             if (arm_dc_feature(s, ARM_FEATURE_AARCH64)
4645                 && dc_isar_feature(aa64_tidcp1, s)) {
4646                 gen_helper_tidcp_el0(cpu_env, tcg_constant_i32(syndrome));
4647             }
4648             break;
4649         case 1:
4650             gen_helper_tidcp_el1(cpu_env, tcg_constant_i32(syndrome));
4651             break;
4652         }
4653     }
4654 
4655     if (!ri) {
4656         /*
4657          * Unknown register; this might be a guest error or a QEMU
4658          * unimplemented feature.
4659          */
4660         if (is64) {
4661             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4662                           "64 bit system register cp:%d opc1: %d crm:%d "
4663                           "(%s)\n",
4664                           isread ? "read" : "write", cpnum, opc1, crm,
4665                           s->ns ? "non-secure" : "secure");
4666         } else {
4667             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4668                           "system register cp:%d opc1:%d crn:%d crm:%d "
4669                           "opc2:%d (%s)\n",
4670                           isread ? "read" : "write", cpnum, opc1, crn,
4671                           crm, opc2, s->ns ? "non-secure" : "secure");
4672         }
4673         unallocated_encoding(s);
4674         return;
4675     }
4676 
4677     /* Check access permissions */
4678     if (!cp_access_ok(s->current_el, ri, isread)) {
4679         unallocated_encoding(s);
4680         return;
4681     }
4682 
4683     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4684         (ri->fgt && s->fgt_active) ||
4685         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4686         /*
4687          * Emit code to perform further access permissions checks at
4688          * runtime; this may result in an exception.
4689          * Note that on XScale all cp0..c13 registers do an access check
4690          * call in order to handle c15_cpar.
4691          */
4692         gen_set_condexec(s);
4693         gen_update_pc(s, 0);
4694         tcg_ri = tcg_temp_new_ptr();
4695         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4696                                        tcg_constant_i32(key),
4697                                        tcg_constant_i32(syndrome),
4698                                        tcg_constant_i32(isread));
4699     } else if (ri->type & ARM_CP_RAISES_EXC) {
4700         /*
4701          * The readfn or writefn might raise an exception;
4702          * synchronize the CPU state in case it does.
4703          */
4704         gen_set_condexec(s);
4705         gen_update_pc(s, 0);
4706     }
4707 
4708     /* Handle special cases first */
4709     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4710     case 0:
4711         break;
4712     case ARM_CP_NOP:
4713         return;
4714     case ARM_CP_WFI:
4715         if (isread) {
4716             unallocated_encoding(s);
4717         } else {
4718             gen_update_pc(s, curr_insn_len(s));
4719             s->base.is_jmp = DISAS_WFI;
4720         }
4721         return;
4722     default:
4723         g_assert_not_reached();
4724     }
4725 
4726     if (ri->type & ARM_CP_IO) {
4727         /* I/O operations must end the TB here (whether read or write) */
4728         need_exit_tb = translator_io_start(&s->base);
4729     }
4730 
4731     if (isread) {
4732         /* Read */
4733         if (is64) {
4734             TCGv_i64 tmp64;
4735             TCGv_i32 tmp;
4736             if (ri->type & ARM_CP_CONST) {
4737                 tmp64 = tcg_constant_i64(ri->resetvalue);
4738             } else if (ri->readfn) {
4739                 if (!tcg_ri) {
4740                     tcg_ri = gen_lookup_cp_reg(key);
4741                 }
4742                 tmp64 = tcg_temp_new_i64();
4743                 gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4744             } else {
4745                 tmp64 = tcg_temp_new_i64();
4746                 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4747             }
4748             tmp = tcg_temp_new_i32();
4749             tcg_gen_extrl_i64_i32(tmp, tmp64);
4750             store_reg(s, rt, tmp);
4751             tmp = tcg_temp_new_i32();
4752             tcg_gen_extrh_i64_i32(tmp, tmp64);
4753             store_reg(s, rt2, tmp);
4754         } else {
4755             TCGv_i32 tmp;
4756             if (ri->type & ARM_CP_CONST) {
4757                 tmp = tcg_constant_i32(ri->resetvalue);
4758             } else if (ri->readfn) {
4759                 if (!tcg_ri) {
4760                     tcg_ri = gen_lookup_cp_reg(key);
4761                 }
4762                 tmp = tcg_temp_new_i32();
4763                 gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4764             } else {
4765                 tmp = load_cpu_offset(ri->fieldoffset);
4766             }
4767             if (rt == 15) {
4768                 /* Destination register of r15 for 32 bit loads sets
4769                  * the condition codes from the high 4 bits of the value
4770                  */
4771                 gen_set_nzcv(tmp);
4772             } else {
4773                 store_reg(s, rt, tmp);
4774             }
4775         }
4776     } else {
4777         /* Write */
4778         if (ri->type & ARM_CP_CONST) {
4779             /* If not forbidden by access permissions, treat as WI */
4780             return;
4781         }
4782 
4783         if (is64) {
4784             TCGv_i32 tmplo, tmphi;
4785             TCGv_i64 tmp64 = tcg_temp_new_i64();
4786             tmplo = load_reg(s, rt);
4787             tmphi = load_reg(s, rt2);
4788             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4789             if (ri->writefn) {
4790                 if (!tcg_ri) {
4791                     tcg_ri = gen_lookup_cp_reg(key);
4792                 }
4793                 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4794             } else {
4795                 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4796             }
4797         } else {
4798             TCGv_i32 tmp = load_reg(s, rt);
4799             if (ri->writefn) {
4800                 if (!tcg_ri) {
4801                     tcg_ri = gen_lookup_cp_reg(key);
4802                 }
4803                 gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4804             } else {
4805                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4806             }
4807         }
4808     }
4809 
4810     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4811         /*
4812          * A write to any coprocessor register that ends a TB
4813          * must rebuild the hflags for the next TB.
4814          */
4815         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4816         /*
4817          * We default to ending the TB on a coprocessor register write,
4818          * but allow this to be suppressed by the register definition
4819          * (usually only necessary to work around guest bugs).
4820          */
4821         need_exit_tb = true;
4822     }
4823     if (need_exit_tb) {
4824         gen_lookup_tb(s);
4825     }
4826 }
4827 
4828 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4829 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4830 {
4831     int cpnum = (insn >> 8) & 0xf;
4832 
4833     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4834         unallocated_encoding(s);
4835     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4836         if (disas_iwmmxt_insn(s, insn)) {
4837             unallocated_encoding(s);
4838         }
4839     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4840         if (disas_dsp_insn(s, insn)) {
4841             unallocated_encoding(s);
4842         }
4843     }
4844 }
4845 
4846 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4847 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4848 {
4849     TCGv_i32 tmp;
4850     tmp = tcg_temp_new_i32();
4851     tcg_gen_extrl_i64_i32(tmp, val);
4852     store_reg(s, rlow, tmp);
4853     tmp = tcg_temp_new_i32();
4854     tcg_gen_extrh_i64_i32(tmp, val);
4855     store_reg(s, rhigh, tmp);
4856 }
4857 
4858 /* load and add a 64-bit value from a register pair.  */
4859 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4860 {
4861     TCGv_i64 tmp;
4862     TCGv_i32 tmpl;
4863     TCGv_i32 tmph;
4864 
4865     /* Load 64-bit value rd:rn.  */
4866     tmpl = load_reg(s, rlow);
4867     tmph = load_reg(s, rhigh);
4868     tmp = tcg_temp_new_i64();
4869     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4870     tcg_gen_add_i64(val, val, tmp);
4871 }
4872 
4873 /* Set N and Z flags from hi|lo.  */
4874 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4875 {
4876     tcg_gen_mov_i32(cpu_NF, hi);
4877     tcg_gen_or_i32(cpu_ZF, lo, hi);
4878 }
4879 
4880 /* Load/Store exclusive instructions are implemented by remembering
4881    the value/address loaded, and seeing if these are the same
4882    when the store is performed.  This should be sufficient to implement
4883    the architecturally mandated semantics, and avoids having to monitor
4884    regular stores.  The compare vs the remembered value is done during
4885    the cmpxchg operation, but we must compare the addresses manually.  */
4886 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4887                                TCGv_i32 addr, int size)
4888 {
4889     TCGv_i32 tmp = tcg_temp_new_i32();
4890     MemOp opc = size | MO_ALIGN | s->be_data;
4891 
4892     s->is_ldex = true;
4893 
4894     if (size == 3) {
4895         TCGv_i32 tmp2 = tcg_temp_new_i32();
4896         TCGv_i64 t64 = tcg_temp_new_i64();
4897 
4898         /*
4899          * For AArch32, architecturally the 32-bit word at the lowest
4900          * address is always Rt and the one at addr+4 is Rt2, even if
4901          * the CPU is big-endian. That means we don't want to do a
4902          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4903          * architecturally 64-bit access, but instead do a 64-bit access
4904          * using MO_BE if appropriate and then split the two halves.
4905          */
4906         TCGv taddr = gen_aa32_addr(s, addr, opc);
4907 
4908         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4909         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4910         if (s->be_data == MO_BE) {
4911             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4912         } else {
4913             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4914         }
4915         store_reg(s, rt2, tmp2);
4916     } else {
4917         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4918         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4919     }
4920 
4921     store_reg(s, rt, tmp);
4922     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4923 }
4924 
4925 static void gen_clrex(DisasContext *s)
4926 {
4927     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4928 }
4929 
4930 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4931                                 TCGv_i32 addr, int size)
4932 {
4933     TCGv_i32 t0, t1, t2;
4934     TCGv_i64 extaddr;
4935     TCGv taddr;
4936     TCGLabel *done_label;
4937     TCGLabel *fail_label;
4938     MemOp opc = size | MO_ALIGN | s->be_data;
4939 
4940     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4941          [addr] = {Rt};
4942          {Rd} = 0;
4943        } else {
4944          {Rd} = 1;
4945        } */
4946     fail_label = gen_new_label();
4947     done_label = gen_new_label();
4948     extaddr = tcg_temp_new_i64();
4949     tcg_gen_extu_i32_i64(extaddr, addr);
4950     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4951 
4952     taddr = gen_aa32_addr(s, addr, opc);
4953     t0 = tcg_temp_new_i32();
4954     t1 = load_reg(s, rt);
4955     if (size == 3) {
4956         TCGv_i64 o64 = tcg_temp_new_i64();
4957         TCGv_i64 n64 = tcg_temp_new_i64();
4958 
4959         t2 = load_reg(s, rt2);
4960 
4961         /*
4962          * For AArch32, architecturally the 32-bit word at the lowest
4963          * address is always Rt and the one at addr+4 is Rt2, even if
4964          * the CPU is big-endian. Since we're going to treat this as a
4965          * single 64-bit BE store, we need to put the two halves in the
4966          * opposite order for BE to LE, so that they end up in the right
4967          * places.  We don't want gen_aa32_st_i64, because that checks
4968          * SCTLR_B as if for an architectural 64-bit access.
4969          */
4970         if (s->be_data == MO_BE) {
4971             tcg_gen_concat_i32_i64(n64, t2, t1);
4972         } else {
4973             tcg_gen_concat_i32_i64(n64, t1, t2);
4974         }
4975 
4976         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4977                                    get_mem_index(s), opc);
4978 
4979         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4980         tcg_gen_extrl_i64_i32(t0, o64);
4981     } else {
4982         t2 = tcg_temp_new_i32();
4983         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4984         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4985         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4986     }
4987     tcg_gen_mov_i32(cpu_R[rd], t0);
4988     tcg_gen_br(done_label);
4989 
4990     gen_set_label(fail_label);
4991     tcg_gen_movi_i32(cpu_R[rd], 1);
4992     gen_set_label(done_label);
4993     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4994 }
4995 
4996 /* gen_srs:
4997  * @env: CPUARMState
4998  * @s: DisasContext
4999  * @mode: mode field from insn (which stack to store to)
5000  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5001  * @writeback: true if writeback bit set
5002  *
5003  * Generate code for the SRS (Store Return State) insn.
5004  */
5005 static void gen_srs(DisasContext *s,
5006                     uint32_t mode, uint32_t amode, bool writeback)
5007 {
5008     int32_t offset;
5009     TCGv_i32 addr, tmp;
5010     bool undef = false;
5011 
5012     /* SRS is:
5013      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5014      *   and specified mode is monitor mode
5015      * - UNDEFINED in Hyp mode
5016      * - UNPREDICTABLE in User or System mode
5017      * - UNPREDICTABLE if the specified mode is:
5018      * -- not implemented
5019      * -- not a valid mode number
5020      * -- a mode that's at a higher exception level
5021      * -- Monitor, if we are Non-secure
5022      * For the UNPREDICTABLE cases we choose to UNDEF.
5023      */
5024     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5025         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
5026         return;
5027     }
5028 
5029     if (s->current_el == 0 || s->current_el == 2) {
5030         undef = true;
5031     }
5032 
5033     switch (mode) {
5034     case ARM_CPU_MODE_USR:
5035     case ARM_CPU_MODE_FIQ:
5036     case ARM_CPU_MODE_IRQ:
5037     case ARM_CPU_MODE_SVC:
5038     case ARM_CPU_MODE_ABT:
5039     case ARM_CPU_MODE_UND:
5040     case ARM_CPU_MODE_SYS:
5041         break;
5042     case ARM_CPU_MODE_HYP:
5043         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5044             undef = true;
5045         }
5046         break;
5047     case ARM_CPU_MODE_MON:
5048         /* No need to check specifically for "are we non-secure" because
5049          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5050          * so if this isn't EL3 then we must be non-secure.
5051          */
5052         if (s->current_el != 3) {
5053             undef = true;
5054         }
5055         break;
5056     default:
5057         undef = true;
5058     }
5059 
5060     if (undef) {
5061         unallocated_encoding(s);
5062         return;
5063     }
5064 
5065     addr = tcg_temp_new_i32();
5066     /* get_r13_banked() will raise an exception if called from System mode */
5067     gen_set_condexec(s);
5068     gen_update_pc(s, 0);
5069     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5070     switch (amode) {
5071     case 0: /* DA */
5072         offset = -4;
5073         break;
5074     case 1: /* IA */
5075         offset = 0;
5076         break;
5077     case 2: /* DB */
5078         offset = -8;
5079         break;
5080     case 3: /* IB */
5081         offset = 4;
5082         break;
5083     default:
5084         g_assert_not_reached();
5085     }
5086     tcg_gen_addi_i32(addr, addr, offset);
5087     tmp = load_reg(s, 14);
5088     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5089     tmp = load_cpu_field(spsr);
5090     tcg_gen_addi_i32(addr, addr, 4);
5091     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5092     if (writeback) {
5093         switch (amode) {
5094         case 0:
5095             offset = -8;
5096             break;
5097         case 1:
5098             offset = 4;
5099             break;
5100         case 2:
5101             offset = -4;
5102             break;
5103         case 3:
5104             offset = 0;
5105             break;
5106         default:
5107             g_assert_not_reached();
5108         }
5109         tcg_gen_addi_i32(addr, addr, offset);
5110         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5111     }
5112     s->base.is_jmp = DISAS_UPDATE_EXIT;
5113 }
5114 
5115 /* Skip this instruction if the ARM condition is false */
5116 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5117 {
5118     arm_gen_condlabel(s);
5119     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5120 }
5121 
5122 
5123 /*
5124  * Constant expanders used by T16/T32 decode
5125  */
5126 
5127 /* Return only the rotation part of T32ExpandImm.  */
5128 static int t32_expandimm_rot(DisasContext *s, int x)
5129 {
5130     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5131 }
5132 
5133 /* Return the unrotated immediate from T32ExpandImm.  */
5134 static int t32_expandimm_imm(DisasContext *s, int x)
5135 {
5136     int imm = extract32(x, 0, 8);
5137 
5138     switch (extract32(x, 8, 4)) {
5139     case 0: /* XY */
5140         /* Nothing to do.  */
5141         break;
5142     case 1: /* 00XY00XY */
5143         imm *= 0x00010001;
5144         break;
5145     case 2: /* XY00XY00 */
5146         imm *= 0x01000100;
5147         break;
5148     case 3: /* XYXYXYXY */
5149         imm *= 0x01010101;
5150         break;
5151     default:
5152         /* Rotated constant.  */
5153         imm |= 0x80;
5154         break;
5155     }
5156     return imm;
5157 }
5158 
5159 static int t32_branch24(DisasContext *s, int x)
5160 {
5161     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5162     x ^= !(x < 0) * (3 << 21);
5163     /* Append the final zero.  */
5164     return x << 1;
5165 }
5166 
5167 static int t16_setflags(DisasContext *s)
5168 {
5169     return s->condexec_mask == 0;
5170 }
5171 
5172 static int t16_push_list(DisasContext *s, int x)
5173 {
5174     return (x & 0xff) | (x & 0x100) << (14 - 8);
5175 }
5176 
5177 static int t16_pop_list(DisasContext *s, int x)
5178 {
5179     return (x & 0xff) | (x & 0x100) << (15 - 8);
5180 }
5181 
5182 /*
5183  * Include the generated decoders.
5184  */
5185 
5186 #include "decode-a32.c.inc"
5187 #include "decode-a32-uncond.c.inc"
5188 #include "decode-t32.c.inc"
5189 #include "decode-t16.c.inc"
5190 
5191 static bool valid_cp(DisasContext *s, int cp)
5192 {
5193     /*
5194      * Return true if this coprocessor field indicates something
5195      * that's really a possible coprocessor.
5196      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5197      * and of those only cp14 and cp15 were used for registers.
5198      * cp10 and cp11 were used for VFP and Neon, whose decode is
5199      * dealt with elsewhere. With the advent of fp16, cp9 is also
5200      * now part of VFP.
5201      * For v8A and later, the encoding has been tightened so that
5202      * only cp14 and cp15 are valid, and other values aren't considered
5203      * to be in the coprocessor-instruction space at all. v8M still
5204      * permits coprocessors 0..7.
5205      * For XScale, we must not decode the XScale cp0, cp1 space as
5206      * a standard coprocessor insn, because we want to fall through to
5207      * the legacy disas_xscale_insn() decoder after decodetree is done.
5208      */
5209     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5210         return false;
5211     }
5212 
5213     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5214         !arm_dc_feature(s, ARM_FEATURE_M)) {
5215         return cp >= 14;
5216     }
5217     return cp < 8 || cp >= 14;
5218 }
5219 
5220 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5221 {
5222     if (!valid_cp(s, a->cp)) {
5223         return false;
5224     }
5225     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5226                    false, a->rt, 0);
5227     return true;
5228 }
5229 
5230 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5231 {
5232     if (!valid_cp(s, a->cp)) {
5233         return false;
5234     }
5235     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5236                    true, a->rt, 0);
5237     return true;
5238 }
5239 
5240 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5241 {
5242     if (!valid_cp(s, a->cp)) {
5243         return false;
5244     }
5245     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5246                    false, a->rt, a->rt2);
5247     return true;
5248 }
5249 
5250 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5251 {
5252     if (!valid_cp(s, a->cp)) {
5253         return false;
5254     }
5255     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5256                    true, a->rt, a->rt2);
5257     return true;
5258 }
5259 
5260 /* Helpers to swap operands for reverse-subtract.  */
5261 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5262 {
5263     tcg_gen_sub_i32(dst, b, a);
5264 }
5265 
5266 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5267 {
5268     gen_sub_CC(dst, b, a);
5269 }
5270 
5271 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5272 {
5273     gen_sub_carry(dest, b, a);
5274 }
5275 
5276 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5277 {
5278     gen_sbc_CC(dest, b, a);
5279 }
5280 
/*
 * Helpers for the data processing routines.
 *
 * After the computation, store the result back.  Depending on the
 * StoreRegKind this is suppressed altogether (STREG_NONE), is a plain
 * register write (STREG_NORMAL), requires a runtime check against the
 * stack limits (STREG_SP_CHECK), or generates an exception return
 * (STREG_EXC_RET).
 *
 * Always return true, indicating success for a trans_* function.
 */
typedef enum {
    STREG_NONE,         /* result is discarded */
    STREG_NORMAL,       /* ordinary register write */
    STREG_SP_CHECK,     /* written via store_sp_checked() */
    STREG_EXC_RET,      /* handed to gen_exception_return() */
} StoreRegKind;
5297 
5298 static bool store_reg_kind(DisasContext *s, int rd,
5299                             TCGv_i32 val, StoreRegKind kind)
5300 {
5301     switch (kind) {
5302     case STREG_NONE:
5303         return true;
5304     case STREG_NORMAL:
5305         /* See ALUWritePC: Interworking only from a32 mode. */
5306         if (s->thumb) {
5307             store_reg(s, rd, val);
5308         } else {
5309             store_reg_bx(s, rd, val);
5310         }
5311         return true;
5312     case STREG_SP_CHECK:
5313         store_sp_checked(s, val);
5314         return true;
5315     case STREG_EXC_RET:
5316         gen_exception_return(s, val);
5317         return true;
5318     }
5319     g_assert_not_reached();
5320 }
5321 
5322 /*
5323  * Data Processing (register)
5324  *
5325  * Operate, with set flags, one register source,
5326  * one immediate shifted register source, and a destination.
5327  */
5328 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5329                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5330                          int logic_cc, StoreRegKind kind)
5331 {
5332     TCGv_i32 tmp1, tmp2;
5333 
5334     tmp2 = load_reg(s, a->rm);
5335     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5336     tmp1 = load_reg(s, a->rn);
5337 
5338     gen(tmp1, tmp1, tmp2);
5339 
5340     if (logic_cc) {
5341         gen_logic_CC(tmp1);
5342     }
5343     return store_reg_kind(s, a->rd, tmp1, kind);
5344 }
5345 
5346 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5347                          void (*gen)(TCGv_i32, TCGv_i32),
5348                          int logic_cc, StoreRegKind kind)
5349 {
5350     TCGv_i32 tmp;
5351 
5352     tmp = load_reg(s, a->rm);
5353     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5354 
5355     gen(tmp, tmp);
5356     if (logic_cc) {
5357         gen_logic_CC(tmp);
5358     }
5359     return store_reg_kind(s, a->rd, tmp, kind);
5360 }
5361 
5362 /*
5363  * Data-processing (register-shifted register)
5364  *
5365  * Operate, with set flags, one register source,
5366  * one register shifted register source, and a destination.
5367  */
5368 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5369                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5370                          int logic_cc, StoreRegKind kind)
5371 {
5372     TCGv_i32 tmp1, tmp2;
5373 
5374     tmp1 = load_reg(s, a->rs);
5375     tmp2 = load_reg(s, a->rm);
5376     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5377     tmp1 = load_reg(s, a->rn);
5378 
5379     gen(tmp1, tmp1, tmp2);
5380 
5381     if (logic_cc) {
5382         gen_logic_CC(tmp1);
5383     }
5384     return store_reg_kind(s, a->rd, tmp1, kind);
5385 }
5386 
5387 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5388                          void (*gen)(TCGv_i32, TCGv_i32),
5389                          int logic_cc, StoreRegKind kind)
5390 {
5391     TCGv_i32 tmp1, tmp2;
5392 
5393     tmp1 = load_reg(s, a->rs);
5394     tmp2 = load_reg(s, a->rm);
5395     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5396 
5397     gen(tmp2, tmp2);
5398     if (logic_cc) {
5399         gen_logic_CC(tmp2);
5400     }
5401     return store_reg_kind(s, a->rd, tmp2, kind);
5402 }
5403 
5404 /*
5405  * Data-processing (immediate)
5406  *
5407  * Operate, with set flags, one register source,
5408  * one rotated immediate, and a destination.
5409  *
5410  * Note that logic_cc && a->rot setting CF based on the msb of the
5411  * immediate is the reason why we must pass in the unrotated form
5412  * of the immediate.
5413  */
5414 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5415                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5416                          int logic_cc, StoreRegKind kind)
5417 {
5418     TCGv_i32 tmp1;
5419     uint32_t imm;
5420 
5421     imm = ror32(a->imm, a->rot);
5422     if (logic_cc && a->rot) {
5423         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5424     }
5425     tmp1 = load_reg(s, a->rn);
5426 
5427     gen(tmp1, tmp1, tcg_constant_i32(imm));
5428 
5429     if (logic_cc) {
5430         gen_logic_CC(tmp1);
5431     }
5432     return store_reg_kind(s, a->rd, tmp1, kind);
5433 }
5434 
5435 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5436                          void (*gen)(TCGv_i32, TCGv_i32),
5437                          int logic_cc, StoreRegKind kind)
5438 {
5439     TCGv_i32 tmp;
5440     uint32_t imm;
5441 
5442     imm = ror32(a->imm, a->rot);
5443     if (logic_cc && a->rot) {
5444         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5445     }
5446 
5447     tmp = tcg_temp_new_i32();
5448     gen(tmp, tcg_constant_i32(imm));
5449 
5450     if (logic_cc) {
5451         gen_logic_CC(tmp);
5452     }
5453     return store_reg_kind(s, a->rd, tmp, kind);
5454 }
5455 
/*
 * Define trans_<NAME>_rrri, _rrrr and _rri for a three-operand insn.
 * K is evaluated into a local before the call, so any side effects it
 * has (including returning early or modifying *a) happen before OP and
 * L are evaluated as call arguments.
 */
#define DO_ANY3(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind streg = (K);                                       \
        return op_s_rrr_shi(s, a, OP, L, streg);                        \
    }                                                                   \
    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind streg = (K);                                       \
        return op_s_rrr_shr(s, a, OP, L, streg);                        \
    }                                                                   \
    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind streg = (K);                                       \
        return op_s_rri_rot(s, a, OP, L, streg);                        \
    }
5463 
/*
 * Define trans_<NAME>_rxri, _rxrr and _rxi for a two-operand insn
 * (no Rn).  K is evaluated into a local before the call, so any side
 * effects it has happen before OP and L are evaluated.
 */
#define DO_ANY2(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind streg = (K);                                       \
        return op_s_rxr_shi(s, a, OP, L, streg);                        \
    }                                                                   \
    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind streg = (K);                                       \
        return op_s_rxr_shr(s, a, OP, L, streg);                        \
    }                                                                   \
    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind streg = (K);                                       \
        return op_s_rxi_rot(s, a, OP, L, streg);                        \
    }
5471 
/*
 * Define trans_<NAME>_xrri, _xrrr and _xri for a compare-style insn:
 * the result is always discarded (STREG_NONE).
 */
#define DO_CMP2(NAME, OP, L)                                            \
    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        return op_s_rrr_shi(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        return op_s_rrr_shr(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        return op_s_rri_rot(s, a, OP, L, STREG_NONE);                   \
    }
5479 
5480 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5481 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5482 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5483 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5484 
5485 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5486 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5487 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5488 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5489 
5490 DO_CMP2(TST, tcg_gen_and_i32, true)
5491 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5492 DO_CMP2(CMN, gen_add_CC, false)
5493 DO_CMP2(CMP, gen_sub_CC, false)
5494 
5495 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5496         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5497 
5498 /*
5499  * Note for the computation of StoreRegKind we return out of the
5500  * middle of the functions that are expanded by DO_ANY3, and that
5501  * we modify a->s via that parameter before it is used by OP.
5502  */
5503 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5504         ({
5505             StoreRegKind ret = STREG_NORMAL;
5506             if (a->rd == 15 && a->s) {
5507                 /*
5508                  * See ALUExceptionReturn:
5509                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5510                  * In Hyp mode, UNDEFINED.
5511                  */
5512                 if (IS_USER(s) || s->current_el == 2) {
5513                     unallocated_encoding(s);
5514                     return true;
5515                 }
5516                 /* There is no writeback of nzcv to PSTATE.  */
5517                 a->s = 0;
5518                 ret = STREG_EXC_RET;
5519             } else if (a->rd == 13 && a->rn == 13) {
5520                 ret = STREG_SP_CHECK;
5521             }
5522             ret;
5523         }))
5524 
5525 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5526         ({
5527             StoreRegKind ret = STREG_NORMAL;
5528             if (a->rd == 15 && a->s) {
5529                 /*
5530                  * See ALUExceptionReturn:
5531                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5532                  * In Hyp mode, UNDEFINED.
5533                  */
5534                 if (IS_USER(s) || s->current_el == 2) {
5535                     unallocated_encoding(s);
5536                     return true;
5537                 }
5538                 /* There is no writeback of nzcv to PSTATE.  */
5539                 a->s = 0;
5540                 ret = STREG_EXC_RET;
5541             } else if (a->rd == 13) {
5542                 ret = STREG_SP_CHECK;
5543             }
5544             ret;
5545         }))
5546 
5547 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5548 
5549 /*
5550  * ORN is only available with T32, so there is no register-shifted-register
5551  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5552  */
5553 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5554 {
5555     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5556 }
5557 
5558 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5559 {
5560     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5561 }
5562 
5563 #undef DO_ANY3
5564 #undef DO_ANY2
5565 #undef DO_CMP2
5566 
5567 static bool trans_ADR(DisasContext *s, arg_ri *a)
5568 {
5569     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5570     return true;
5571 }
5572 
5573 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5574 {
5575     if (!ENABLE_ARCH_6T2) {
5576         return false;
5577     }
5578 
5579     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5580     return true;
5581 }
5582 
5583 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5584 {
5585     TCGv_i32 tmp;
5586 
5587     if (!ENABLE_ARCH_6T2) {
5588         return false;
5589     }
5590 
5591     tmp = load_reg(s, a->rd);
5592     tcg_gen_ext16u_i32(tmp, tmp);
5593     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5594     store_reg(s, a->rd, tmp);
5595     return true;
5596 }
5597 
5598 /*
5599  * v8.1M MVE wide-shifts
5600  */
5601 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5602                           WideShiftImmFn *fn)
5603 {
5604     TCGv_i64 rda;
5605     TCGv_i32 rdalo, rdahi;
5606 
5607     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5608         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5609         return false;
5610     }
5611     if (a->rdahi == 15) {
5612         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5613         return false;
5614     }
5615     if (!dc_isar_feature(aa32_mve, s) ||
5616         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5617         a->rdahi == 13) {
5618         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5619         unallocated_encoding(s);
5620         return true;
5621     }
5622 
5623     if (a->shim == 0) {
5624         a->shim = 32;
5625     }
5626 
5627     rda = tcg_temp_new_i64();
5628     rdalo = load_reg(s, a->rdalo);
5629     rdahi = load_reg(s, a->rdahi);
5630     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5631 
5632     fn(rda, rda, a->shim);
5633 
5634     tcg_gen_extrl_i64_i32(rdalo, rda);
5635     tcg_gen_extrh_i64_i32(rdahi, rda);
5636     store_reg(s, a->rdalo, rdalo);
5637     store_reg(s, a->rdahi, rdahi);
5638 
5639     return true;
5640 }
5641 
5642 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5643 {
5644     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5645 }
5646 
5647 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5648 {
5649     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5650 }
5651 
5652 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5653 {
5654     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5655 }
5656 
5657 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5658 {
5659     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5660 }
5661 
5662 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5663 {
5664     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5665 }
5666 
5667 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5668 {
5669     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5670 }
5671 
5672 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5673 {
5674     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5675 }
5676 
5677 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5678 {
5679     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5680 }
5681 
5682 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5683 {
5684     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5685 }
5686 
5687 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5688 {
5689     TCGv_i64 rda;
5690     TCGv_i32 rdalo, rdahi;
5691 
5692     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5693         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5694         return false;
5695     }
5696     if (a->rdahi == 15) {
5697         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5698         return false;
5699     }
5700     if (!dc_isar_feature(aa32_mve, s) ||
5701         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5702         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5703         a->rm == a->rdahi || a->rm == a->rdalo) {
5704         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5705         unallocated_encoding(s);
5706         return true;
5707     }
5708 
5709     rda = tcg_temp_new_i64();
5710     rdalo = load_reg(s, a->rdalo);
5711     rdahi = load_reg(s, a->rdahi);
5712     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5713 
5714     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5715     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5716 
5717     tcg_gen_extrl_i64_i32(rdalo, rda);
5718     tcg_gen_extrh_i64_i32(rdahi, rda);
5719     store_reg(s, a->rdalo, rdalo);
5720     store_reg(s, a->rdahi, rdahi);
5721 
5722     return true;
5723 }
5724 
5725 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5726 {
5727     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5728 }
5729 
5730 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5731 {
5732     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5733 }
5734 
5735 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5736 {
5737     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5738 }
5739 
5740 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5741 {
5742     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5743 }
5744 
5745 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5746 {
5747     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5748 }
5749 
5750 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5751 {
5752     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5753 }
5754 
5755 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5756 {
5757     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5758         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5759         return false;
5760     }
5761     if (!dc_isar_feature(aa32_mve, s) ||
5762         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5763         a->rda == 13 || a->rda == 15) {
5764         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5765         unallocated_encoding(s);
5766         return true;
5767     }
5768 
5769     if (a->shim == 0) {
5770         a->shim = 32;
5771     }
5772     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5773 
5774     return true;
5775 }
5776 
5777 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5778 {
5779     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5780 }
5781 
5782 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5783 {
5784     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5785 }
5786 
5787 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5788 {
5789     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5790 }
5791 
5792 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5793 {
5794     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5795 }
5796 
5797 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5798 {
5799     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5800 }
5801 
5802 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5803 {
5804     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5805 }
5806 
5807 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5808 {
5809     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5810         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5811         return false;
5812     }
5813     if (!dc_isar_feature(aa32_mve, s) ||
5814         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5815         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5816         a->rm == a->rda) {
5817         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5818         unallocated_encoding(s);
5819         return true;
5820     }
5821 
5822     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5823     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5824     return true;
5825 }
5826 
5827 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5828 {
5829     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5830 }
5831 
5832 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5833 {
5834     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5835 }
5836 
5837 /*
5838  * Multiply and multiply accumulate
5839  */
5840 
5841 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5842 {
5843     TCGv_i32 t1, t2;
5844 
5845     t1 = load_reg(s, a->rn);
5846     t2 = load_reg(s, a->rm);
5847     tcg_gen_mul_i32(t1, t1, t2);
5848     if (add) {
5849         t2 = load_reg(s, a->ra);
5850         tcg_gen_add_i32(t1, t1, t2);
5851     }
5852     if (a->s) {
5853         gen_logic_CC(t1);
5854     }
5855     store_reg(s, a->rd, t1);
5856     return true;
5857 }
5858 
5859 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5860 {
5861     return op_mla(s, a, false);
5862 }
5863 
5864 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5865 {
5866     return op_mla(s, a, true);
5867 }
5868 
5869 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5870 {
5871     TCGv_i32 t1, t2;
5872 
5873     if (!ENABLE_ARCH_6T2) {
5874         return false;
5875     }
5876     t1 = load_reg(s, a->rn);
5877     t2 = load_reg(s, a->rm);
5878     tcg_gen_mul_i32(t1, t1, t2);
5879     t2 = load_reg(s, a->ra);
5880     tcg_gen_sub_i32(t1, t2, t1);
5881     store_reg(s, a->rd, t1);
5882     return true;
5883 }
5884 
5885 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5886 {
5887     TCGv_i32 t0, t1, t2, t3;
5888 
5889     t0 = load_reg(s, a->rm);
5890     t1 = load_reg(s, a->rn);
5891     if (uns) {
5892         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5893     } else {
5894         tcg_gen_muls2_i32(t0, t1, t0, t1);
5895     }
5896     if (add) {
5897         t2 = load_reg(s, a->ra);
5898         t3 = load_reg(s, a->rd);
5899         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5900     }
5901     if (a->s) {
5902         gen_logicq_cc(t0, t1);
5903     }
5904     store_reg(s, a->ra, t0);
5905     store_reg(s, a->rd, t1);
5906     return true;
5907 }
5908 
5909 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5910 {
5911     return op_mlal(s, a, true, false);
5912 }
5913 
5914 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5915 {
5916     return op_mlal(s, a, false, false);
5917 }
5918 
5919 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5920 {
5921     return op_mlal(s, a, true, true);
5922 }
5923 
5924 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5925 {
5926     return op_mlal(s, a, false, true);
5927 }
5928 
5929 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5930 {
5931     TCGv_i32 t0, t1, t2, zero;
5932 
5933     if (s->thumb
5934         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5935         : !ENABLE_ARCH_6) {
5936         return false;
5937     }
5938 
5939     t0 = load_reg(s, a->rm);
5940     t1 = load_reg(s, a->rn);
5941     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5942     zero = tcg_constant_i32(0);
5943     t2 = load_reg(s, a->ra);
5944     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5945     t2 = load_reg(s, a->rd);
5946     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5947     store_reg(s, a->ra, t0);
5948     store_reg(s, a->rd, t1);
5949     return true;
5950 }
5951 
5952 /*
5953  * Saturating addition and subtraction
5954  */
5955 
5956 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5957 {
5958     TCGv_i32 t0, t1;
5959 
5960     if (s->thumb
5961         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5962         : !ENABLE_ARCH_5TE) {
5963         return false;
5964     }
5965 
5966     t0 = load_reg(s, a->rm);
5967     t1 = load_reg(s, a->rn);
5968     if (doub) {
5969         gen_helper_add_saturate(t1, cpu_env, t1, t1);
5970     }
5971     if (add) {
5972         gen_helper_add_saturate(t0, cpu_env, t0, t1);
5973     } else {
5974         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5975     }
5976     store_reg(s, a->rd, t0);
5977     return true;
5978 }
5979 
5980 #define DO_QADDSUB(NAME, ADD, DOUB) \
5981 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5982 {                                                        \
5983     return op_qaddsub(s, a, ADD, DOUB);                  \
5984 }
5985 
5986 DO_QADDSUB(QADD, true, false)
5987 DO_QADDSUB(QSUB, false, false)
5988 DO_QADDSUB(QDADD, true, true)
5989 DO_QADDSUB(QDSUB, false, true)
5990 
5991 #undef DO_QADDSUB
5992 
5993 /*
5994  * Halfword multiply and multiply accumulate
5995  */
5996 
5997 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5998                        int add_long, bool nt, bool mt)
5999 {
6000     TCGv_i32 t0, t1, tl, th;
6001 
6002     if (s->thumb
6003         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6004         : !ENABLE_ARCH_5TE) {
6005         return false;
6006     }
6007 
6008     t0 = load_reg(s, a->rn);
6009     t1 = load_reg(s, a->rm);
6010     gen_mulxy(t0, t1, nt, mt);
6011 
6012     switch (add_long) {
6013     case 0:
6014         store_reg(s, a->rd, t0);
6015         break;
6016     case 1:
6017         t1 = load_reg(s, a->ra);
6018         gen_helper_add_setq(t0, cpu_env, t0, t1);
6019         store_reg(s, a->rd, t0);
6020         break;
6021     case 2:
6022         tl = load_reg(s, a->ra);
6023         th = load_reg(s, a->rd);
6024         /* Sign-extend the 32-bit product to 64 bits.  */
6025         t1 = tcg_temp_new_i32();
6026         tcg_gen_sari_i32(t1, t0, 31);
6027         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6028         store_reg(s, a->ra, tl);
6029         store_reg(s, a->rd, th);
6030         break;
6031     default:
6032         g_assert_not_reached();
6033     }
6034     return true;
6035 }
6036 
6037 #define DO_SMLAX(NAME, add, nt, mt) \
6038 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6039 {                                                          \
6040     return op_smlaxxx(s, a, add, nt, mt);                  \
6041 }
6042 
6043 DO_SMLAX(SMULBB, 0, 0, 0)
6044 DO_SMLAX(SMULBT, 0, 0, 1)
6045 DO_SMLAX(SMULTB, 0, 1, 0)
6046 DO_SMLAX(SMULTT, 0, 1, 1)
6047 
6048 DO_SMLAX(SMLABB, 1, 0, 0)
6049 DO_SMLAX(SMLABT, 1, 0, 1)
6050 DO_SMLAX(SMLATB, 1, 1, 0)
6051 DO_SMLAX(SMLATT, 1, 1, 1)
6052 
6053 DO_SMLAX(SMLALBB, 2, 0, 0)
6054 DO_SMLAX(SMLALBT, 2, 0, 1)
6055 DO_SMLAX(SMLALTB, 2, 1, 0)
6056 DO_SMLAX(SMLALTT, 2, 1, 1)
6057 
6058 #undef DO_SMLAX
6059 
6060 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6061 {
6062     TCGv_i32 t0, t1;
6063 
6064     if (!ENABLE_ARCH_5TE) {
6065         return false;
6066     }
6067 
6068     t0 = load_reg(s, a->rn);
6069     t1 = load_reg(s, a->rm);
6070     /*
6071      * Since the nominal result is product<47:16>, shift the 16-bit
6072      * input up by 16 bits, so that the result is at product<63:32>.
6073      */
6074     if (mt) {
6075         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6076     } else {
6077         tcg_gen_shli_i32(t1, t1, 16);
6078     }
6079     tcg_gen_muls2_i32(t0, t1, t0, t1);
6080     if (add) {
6081         t0 = load_reg(s, a->ra);
6082         gen_helper_add_setq(t1, cpu_env, t1, t0);
6083     }
6084     store_reg(s, a->rd, t1);
6085     return true;
6086 }
6087 
6088 #define DO_SMLAWX(NAME, add, mt) \
6089 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6090 {                                                          \
6091     return op_smlawx(s, a, add, mt);                       \
6092 }
6093 
6094 DO_SMLAWX(SMULWB, 0, 0)
6095 DO_SMLAWX(SMULWT, 0, 1)
6096 DO_SMLAWX(SMLAWB, 1, 0)
6097 DO_SMLAWX(SMLAWT, 1, 1)
6098 
6099 #undef DO_SMLAWX
6100 
6101 /*
6102  * MSR (immediate) and hints
6103  */
6104 
6105 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6106 {
6107     /*
6108      * When running single-threaded TCG code, use the helper to ensure that
6109      * the next round-robin scheduled vCPU gets a crack.  When running in
6110      * MTTCG we don't generate jumps to the helper as it won't affect the
6111      * scheduling of other vCPUs.
6112      */
6113     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6114         gen_update_pc(s, curr_insn_len(s));
6115         s->base.is_jmp = DISAS_YIELD;
6116     }
6117     return true;
6118 }
6119 
6120 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6121 {
6122     /*
6123      * When running single-threaded TCG code, use the helper to ensure that
6124      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6125      * just skip this instruction.  Currently the SEV/SEVL instructions,
6126      * which are *one* of many ways to wake the CPU from WFE, are not
6127      * implemented so we can't sleep like WFI does.
6128      */
6129     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6130         gen_update_pc(s, curr_insn_len(s));
6131         s->base.is_jmp = DISAS_WFE;
6132     }
6133     return true;
6134 }
6135 
6136 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6137 {
6138     /* For WFI, halt the vCPU until an IRQ. */
6139     gen_update_pc(s, curr_insn_len(s));
6140     s->base.is_jmp = DISAS_WFI;
6141     return true;
6142 }
6143 
6144 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6145 {
6146     /*
6147      * For M-profile, minimal-RAS ESB can be a NOP.
6148      * Without RAS, we must implement this as NOP.
6149      */
6150     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6151         /*
6152          * QEMU does not have a source of physical SErrors,
6153          * so we are only concerned with virtual SErrors.
6154          * The pseudocode in the ARM for this case is
6155          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6156          *      AArch32.vESBOperation();
6157          * Most of the condition can be evaluated at translation time.
6158          * Test for EL2 present, and defer test for SEL2 to runtime.
6159          */
6160         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6161             gen_helper_vesb(cpu_env);
6162         }
6163     }
6164     return true;
6165 }
6166 
6167 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6168 {
6169     return true;
6170 }
6171 
6172 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6173 {
6174     uint32_t val = ror32(a->imm, a->rot * 2);
6175     uint32_t mask = msr_mask(s, a->mask, a->r);
6176 
6177     if (gen_set_psr_im(s, mask, a->r, val)) {
6178         unallocated_encoding(s);
6179     }
6180     return true;
6181 }
6182 
6183 /*
6184  * Cyclic Redundancy Check
6185  */
6186 
6187 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6188 {
6189     TCGv_i32 t1, t2, t3;
6190 
6191     if (!dc_isar_feature(aa32_crc32, s)) {
6192         return false;
6193     }
6194 
6195     t1 = load_reg(s, a->rn);
6196     t2 = load_reg(s, a->rm);
6197     switch (sz) {
6198     case MO_8:
6199         gen_uxtb(t2);
6200         break;
6201     case MO_16:
6202         gen_uxth(t2);
6203         break;
6204     case MO_32:
6205         break;
6206     default:
6207         g_assert_not_reached();
6208     }
6209     t3 = tcg_constant_i32(1 << sz);
6210     if (c) {
6211         gen_helper_crc32c(t1, t1, t2, t3);
6212     } else {
6213         gen_helper_crc32(t1, t1, t2, t3);
6214     }
6215     store_reg(s, a->rd, t1);
6216     return true;
6217 }
6218 
6219 #define DO_CRC32(NAME, c, sz) \
6220 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6221     { return op_crc32(s, a, c, sz); }
6222 
6223 DO_CRC32(CRC32B, false, MO_8)
6224 DO_CRC32(CRC32H, false, MO_16)
6225 DO_CRC32(CRC32W, false, MO_32)
6226 DO_CRC32(CRC32CB, true, MO_8)
6227 DO_CRC32(CRC32CH, true, MO_16)
6228 DO_CRC32(CRC32CW, true, MO_32)
6229 
6230 #undef DO_CRC32
6231 
6232 /*
6233  * Miscellaneous instructions
6234  */
6235 
6236 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6237 {
6238     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6239         return false;
6240     }
6241     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6242     return true;
6243 }
6244 
6245 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6246 {
6247     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6248         return false;
6249     }
6250     gen_msr_banked(s, a->r, a->sysm, a->rn);
6251     return true;
6252 }
6253 
6254 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6255 {
6256     TCGv_i32 tmp;
6257 
6258     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6259         return false;
6260     }
6261     if (a->r) {
6262         if (IS_USER(s)) {
6263             unallocated_encoding(s);
6264             return true;
6265         }
6266         tmp = load_cpu_field(spsr);
6267     } else {
6268         tmp = tcg_temp_new_i32();
6269         gen_helper_cpsr_read(tmp, cpu_env);
6270     }
6271     store_reg(s, a->rd, tmp);
6272     return true;
6273 }
6274 
6275 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6276 {
6277     TCGv_i32 tmp;
6278     uint32_t mask = msr_mask(s, a->mask, a->r);
6279 
6280     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6281         return false;
6282     }
6283     tmp = load_reg(s, a->rn);
6284     if (gen_set_psr(s, mask, a->r, tmp)) {
6285         unallocated_encoding(s);
6286     }
6287     return true;
6288 }
6289 
6290 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6291 {
6292     TCGv_i32 tmp;
6293 
6294     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6295         return false;
6296     }
6297     tmp = tcg_temp_new_i32();
6298     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6299     store_reg(s, a->rd, tmp);
6300     return true;
6301 }
6302 
6303 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6304 {
6305     TCGv_i32 addr, reg;
6306 
6307     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6308         return false;
6309     }
6310     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6311     reg = load_reg(s, a->rn);
6312     gen_helper_v7m_msr(cpu_env, addr, reg);
6313     /* If we wrote to CONTROL, the EL might have changed */
6314     gen_rebuild_hflags(s, true);
6315     gen_lookup_tb(s);
6316     return true;
6317 }
6318 
6319 static bool trans_BX(DisasContext *s, arg_BX *a)
6320 {
6321     if (!ENABLE_ARCH_4T) {
6322         return false;
6323     }
6324     gen_bx_excret(s, load_reg(s, a->rm));
6325     return true;
6326 }
6327 
6328 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6329 {
6330     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6331         return false;
6332     }
6333     /*
6334      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6335      * TBFLAGS bit on a basically-never-happens case, so call a helper
6336      * function to check for the trap and raise the exception if needed
6337      * (passing it the register number for the syndrome value).
6338      * v8A doesn't have this HSTR bit.
6339      */
6340     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6341         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6342         s->current_el < 2 && s->ns) {
6343         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6344     }
6345     /* Trivial implementation equivalent to bx.  */
6346     gen_bx(s, load_reg(s, a->rm));
6347     return true;
6348 }
6349 
6350 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6351 {
6352     TCGv_i32 tmp;
6353 
6354     if (!ENABLE_ARCH_5) {
6355         return false;
6356     }
6357     tmp = load_reg(s, a->rm);
6358     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6359     gen_bx(s, tmp);
6360     return true;
6361 }
6362 
6363 /*
6364  * BXNS/BLXNS: only exist for v8M with the security extensions,
6365  * and always UNDEF if NonSecure.  We don't implement these in
6366  * the user-only mode either (in theory you can use them from
6367  * Secure User mode but they are too tied in to system emulation).
6368  */
6369 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6370 {
6371     if (!s->v8m_secure || IS_USER_ONLY) {
6372         unallocated_encoding(s);
6373     } else {
6374         gen_bxns(s, a->rm);
6375     }
6376     return true;
6377 }
6378 
6379 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6380 {
6381     if (!s->v8m_secure || IS_USER_ONLY) {
6382         unallocated_encoding(s);
6383     } else {
6384         gen_blxns(s, a->rm);
6385     }
6386     return true;
6387 }
6388 
6389 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6390 {
6391     TCGv_i32 tmp;
6392 
6393     if (!ENABLE_ARCH_5) {
6394         return false;
6395     }
6396     tmp = load_reg(s, a->rm);
6397     tcg_gen_clzi_i32(tmp, tmp, 32);
6398     store_reg(s, a->rd, tmp);
6399     return true;
6400 }
6401 
6402 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6403 {
6404     TCGv_i32 tmp;
6405 
6406     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6407         return false;
6408     }
6409     if (IS_USER(s)) {
6410         unallocated_encoding(s);
6411         return true;
6412     }
6413     if (s->current_el == 2) {
6414         /* ERET from Hyp uses ELR_Hyp, not LR */
6415         tmp = load_cpu_field_low32(elr_el[2]);
6416     } else {
6417         tmp = load_reg(s, 14);
6418     }
6419     gen_exception_return(s, tmp);
6420     return true;
6421 }
6422 
6423 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6424 {
6425     gen_hlt(s, a->imm);
6426     return true;
6427 }
6428 
6429 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6430 {
6431     if (!ENABLE_ARCH_5) {
6432         return false;
6433     }
6434     /* BKPT is OK with ECI set and leaves it untouched */
6435     s->eci_handled = true;
6436     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6437         semihosting_enabled(s->current_el == 0) &&
6438         (a->imm == 0xab)) {
6439         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6440     } else {
6441         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6442     }
6443     return true;
6444 }
6445 
6446 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6447 {
6448     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6449         return false;
6450     }
6451     if (IS_USER(s)) {
6452         unallocated_encoding(s);
6453     } else {
6454         gen_hvc(s, a->imm);
6455     }
6456     return true;
6457 }
6458 
6459 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6460 {
6461     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6462         return false;
6463     }
6464     if (IS_USER(s)) {
6465         unallocated_encoding(s);
6466     } else {
6467         gen_smc(s);
6468     }
6469     return true;
6470 }
6471 
6472 static bool trans_SG(DisasContext *s, arg_SG *a)
6473 {
6474     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6475         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6476         return false;
6477     }
6478     /*
6479      * SG (v8M only)
6480      * The bulk of the behaviour for this instruction is implemented
6481      * in v7m_handle_execute_nsc(), which deals with the insn when
6482      * it is executed by a CPU in non-secure state from memory
6483      * which is Secure & NonSecure-Callable.
6484      * Here we only need to handle the remaining cases:
6485      *  * in NS memory (including the "security extension not
6486      *    implemented" case) : NOP
6487      *  * in S memory but CPU already secure (clear IT bits)
6488      * We know that the attribute for the memory this insn is
6489      * in must match the current CPU state, because otherwise
6490      * get_phys_addr_pmsav8 would have generated an exception.
6491      */
6492     if (s->v8m_secure) {
6493         /* Like the IT insn, we don't need to generate any code */
6494         s->condexec_cond = 0;
6495         s->condexec_mask = 0;
6496     }
6497     return true;
6498 }
6499 
6500 static bool trans_TT(DisasContext *s, arg_TT *a)
6501 {
6502     TCGv_i32 addr, tmp;
6503 
6504     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6505         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6506         return false;
6507     }
6508     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6509         /* We UNDEF for these UNPREDICTABLE cases */
6510         unallocated_encoding(s);
6511         return true;
6512     }
6513     if (a->A && !s->v8m_secure) {
6514         /* This case is UNDEFINED.  */
6515         unallocated_encoding(s);
6516         return true;
6517     }
6518 
6519     addr = load_reg(s, a->rn);
6520     tmp = tcg_temp_new_i32();
6521     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6522     store_reg(s, a->rd, tmp);
6523     return true;
6524 }
6525 
6526 /*
6527  * Load/store register index
6528  */
6529 
6530 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6531 {
6532     ISSInfo ret;
6533 
6534     /* ISS not valid if writeback */
6535     if (p && !w) {
6536         ret = rd;
6537         if (curr_insn_len(s) == 2) {
6538             ret |= ISSIs16Bit;
6539         }
6540     } else {
6541         ret = ISSInvalid;
6542     }
6543     return ret;
6544 }
6545 
6546 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6547 {
6548     TCGv_i32 addr = load_reg(s, a->rn);
6549 
6550     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6551         gen_helper_v8m_stackcheck(cpu_env, addr);
6552     }
6553 
6554     if (a->p) {
6555         TCGv_i32 ofs = load_reg(s, a->rm);
6556         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6557         if (a->u) {
6558             tcg_gen_add_i32(addr, addr, ofs);
6559         } else {
6560             tcg_gen_sub_i32(addr, addr, ofs);
6561         }
6562     }
6563     return addr;
6564 }
6565 
6566 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6567                             TCGv_i32 addr, int address_offset)
6568 {
6569     if (!a->p) {
6570         TCGv_i32 ofs = load_reg(s, a->rm);
6571         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6572         if (a->u) {
6573             tcg_gen_add_i32(addr, addr, ofs);
6574         } else {
6575             tcg_gen_sub_i32(addr, addr, ofs);
6576         }
6577     } else if (!a->w) {
6578         return;
6579     }
6580     tcg_gen_addi_i32(addr, addr, address_offset);
6581     store_reg(s, a->rn, addr);
6582 }
6583 
6584 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6585                        MemOp mop, int mem_idx)
6586 {
6587     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6588     TCGv_i32 addr, tmp;
6589 
6590     addr = op_addr_rr_pre(s, a);
6591 
6592     tmp = tcg_temp_new_i32();
6593     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6594     disas_set_da_iss(s, mop, issinfo);
6595 
6596     /*
6597      * Perform base writeback before the loaded value to
6598      * ensure correct behavior with overlapping index registers.
6599      */
6600     op_addr_rr_post(s, a, addr, 0);
6601     store_reg_from_load(s, a->rt, tmp);
6602     return true;
6603 }
6604 
6605 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6606                         MemOp mop, int mem_idx)
6607 {
6608     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6609     TCGv_i32 addr, tmp;
6610 
6611     /*
6612      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6613      * is either UNPREDICTABLE or has defined behaviour
6614      */
6615     if (s->thumb && a->rn == 15) {
6616         return false;
6617     }
6618 
6619     addr = op_addr_rr_pre(s, a);
6620 
6621     tmp = load_reg(s, a->rt);
6622     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6623     disas_set_da_iss(s, mop, issinfo);
6624 
6625     op_addr_rr_post(s, a, addr, 0);
6626     return true;
6627 }
6628 
6629 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6630 {
6631     int mem_idx = get_mem_index(s);
6632     TCGv_i32 addr, tmp;
6633 
6634     if (!ENABLE_ARCH_5TE) {
6635         return false;
6636     }
6637     if (a->rt & 1) {
6638         unallocated_encoding(s);
6639         return true;
6640     }
6641     addr = op_addr_rr_pre(s, a);
6642 
6643     tmp = tcg_temp_new_i32();
6644     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6645     store_reg(s, a->rt, tmp);
6646 
6647     tcg_gen_addi_i32(addr, addr, 4);
6648 
6649     tmp = tcg_temp_new_i32();
6650     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6651     store_reg(s, a->rt + 1, tmp);
6652 
6653     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6654     op_addr_rr_post(s, a, addr, -4);
6655     return true;
6656 }
6657 
6658 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6659 {
6660     int mem_idx = get_mem_index(s);
6661     TCGv_i32 addr, tmp;
6662 
6663     if (!ENABLE_ARCH_5TE) {
6664         return false;
6665     }
6666     if (a->rt & 1) {
6667         unallocated_encoding(s);
6668         return true;
6669     }
6670     addr = op_addr_rr_pre(s, a);
6671 
6672     tmp = load_reg(s, a->rt);
6673     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6674 
6675     tcg_gen_addi_i32(addr, addr, 4);
6676 
6677     tmp = load_reg(s, a->rt + 1);
6678     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6679 
6680     op_addr_rr_post(s, a, addr, -4);
6681     return true;
6682 }
6683 
6684 /*
6685  * Load/store immediate index
6686  */
6687 
6688 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6689 {
6690     int ofs = a->imm;
6691 
6692     if (!a->u) {
6693         ofs = -ofs;
6694     }
6695 
6696     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6697         /*
6698          * Stackcheck. Here we know 'addr' is the current SP;
6699          * U is set if we're moving SP up, else down. It is
6700          * UNKNOWN whether the limit check triggers when SP starts
6701          * below the limit and ends up above it; we chose to do so.
6702          */
6703         if (!a->u) {
6704             TCGv_i32 newsp = tcg_temp_new_i32();
6705             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6706             gen_helper_v8m_stackcheck(cpu_env, newsp);
6707         } else {
6708             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6709         }
6710     }
6711 
6712     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6713 }
6714 
6715 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6716                             TCGv_i32 addr, int address_offset)
6717 {
6718     if (!a->p) {
6719         if (a->u) {
6720             address_offset += a->imm;
6721         } else {
6722             address_offset -= a->imm;
6723         }
6724     } else if (!a->w) {
6725         return;
6726     }
6727     tcg_gen_addi_i32(addr, addr, address_offset);
6728     store_reg(s, a->rn, addr);
6729 }
6730 
6731 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6732                        MemOp mop, int mem_idx)
6733 {
6734     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6735     TCGv_i32 addr, tmp;
6736 
6737     addr = op_addr_ri_pre(s, a);
6738 
6739     tmp = tcg_temp_new_i32();
6740     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6741     disas_set_da_iss(s, mop, issinfo);
6742 
6743     /*
6744      * Perform base writeback before the loaded value to
6745      * ensure correct behavior with overlapping index registers.
6746      */
6747     op_addr_ri_post(s, a, addr, 0);
6748     store_reg_from_load(s, a->rt, tmp);
6749     return true;
6750 }
6751 
6752 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6753                         MemOp mop, int mem_idx)
6754 {
6755     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6756     TCGv_i32 addr, tmp;
6757 
6758     /*
6759      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6760      * is either UNPREDICTABLE or has defined behaviour
6761      */
6762     if (s->thumb && a->rn == 15) {
6763         return false;
6764     }
6765 
6766     addr = op_addr_ri_pre(s, a);
6767 
6768     tmp = load_reg(s, a->rt);
6769     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6770     disas_set_da_iss(s, mop, issinfo);
6771 
6772     op_addr_ri_post(s, a, addr, 0);
6773     return true;
6774 }
6775 
6776 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6777 {
6778     int mem_idx = get_mem_index(s);
6779     TCGv_i32 addr, tmp;
6780 
6781     addr = op_addr_ri_pre(s, a);
6782 
6783     tmp = tcg_temp_new_i32();
6784     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6785     store_reg(s, a->rt, tmp);
6786 
6787     tcg_gen_addi_i32(addr, addr, 4);
6788 
6789     tmp = tcg_temp_new_i32();
6790     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6791     store_reg(s, rt2, tmp);
6792 
6793     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6794     op_addr_ri_post(s, a, addr, -4);
6795     return true;
6796 }
6797 
6798 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6799 {
6800     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6801         return false;
6802     }
6803     return op_ldrd_ri(s, a, a->rt + 1);
6804 }
6805 
6806 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6807 {
6808     arg_ldst_ri b = {
6809         .u = a->u, .w = a->w, .p = a->p,
6810         .rn = a->rn, .rt = a->rt, .imm = a->imm
6811     };
6812     return op_ldrd_ri(s, &b, a->rt2);
6813 }
6814 
6815 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6816 {
6817     int mem_idx = get_mem_index(s);
6818     TCGv_i32 addr, tmp;
6819 
6820     addr = op_addr_ri_pre(s, a);
6821 
6822     tmp = load_reg(s, a->rt);
6823     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6824 
6825     tcg_gen_addi_i32(addr, addr, 4);
6826 
6827     tmp = load_reg(s, rt2);
6828     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6829 
6830     op_addr_ri_post(s, a, addr, -4);
6831     return true;
6832 }
6833 
6834 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6835 {
6836     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6837         return false;
6838     }
6839     return op_strd_ri(s, a, a->rt + 1);
6840 }
6841 
6842 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6843 {
6844     arg_ldst_ri b = {
6845         .u = a->u, .w = a->w, .p = a->p,
6846         .rn = a->rn, .rt = a->rt, .imm = a->imm
6847     };
6848     return op_strd_ri(s, &b, a->rt2);
6849 }
6850 
6851 #define DO_LDST(NAME, WHICH, MEMOP) \
6852 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6853 {                                                                     \
6854     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6855 }                                                                     \
6856 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6857 {                                                                     \
6858     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6859 }                                                                     \
6860 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6861 {                                                                     \
6862     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6863 }                                                                     \
6864 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6865 {                                                                     \
6866     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6867 }
6868 
6869 DO_LDST(LDR, load, MO_UL)
6870 DO_LDST(LDRB, load, MO_UB)
6871 DO_LDST(LDRH, load, MO_UW)
6872 DO_LDST(LDRSB, load, MO_SB)
6873 DO_LDST(LDRSH, load, MO_SW)
6874 
6875 DO_LDST(STR, store, MO_UL)
6876 DO_LDST(STRB, store, MO_UB)
6877 DO_LDST(STRH, store, MO_UW)
6878 
6879 #undef DO_LDST
6880 
6881 /*
6882  * Synchronization primitives
6883  */
6884 
6885 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6886 {
6887     TCGv_i32 addr, tmp;
6888     TCGv taddr;
6889 
6890     opc |= s->be_data;
6891     addr = load_reg(s, a->rn);
6892     taddr = gen_aa32_addr(s, addr, opc);
6893 
6894     tmp = load_reg(s, a->rt2);
6895     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6896 
6897     store_reg(s, a->rt, tmp);
6898     return true;
6899 }
6900 
6901 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6902 {
6903     return op_swp(s, a, MO_UL | MO_ALIGN);
6904 }
6905 
6906 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6907 {
6908     return op_swp(s, a, MO_UB);
6909 }
6910 
6911 /*
6912  * Load/Store Exclusive and Load-Acquire/Store-Release
6913  */
6914 
6915 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6916 {
6917     TCGv_i32 addr;
6918     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6919     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6920 
6921     /* We UNDEF for these UNPREDICTABLE cases.  */
6922     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6923         || a->rd == a->rn || a->rd == a->rt
6924         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6925         || (mop == MO_64
6926             && (a->rt2 == 15
6927                 || a->rd == a->rt2
6928                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6929         unallocated_encoding(s);
6930         return true;
6931     }
6932 
6933     if (rel) {
6934         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6935     }
6936 
6937     addr = tcg_temp_new_i32();
6938     load_reg_var(s, addr, a->rn);
6939     tcg_gen_addi_i32(addr, addr, a->imm);
6940 
6941     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6942     return true;
6943 }
6944 
6945 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6946 {
6947     if (!ENABLE_ARCH_6) {
6948         return false;
6949     }
6950     return op_strex(s, a, MO_32, false);
6951 }
6952 
6953 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6954 {
6955     if (!ENABLE_ARCH_6K) {
6956         return false;
6957     }
6958     /* We UNDEF for these UNPREDICTABLE cases.  */
6959     if (a->rt & 1) {
6960         unallocated_encoding(s);
6961         return true;
6962     }
6963     a->rt2 = a->rt + 1;
6964     return op_strex(s, a, MO_64, false);
6965 }
6966 
6967 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6968 {
6969     return op_strex(s, a, MO_64, false);
6970 }
6971 
6972 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6973 {
6974     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6975         return false;
6976     }
6977     return op_strex(s, a, MO_8, false);
6978 }
6979 
6980 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6981 {
6982     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6983         return false;
6984     }
6985     return op_strex(s, a, MO_16, false);
6986 }
6987 
6988 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6989 {
6990     if (!ENABLE_ARCH_8) {
6991         return false;
6992     }
6993     return op_strex(s, a, MO_32, true);
6994 }
6995 
6996 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6997 {
6998     if (!ENABLE_ARCH_8) {
6999         return false;
7000     }
7001     /* We UNDEF for these UNPREDICTABLE cases.  */
7002     if (a->rt & 1) {
7003         unallocated_encoding(s);
7004         return true;
7005     }
7006     a->rt2 = a->rt + 1;
7007     return op_strex(s, a, MO_64, true);
7008 }
7009 
7010 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
7011 {
7012     if (!ENABLE_ARCH_8) {
7013         return false;
7014     }
7015     return op_strex(s, a, MO_64, true);
7016 }
7017 
7018 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7019 {
7020     if (!ENABLE_ARCH_8) {
7021         return false;
7022     }
7023     return op_strex(s, a, MO_8, true);
7024 }
7025 
7026 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7027 {
7028     if (!ENABLE_ARCH_8) {
7029         return false;
7030     }
7031     return op_strex(s, a, MO_16, true);
7032 }
7033 
7034 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7035 {
7036     TCGv_i32 addr, tmp;
7037 
7038     if (!ENABLE_ARCH_8) {
7039         return false;
7040     }
7041     /* We UNDEF for these UNPREDICTABLE cases.  */
7042     if (a->rn == 15 || a->rt == 15) {
7043         unallocated_encoding(s);
7044         return true;
7045     }
7046 
7047     addr = load_reg(s, a->rn);
7048     tmp = load_reg(s, a->rt);
7049     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7050     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7051     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7052 
7053     return true;
7054 }
7055 
7056 static bool trans_STL(DisasContext *s, arg_STL *a)
7057 {
7058     return op_stl(s, a, MO_UL);
7059 }
7060 
7061 static bool trans_STLB(DisasContext *s, arg_STL *a)
7062 {
7063     return op_stl(s, a, MO_UB);
7064 }
7065 
7066 static bool trans_STLH(DisasContext *s, arg_STL *a)
7067 {
7068     return op_stl(s, a, MO_UW);
7069 }
7070 
7071 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7072 {
7073     TCGv_i32 addr;
7074     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7075     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7076 
7077     /* We UNDEF for these UNPREDICTABLE cases.  */
7078     if (a->rn == 15 || a->rt == 15
7079         || (!v8a && s->thumb && a->rt == 13)
7080         || (mop == MO_64
7081             && (a->rt2 == 15 || a->rt == a->rt2
7082                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7083         unallocated_encoding(s);
7084         return true;
7085     }
7086 
7087     addr = tcg_temp_new_i32();
7088     load_reg_var(s, addr, a->rn);
7089     tcg_gen_addi_i32(addr, addr, a->imm);
7090 
7091     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7092 
7093     if (acq) {
7094         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7095     }
7096     return true;
7097 }
7098 
7099 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7100 {
7101     if (!ENABLE_ARCH_6) {
7102         return false;
7103     }
7104     return op_ldrex(s, a, MO_32, false);
7105 }
7106 
7107 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7108 {
7109     if (!ENABLE_ARCH_6K) {
7110         return false;
7111     }
7112     /* We UNDEF for these UNPREDICTABLE cases.  */
7113     if (a->rt & 1) {
7114         unallocated_encoding(s);
7115         return true;
7116     }
7117     a->rt2 = a->rt + 1;
7118     return op_ldrex(s, a, MO_64, false);
7119 }
7120 
7121 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7122 {
7123     return op_ldrex(s, a, MO_64, false);
7124 }
7125 
7126 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7127 {
7128     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7129         return false;
7130     }
7131     return op_ldrex(s, a, MO_8, false);
7132 }
7133 
7134 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7135 {
7136     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7137         return false;
7138     }
7139     return op_ldrex(s, a, MO_16, false);
7140 }
7141 
7142 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7143 {
7144     if (!ENABLE_ARCH_8) {
7145         return false;
7146     }
7147     return op_ldrex(s, a, MO_32, true);
7148 }
7149 
7150 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7151 {
7152     if (!ENABLE_ARCH_8) {
7153         return false;
7154     }
7155     /* We UNDEF for these UNPREDICTABLE cases.  */
7156     if (a->rt & 1) {
7157         unallocated_encoding(s);
7158         return true;
7159     }
7160     a->rt2 = a->rt + 1;
7161     return op_ldrex(s, a, MO_64, true);
7162 }
7163 
7164 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7165 {
7166     if (!ENABLE_ARCH_8) {
7167         return false;
7168     }
7169     return op_ldrex(s, a, MO_64, true);
7170 }
7171 
7172 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7173 {
7174     if (!ENABLE_ARCH_8) {
7175         return false;
7176     }
7177     return op_ldrex(s, a, MO_8, true);
7178 }
7179 
7180 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7181 {
7182     if (!ENABLE_ARCH_8) {
7183         return false;
7184     }
7185     return op_ldrex(s, a, MO_16, true);
7186 }
7187 
7188 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7189 {
7190     TCGv_i32 addr, tmp;
7191 
7192     if (!ENABLE_ARCH_8) {
7193         return false;
7194     }
7195     /* We UNDEF for these UNPREDICTABLE cases.  */
7196     if (a->rn == 15 || a->rt == 15) {
7197         unallocated_encoding(s);
7198         return true;
7199     }
7200 
7201     addr = load_reg(s, a->rn);
7202     tmp = tcg_temp_new_i32();
7203     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7204     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7205 
7206     store_reg(s, a->rt, tmp);
7207     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7208     return true;
7209 }
7210 
7211 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7212 {
7213     return op_lda(s, a, MO_UL);
7214 }
7215 
7216 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7217 {
7218     return op_lda(s, a, MO_UB);
7219 }
7220 
7221 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7222 {
7223     return op_lda(s, a, MO_UW);
7224 }
7225 
7226 /*
7227  * Media instructions
7228  */
7229 
7230 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7231 {
7232     TCGv_i32 t1, t2;
7233 
7234     if (!ENABLE_ARCH_6) {
7235         return false;
7236     }
7237 
7238     t1 = load_reg(s, a->rn);
7239     t2 = load_reg(s, a->rm);
7240     gen_helper_usad8(t1, t1, t2);
7241     if (a->ra != 15) {
7242         t2 = load_reg(s, a->ra);
7243         tcg_gen_add_i32(t1, t1, t2);
7244     }
7245     store_reg(s, a->rd, t1);
7246     return true;
7247 }
7248 
7249 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7250 {
7251     TCGv_i32 tmp;
7252     int width = a->widthm1 + 1;
7253     int shift = a->lsb;
7254 
7255     if (!ENABLE_ARCH_6T2) {
7256         return false;
7257     }
7258     if (shift + width > 32) {
7259         /* UNPREDICTABLE; we choose to UNDEF */
7260         unallocated_encoding(s);
7261         return true;
7262     }
7263 
7264     tmp = load_reg(s, a->rn);
7265     if (u) {
7266         tcg_gen_extract_i32(tmp, tmp, shift, width);
7267     } else {
7268         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7269     }
7270     store_reg(s, a->rd, tmp);
7271     return true;
7272 }
7273 
7274 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7275 {
7276     return op_bfx(s, a, false);
7277 }
7278 
7279 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7280 {
7281     return op_bfx(s, a, true);
7282 }
7283 
7284 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7285 {
7286     int msb = a->msb, lsb = a->lsb;
7287     TCGv_i32 t_in, t_rd;
7288     int width;
7289 
7290     if (!ENABLE_ARCH_6T2) {
7291         return false;
7292     }
7293     if (msb < lsb) {
7294         /* UNPREDICTABLE; we choose to UNDEF */
7295         unallocated_encoding(s);
7296         return true;
7297     }
7298 
7299     width = msb + 1 - lsb;
7300     if (a->rn == 15) {
7301         /* BFC */
7302         t_in = tcg_constant_i32(0);
7303     } else {
7304         /* BFI */
7305         t_in = load_reg(s, a->rn);
7306     }
7307     t_rd = load_reg(s, a->rd);
7308     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7309     store_reg(s, a->rd, t_rd);
7310     return true;
7311 }
7312 
7313 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7314 {
7315     unallocated_encoding(s);
7316     return true;
7317 }
7318 
7319 /*
7320  * Parallel addition and subtraction
7321  */
7322 
7323 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7324                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7325 {
7326     TCGv_i32 t0, t1;
7327 
7328     if (s->thumb
7329         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7330         : !ENABLE_ARCH_6) {
7331         return false;
7332     }
7333 
7334     t0 = load_reg(s, a->rn);
7335     t1 = load_reg(s, a->rm);
7336 
7337     gen(t0, t0, t1);
7338 
7339     store_reg(s, a->rd, t0);
7340     return true;
7341 }
7342 
7343 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7344                              void (*gen)(TCGv_i32, TCGv_i32,
7345                                          TCGv_i32, TCGv_ptr))
7346 {
7347     TCGv_i32 t0, t1;
7348     TCGv_ptr ge;
7349 
7350     if (s->thumb
7351         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7352         : !ENABLE_ARCH_6) {
7353         return false;
7354     }
7355 
7356     t0 = load_reg(s, a->rn);
7357     t1 = load_reg(s, a->rm);
7358 
7359     ge = tcg_temp_new_ptr();
7360     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7361     gen(t0, t0, t1, ge);
7362 
7363     store_reg(s, a->rd, t0);
7364     return true;
7365 }
7366 
7367 #define DO_PAR_ADDSUB(NAME, helper) \
7368 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7369 {                                                       \
7370     return op_par_addsub(s, a, helper);                 \
7371 }
7372 
7373 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7374 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7375 {                                                       \
7376     return op_par_addsub_ge(s, a, helper);              \
7377 }
7378 
7379 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7380 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7381 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7382 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7383 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7384 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7385 
7386 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7387 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7388 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7389 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7390 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7391 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7392 
7393 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7394 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7395 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7396 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7397 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7398 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7399 
7400 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7401 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7402 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7403 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7404 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7405 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7406 
7407 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7408 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7409 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7410 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7411 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7412 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7413 
7414 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7415 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7416 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7417 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7418 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7419 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7420 
7421 #undef DO_PAR_ADDSUB
7422 #undef DO_PAR_ADDSUB_GE
7423 
7424 /*
7425  * Packing, unpacking, saturation, and reversal
7426  */
7427 
7428 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7429 {
7430     TCGv_i32 tn, tm;
7431     int shift = a->imm;
7432 
7433     if (s->thumb
7434         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7435         : !ENABLE_ARCH_6) {
7436         return false;
7437     }
7438 
7439     tn = load_reg(s, a->rn);
7440     tm = load_reg(s, a->rm);
7441     if (a->tb) {
7442         /* PKHTB */
7443         if (shift == 0) {
7444             shift = 31;
7445         }
7446         tcg_gen_sari_i32(tm, tm, shift);
7447         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7448     } else {
7449         /* PKHBT */
7450         tcg_gen_shli_i32(tm, tm, shift);
7451         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7452     }
7453     store_reg(s, a->rd, tn);
7454     return true;
7455 }
7456 
7457 static bool op_sat(DisasContext *s, arg_sat *a,
7458                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7459 {
7460     TCGv_i32 tmp;
7461     int shift = a->imm;
7462 
7463     if (!ENABLE_ARCH_6) {
7464         return false;
7465     }
7466 
7467     tmp = load_reg(s, a->rn);
7468     if (a->sh) {
7469         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7470     } else {
7471         tcg_gen_shli_i32(tmp, tmp, shift);
7472     }
7473 
7474     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7475 
7476     store_reg(s, a->rd, tmp);
7477     return true;
7478 }
7479 
7480 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7481 {
7482     return op_sat(s, a, gen_helper_ssat);
7483 }
7484 
7485 static bool trans_USAT(DisasContext *s, arg_sat *a)
7486 {
7487     return op_sat(s, a, gen_helper_usat);
7488 }
7489 
7490 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7491 {
7492     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7493         return false;
7494     }
7495     return op_sat(s, a, gen_helper_ssat16);
7496 }
7497 
7498 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7499 {
7500     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7501         return false;
7502     }
7503     return op_sat(s, a, gen_helper_usat16);
7504 }
7505 
7506 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7507                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7508                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7509 {
7510     TCGv_i32 tmp;
7511 
7512     if (!ENABLE_ARCH_6) {
7513         return false;
7514     }
7515 
7516     tmp = load_reg(s, a->rm);
7517     /*
7518      * TODO: In many cases we could do a shift instead of a rotate.
7519      * Combined with a simple extend, that becomes an extract.
7520      */
7521     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7522     gen_extract(tmp, tmp);
7523 
7524     if (a->rn != 15) {
7525         TCGv_i32 tmp2 = load_reg(s, a->rn);
7526         gen_add(tmp, tmp, tmp2);
7527     }
7528     store_reg(s, a->rd, tmp);
7529     return true;
7530 }
7531 
7532 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7533 {
7534     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7535 }
7536 
7537 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7538 {
7539     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7540 }
7541 
7542 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7543 {
7544     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7545         return false;
7546     }
7547     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7548 }
7549 
7550 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7551 {
7552     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7553 }
7554 
7555 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7556 {
7557     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7558 }
7559 
7560 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7561 {
7562     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7563         return false;
7564     }
7565     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7566 }
7567 
7568 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7569 {
7570     TCGv_i32 t1, t2, t3;
7571 
7572     if (s->thumb
7573         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7574         : !ENABLE_ARCH_6) {
7575         return false;
7576     }
7577 
7578     t1 = load_reg(s, a->rn);
7579     t2 = load_reg(s, a->rm);
7580     t3 = tcg_temp_new_i32();
7581     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7582     gen_helper_sel_flags(t1, t3, t1, t2);
7583     store_reg(s, a->rd, t1);
7584     return true;
7585 }
7586 
7587 static bool op_rr(DisasContext *s, arg_rr *a,
7588                   void (*gen)(TCGv_i32, TCGv_i32))
7589 {
7590     TCGv_i32 tmp;
7591 
7592     tmp = load_reg(s, a->rm);
7593     gen(tmp, tmp);
7594     store_reg(s, a->rd, tmp);
7595     return true;
7596 }
7597 
7598 static bool trans_REV(DisasContext *s, arg_rr *a)
7599 {
7600     if (!ENABLE_ARCH_6) {
7601         return false;
7602     }
7603     return op_rr(s, a, tcg_gen_bswap32_i32);
7604 }
7605 
7606 static bool trans_REV16(DisasContext *s, arg_rr *a)
7607 {
7608     if (!ENABLE_ARCH_6) {
7609         return false;
7610     }
7611     return op_rr(s, a, gen_rev16);
7612 }
7613 
7614 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7615 {
7616     if (!ENABLE_ARCH_6) {
7617         return false;
7618     }
7619     return op_rr(s, a, gen_revsh);
7620 }
7621 
7622 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7623 {
7624     if (!ENABLE_ARCH_6T2) {
7625         return false;
7626     }
7627     return op_rr(s, a, gen_helper_rbit);
7628 }
7629 
7630 /*
7631  * Signed multiply, signed and unsigned divide
7632  */
7633 
7634 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7635 {
7636     TCGv_i32 t1, t2;
7637 
7638     if (!ENABLE_ARCH_6) {
7639         return false;
7640     }
7641 
7642     t1 = load_reg(s, a->rn);
7643     t2 = load_reg(s, a->rm);
7644     if (m_swap) {
7645         gen_swap_half(t2, t2);
7646     }
7647     gen_smul_dual(t1, t2);
7648 
7649     if (sub) {
7650         /*
7651          * This subtraction cannot overflow, so we can do a simple
7652          * 32-bit subtraction and then a possible 32-bit saturating
7653          * addition of Ra.
7654          */
7655         tcg_gen_sub_i32(t1, t1, t2);
7656 
7657         if (a->ra != 15) {
7658             t2 = load_reg(s, a->ra);
7659             gen_helper_add_setq(t1, cpu_env, t1, t2);
7660         }
7661     } else if (a->ra == 15) {
7662         /* Single saturation-checking addition */
7663         gen_helper_add_setq(t1, cpu_env, t1, t2);
7664     } else {
7665         /*
7666          * We need to add the products and Ra together and then
7667          * determine whether the final result overflowed. Doing
7668          * this as two separate add-and-check-overflow steps incorrectly
7669          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7670          * Do all the arithmetic at 64-bits and then check for overflow.
7671          */
7672         TCGv_i64 p64, q64;
7673         TCGv_i32 t3, qf, one;
7674 
7675         p64 = tcg_temp_new_i64();
7676         q64 = tcg_temp_new_i64();
7677         tcg_gen_ext_i32_i64(p64, t1);
7678         tcg_gen_ext_i32_i64(q64, t2);
7679         tcg_gen_add_i64(p64, p64, q64);
7680         load_reg_var(s, t2, a->ra);
7681         tcg_gen_ext_i32_i64(q64, t2);
7682         tcg_gen_add_i64(p64, p64, q64);
7683 
7684         tcg_gen_extr_i64_i32(t1, t2, p64);
7685         /*
7686          * t1 is the low half of the result which goes into Rd.
7687          * We have overflow and must set Q if the high half (t2)
7688          * is different from the sign-extension of t1.
7689          */
7690         t3 = tcg_temp_new_i32();
7691         tcg_gen_sari_i32(t3, t1, 31);
7692         qf = load_cpu_field(QF);
7693         one = tcg_constant_i32(1);
7694         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7695         store_cpu_field(qf, QF);
7696     }
7697     store_reg(s, a->rd, t1);
7698     return true;
7699 }
7700 
7701 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7702 {
7703     return op_smlad(s, a, false, false);
7704 }
7705 
7706 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7707 {
7708     return op_smlad(s, a, true, false);
7709 }
7710 
7711 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7712 {
7713     return op_smlad(s, a, false, true);
7714 }
7715 
7716 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7717 {
7718     return op_smlad(s, a, true, true);
7719 }
7720 
7721 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7722 {
7723     TCGv_i32 t1, t2;
7724     TCGv_i64 l1, l2;
7725 
7726     if (!ENABLE_ARCH_6) {
7727         return false;
7728     }
7729 
7730     t1 = load_reg(s, a->rn);
7731     t2 = load_reg(s, a->rm);
7732     if (m_swap) {
7733         gen_swap_half(t2, t2);
7734     }
7735     gen_smul_dual(t1, t2);
7736 
7737     l1 = tcg_temp_new_i64();
7738     l2 = tcg_temp_new_i64();
7739     tcg_gen_ext_i32_i64(l1, t1);
7740     tcg_gen_ext_i32_i64(l2, t2);
7741 
7742     if (sub) {
7743         tcg_gen_sub_i64(l1, l1, l2);
7744     } else {
7745         tcg_gen_add_i64(l1, l1, l2);
7746     }
7747 
7748     gen_addq(s, l1, a->ra, a->rd);
7749     gen_storeq_reg(s, a->ra, a->rd, l1);
7750     return true;
7751 }
7752 
7753 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7754 {
7755     return op_smlald(s, a, false, false);
7756 }
7757 
7758 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7759 {
7760     return op_smlald(s, a, true, false);
7761 }
7762 
7763 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7764 {
7765     return op_smlald(s, a, false, true);
7766 }
7767 
7768 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7769 {
7770     return op_smlald(s, a, true, true);
7771 }
7772 
7773 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7774 {
7775     TCGv_i32 t1, t2;
7776 
7777     if (s->thumb
7778         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7779         : !ENABLE_ARCH_6) {
7780         return false;
7781     }
7782 
7783     t1 = load_reg(s, a->rn);
7784     t2 = load_reg(s, a->rm);
7785     tcg_gen_muls2_i32(t2, t1, t1, t2);
7786 
7787     if (a->ra != 15) {
7788         TCGv_i32 t3 = load_reg(s, a->ra);
7789         if (sub) {
7790             /*
7791              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
7792              * a non-zero multiplicand lowpart, and the correct result
7793              * lowpart for rounding.
7794              */
7795             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7796         } else {
7797             tcg_gen_add_i32(t1, t1, t3);
7798         }
7799     }
7800     if (round) {
7801         /*
7802          * Adding 0x80000000 to the 64-bit quantity means that we have
7803          * carry in to the high word when the low word has the msb set.
7804          */
7805         tcg_gen_shri_i32(t2, t2, 31);
7806         tcg_gen_add_i32(t1, t1, t2);
7807     }
7808     store_reg(s, a->rd, t1);
7809     return true;
7810 }
7811 
7812 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7813 {
7814     return op_smmla(s, a, false, false);
7815 }
7816 
7817 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7818 {
7819     return op_smmla(s, a, true, false);
7820 }
7821 
7822 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7823 {
7824     return op_smmla(s, a, false, true);
7825 }
7826 
7827 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7828 {
7829     return op_smmla(s, a, true, true);
7830 }
7831 
7832 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7833 {
7834     TCGv_i32 t1, t2;
7835 
7836     if (s->thumb
7837         ? !dc_isar_feature(aa32_thumb_div, s)
7838         : !dc_isar_feature(aa32_arm_div, s)) {
7839         return false;
7840     }
7841 
7842     t1 = load_reg(s, a->rn);
7843     t2 = load_reg(s, a->rm);
7844     if (u) {
7845         gen_helper_udiv(t1, cpu_env, t1, t2);
7846     } else {
7847         gen_helper_sdiv(t1, cpu_env, t1, t2);
7848     }
7849     store_reg(s, a->rd, t1);
7850     return true;
7851 }
7852 
7853 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7854 {
7855     return op_div(s, a, false);
7856 }
7857 
7858 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7859 {
7860     return op_div(s, a, true);
7861 }
7862 
7863 /*
7864  * Block data transfer
7865  */
7866 
7867 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7868 {
7869     TCGv_i32 addr = load_reg(s, a->rn);
7870 
7871     if (a->b) {
7872         if (a->i) {
7873             /* pre increment */
7874             tcg_gen_addi_i32(addr, addr, 4);
7875         } else {
7876             /* pre decrement */
7877             tcg_gen_addi_i32(addr, addr, -(n * 4));
7878         }
7879     } else if (!a->i && n != 1) {
7880         /* post decrement */
7881         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7882     }
7883 
7884     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7885         /*
7886          * If the writeback is incrementing SP rather than
7887          * decrementing it, and the initial SP is below the
7888          * stack limit but the final written-back SP would
7889          * be above, then we must not perform any memory
7890          * accesses, but it is IMPDEF whether we generate
7891          * an exception. We choose to do so in this case.
7892          * At this point 'addr' is the lowest address, so
7893          * either the original SP (if incrementing) or our
7894          * final SP (if decrementing), so that's what we check.
7895          */
7896         gen_helper_v8m_stackcheck(cpu_env, addr);
7897     }
7898 
7899     return addr;
7900 }
7901 
7902 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7903                                TCGv_i32 addr, int n)
7904 {
7905     if (a->w) {
7906         /* write back */
7907         if (!a->b) {
7908             if (a->i) {
7909                 /* post increment */
7910                 tcg_gen_addi_i32(addr, addr, 4);
7911             } else {
7912                 /* post decrement */
7913                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7914             }
7915         } else if (!a->i && n != 1) {
7916             /* pre decrement */
7917             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7918         }
7919         store_reg(s, a->rn, addr);
7920     }
7921 }
7922 
7923 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7924 {
7925     int i, j, n, list, mem_idx;
7926     bool user = a->u;
7927     TCGv_i32 addr, tmp;
7928 
7929     if (user) {
7930         /* STM (user) */
7931         if (IS_USER(s)) {
7932             /* Only usable in supervisor mode.  */
7933             unallocated_encoding(s);
7934             return true;
7935         }
7936     }
7937 
7938     list = a->list;
7939     n = ctpop16(list);
7940     if (n < min_n || a->rn == 15) {
7941         unallocated_encoding(s);
7942         return true;
7943     }
7944 
7945     s->eci_handled = true;
7946 
7947     addr = op_addr_block_pre(s, a, n);
7948     mem_idx = get_mem_index(s);
7949 
7950     for (i = j = 0; i < 16; i++) {
7951         if (!(list & (1 << i))) {
7952             continue;
7953         }
7954 
7955         if (user && i != 15) {
7956             tmp = tcg_temp_new_i32();
7957             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
7958         } else {
7959             tmp = load_reg(s, i);
7960         }
7961         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7962 
7963         /* No need to add after the last transfer.  */
7964         if (++j != n) {
7965             tcg_gen_addi_i32(addr, addr, 4);
7966         }
7967     }
7968 
7969     op_addr_block_post(s, a, addr, n);
7970     clear_eci_state(s);
7971     return true;
7972 }
7973 
7974 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7975 {
7976     /* BitCount(list) < 1 is UNPREDICTABLE */
7977     return op_stm(s, a, 1);
7978 }
7979 
7980 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7981 {
7982     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7983     if (a->w && (a->list & (1 << a->rn))) {
7984         unallocated_encoding(s);
7985         return true;
7986     }
7987     /* BitCount(list) < 2 is UNPREDICTABLE */
7988     return op_stm(s, a, 2);
7989 }
7990 
7991 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7992 {
7993     int i, j, n, list, mem_idx;
7994     bool loaded_base;
7995     bool user = a->u;
7996     bool exc_return = false;
7997     TCGv_i32 addr, tmp, loaded_var;
7998 
7999     if (user) {
8000         /* LDM (user), LDM (exception return) */
8001         if (IS_USER(s)) {
8002             /* Only usable in supervisor mode.  */
8003             unallocated_encoding(s);
8004             return true;
8005         }
8006         if (extract32(a->list, 15, 1)) {
8007             exc_return = true;
8008             user = false;
8009         } else {
8010             /* LDM (user) does not allow writeback.  */
8011             if (a->w) {
8012                 unallocated_encoding(s);
8013                 return true;
8014             }
8015         }
8016     }
8017 
8018     list = a->list;
8019     n = ctpop16(list);
8020     if (n < min_n || a->rn == 15) {
8021         unallocated_encoding(s);
8022         return true;
8023     }
8024 
8025     s->eci_handled = true;
8026 
8027     addr = op_addr_block_pre(s, a, n);
8028     mem_idx = get_mem_index(s);
8029     loaded_base = false;
8030     loaded_var = NULL;
8031 
8032     for (i = j = 0; i < 16; i++) {
8033         if (!(list & (1 << i))) {
8034             continue;
8035         }
8036 
8037         tmp = tcg_temp_new_i32();
8038         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8039         if (user) {
8040             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8041         } else if (i == a->rn) {
8042             loaded_var = tmp;
8043             loaded_base = true;
8044         } else if (i == 15 && exc_return) {
8045             store_pc_exc_ret(s, tmp);
8046         } else {
8047             store_reg_from_load(s, i, tmp);
8048         }
8049 
8050         /* No need to add after the last transfer.  */
8051         if (++j != n) {
8052             tcg_gen_addi_i32(addr, addr, 4);
8053         }
8054     }
8055 
8056     op_addr_block_post(s, a, addr, n);
8057 
8058     if (loaded_base) {
8059         /* Note that we reject base == pc above.  */
8060         store_reg(s, a->rn, loaded_var);
8061     }
8062 
8063     if (exc_return) {
8064         /* Restore CPSR from SPSR.  */
8065         tmp = load_cpu_field(spsr);
8066         translator_io_start(&s->base);
8067         gen_helper_cpsr_write_eret(cpu_env, tmp);
8068         /* Must exit loop to check un-masked IRQs */
8069         s->base.is_jmp = DISAS_EXIT;
8070     }
8071     clear_eci_state(s);
8072     return true;
8073 }
8074 
8075 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8076 {
8077     /*
8078      * Writeback register in register list is UNPREDICTABLE
8079      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8080      * an UNKNOWN value to the base register.
8081      */
8082     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8083         unallocated_encoding(s);
8084         return true;
8085     }
8086     /* BitCount(list) < 1 is UNPREDICTABLE */
8087     return do_ldm(s, a, 1);
8088 }
8089 
8090 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8091 {
8092     /* Writeback register in register list is UNPREDICTABLE for T32. */
8093     if (a->w && (a->list & (1 << a->rn))) {
8094         unallocated_encoding(s);
8095         return true;
8096     }
8097     /* BitCount(list) < 2 is UNPREDICTABLE */
8098     return do_ldm(s, a, 2);
8099 }
8100 
8101 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8102 {
8103     /* Writeback is conditional on the base register not being loaded.  */
8104     a->w = !(a->list & (1 << a->rn));
8105     /* BitCount(list) < 1 is UNPREDICTABLE */
8106     return do_ldm(s, a, 1);
8107 }
8108 
8109 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8110 {
8111     int i;
8112     TCGv_i32 zero;
8113 
8114     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8115         return false;
8116     }
8117 
8118     if (extract32(a->list, 13, 1)) {
8119         return false;
8120     }
8121 
8122     if (!a->list) {
8123         /* UNPREDICTABLE; we choose to UNDEF */
8124         return false;
8125     }
8126 
8127     s->eci_handled = true;
8128 
8129     zero = tcg_constant_i32(0);
8130     for (i = 0; i < 15; i++) {
8131         if (extract32(a->list, i, 1)) {
8132             /* Clear R[i] */
8133             tcg_gen_mov_i32(cpu_R[i], zero);
8134         }
8135     }
8136     if (extract32(a->list, 15, 1)) {
8137         /*
8138          * Clear APSR (by calling the MSR helper with the same argument
8139          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8140          */
8141         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8142     }
8143     clear_eci_state(s);
8144     return true;
8145 }
8146 
8147 /*
8148  * Branch, branch with link
8149  */
8150 
8151 static bool trans_B(DisasContext *s, arg_i *a)
8152 {
8153     gen_jmp(s, jmp_diff(s, a->imm));
8154     return true;
8155 }
8156 
8157 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8158 {
8159     /* This has cond from encoding, required to be outside IT block.  */
8160     if (a->cond >= 0xe) {
8161         return false;
8162     }
8163     if (s->condexec_mask) {
8164         unallocated_encoding(s);
8165         return true;
8166     }
8167     arm_skip_unless(s, a->cond);
8168     gen_jmp(s, jmp_diff(s, a->imm));
8169     return true;
8170 }
8171 
8172 static bool trans_BL(DisasContext *s, arg_i *a)
8173 {
8174     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8175     gen_jmp(s, jmp_diff(s, a->imm));
8176     return true;
8177 }
8178 
8179 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8180 {
8181     /*
8182      * BLX <imm> would be useless on M-profile; the encoding space
8183      * is used for other insns from v8.1M onward, and UNDEFs before that.
8184      */
8185     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8186         return false;
8187     }
8188 
8189     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8190     if (s->thumb && (a->imm & 2)) {
8191         return false;
8192     }
8193     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8194     store_cpu_field_constant(!s->thumb, thumb);
8195     /* This jump is computed from an aligned PC: subtract off the low bits. */
8196     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8197     return true;
8198 }
8199 
8200 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8201 {
8202     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8203     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8204     return true;
8205 }
8206 
8207 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8208 {
8209     TCGv_i32 tmp = tcg_temp_new_i32();
8210 
8211     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8212     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8213     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8214     gen_bx(s, tmp);
8215     return true;
8216 }
8217 
8218 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8219 {
8220     TCGv_i32 tmp;
8221 
8222     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8223     if (!ENABLE_ARCH_5) {
8224         return false;
8225     }
8226     tmp = tcg_temp_new_i32();
8227     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8228     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8229     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8230     gen_bx(s, tmp);
8231     return true;
8232 }
8233 
8234 static bool trans_BF(DisasContext *s, arg_BF *a)
8235 {
8236     /*
8237      * M-profile branch future insns. The architecture permits an
8238      * implementation to implement these as NOPs (equivalent to
8239      * discarding the LO_BRANCH_INFO cache immediately), and we
8240      * take that IMPDEF option because for QEMU a "real" implementation
8241      * would be complicated and wouldn't execute any faster.
8242      */
8243     if (!dc_isar_feature(aa32_lob, s)) {
8244         return false;
8245     }
8246     if (a->boff == 0) {
8247         /* SEE "Related encodings" (loop insns) */
8248         return false;
8249     }
8250     /* Handle as NOP */
8251     return true;
8252 }
8253 
8254 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8255 {
8256     /* M-profile low-overhead loop start */
8257     TCGv_i32 tmp;
8258 
8259     if (!dc_isar_feature(aa32_lob, s)) {
8260         return false;
8261     }
8262     if (a->rn == 13 || a->rn == 15) {
8263         /*
8264          * For DLSTP rn == 15 is a related encoding (LCTP); the
8265          * other cases caught by this condition are all
8266          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8267          */
8268         return false;
8269     }
8270 
8271     if (a->size != 4) {
8272         /* DLSTP */
8273         if (!dc_isar_feature(aa32_mve, s)) {
8274             return false;
8275         }
8276         if (!vfp_access_check(s)) {
8277             return true;
8278         }
8279     }
8280 
8281     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8282     tmp = load_reg(s, a->rn);
8283     store_reg(s, 14, tmp);
8284     if (a->size != 4) {
8285         /* DLSTP: set FPSCR.LTPSIZE */
8286         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8287         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8288     }
8289     return true;
8290 }
8291 
8292 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8293 {
8294     /* M-profile low-overhead while-loop start */
8295     TCGv_i32 tmp;
8296     DisasLabel nextlabel;
8297 
8298     if (!dc_isar_feature(aa32_lob, s)) {
8299         return false;
8300     }
8301     if (a->rn == 13 || a->rn == 15) {
8302         /*
8303          * For WLSTP rn == 15 is a related encoding (LE); the
8304          * other cases caught by this condition are all
8305          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8306          */
8307         return false;
8308     }
8309     if (s->condexec_mask) {
8310         /*
8311          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8312          * we choose to UNDEF, because otherwise our use of
8313          * gen_goto_tb(1) would clash with the use of TB exit 1
8314          * in the dc->condjmp condition-failed codepath in
8315          * arm_tr_tb_stop() and we'd get an assertion.
8316          */
8317         return false;
8318     }
8319     if (a->size != 4) {
8320         /* WLSTP */
8321         if (!dc_isar_feature(aa32_mve, s)) {
8322             return false;
8323         }
8324         /*
8325          * We need to check that the FPU is enabled here, but mustn't
8326          * call vfp_access_check() to do that because we don't want to
8327          * do the lazy state preservation in the "loop count is zero" case.
8328          * Do the check-and-raise-exception by hand.
8329          */
8330         if (s->fp_excp_el) {
8331             gen_exception_insn_el(s, 0, EXCP_NOCP,
8332                                   syn_uncategorized(), s->fp_excp_el);
8333             return true;
8334         }
8335     }
8336 
8337     nextlabel = gen_disas_label(s);
8338     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8339     tmp = load_reg(s, a->rn);
8340     store_reg(s, 14, tmp);
8341     if (a->size != 4) {
8342         /*
8343          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8344          * lazy state preservation, new FP context creation, etc,
8345          * that vfp_access_check() does. We know that the actual
8346          * access check will succeed (ie it won't generate code that
8347          * throws an exception) because we did that check by hand earlier.
8348          */
8349         bool ok = vfp_access_check(s);
8350         assert(ok);
8351         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8352         /*
8353          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8354          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8355          */
8356     }
8357     gen_jmp_tb(s, curr_insn_len(s), 1);
8358 
8359     set_disas_label(s, nextlabel);
8360     gen_jmp(s, jmp_diff(s, a->imm));
8361     return true;
8362 }
8363 
8364 static bool trans_LE(DisasContext *s, arg_LE *a)
8365 {
8366     /*
8367      * M-profile low-overhead loop end. The architecture permits an
8368      * implementation to discard the LO_BRANCH_INFO cache at any time,
8369      * and we take the IMPDEF option to never set it in the first place
8370      * (equivalent to always discarding it immediately), because for QEMU
8371      * a "real" implementation would be complicated and wouldn't execute
8372      * any faster.
8373      */
8374     TCGv_i32 tmp;
8375     DisasLabel loopend;
8376     bool fpu_active;
8377 
8378     if (!dc_isar_feature(aa32_lob, s)) {
8379         return false;
8380     }
8381     if (a->f && a->tp) {
8382         return false;
8383     }
8384     if (s->condexec_mask) {
8385         /*
8386          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8387          * we choose to UNDEF, because otherwise our use of
8388          * gen_goto_tb(1) would clash with the use of TB exit 1
8389          * in the dc->condjmp condition-failed codepath in
8390          * arm_tr_tb_stop() and we'd get an assertion.
8391          */
8392         return false;
8393     }
8394     if (a->tp) {
8395         /* LETP */
8396         if (!dc_isar_feature(aa32_mve, s)) {
8397             return false;
8398         }
8399         if (!vfp_access_check(s)) {
8400             s->eci_handled = true;
8401             return true;
8402         }
8403     }
8404 
8405     /* LE/LETP is OK with ECI set and leaves it untouched */
8406     s->eci_handled = true;
8407 
8408     /*
8409      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8410      * UsageFault exception for the LE insn in that case. Note that we
8411      * are not directly checking FPSCR.LTPSIZE but instead check the
8412      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8413      * not currently active (ie ActiveFPState() returns false). We
8414      * can identify not-active purely from our TB state flags, as the
8415      * FPU is active only if:
8416      *  the FPU is enabled
8417      *  AND lazy state preservation is not active
8418      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8419      *
8420      * Usually we don't need to care about this distinction between
8421      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8422      * will either take an exception or clear the conditions that make
8423      * the FPU not active. But LE is an unusual case of a non-FP insn
8424      * that looks at LTPSIZE.
8425      */
8426     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8427 
8428     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8429         /* Need to do a runtime check for LTPSIZE != 4 */
8430         DisasLabel skipexc = gen_disas_label(s);
8431         tmp = load_cpu_field(v7m.ltpsize);
8432         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8433         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8434         set_disas_label(s, skipexc);
8435     }
8436 
8437     if (a->f) {
8438         /* Loop-forever: just jump back to the loop start */
8439         gen_jmp(s, jmp_diff(s, -a->imm));
8440         return true;
8441     }
8442 
8443     /*
8444      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8445      * For LE, we know at this point that LTPSIZE must be 4 and the
8446      * loop decrement value is 1. For LETP we need to calculate the decrement
8447      * value from LTPSIZE.
8448      */
8449     loopend = gen_disas_label(s);
8450     if (!a->tp) {
8451         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8452         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8453     } else {
8454         /*
8455          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8456          * so that decr stays live after the brcondi.
8457          */
8458         TCGv_i32 decr = tcg_temp_new_i32();
8459         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8460         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8461         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8462 
8463         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8464 
8465         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8466     }
8467     /* Jump back to the loop start */
8468     gen_jmp(s, jmp_diff(s, -a->imm));
8469 
8470     set_disas_label(s, loopend);
8471     if (a->tp) {
8472         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8473         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8474     }
8475     /* End TB, continuing to following insn */
8476     gen_jmp_tb(s, curr_insn_len(s), 1);
8477     return true;
8478 }
8479 
8480 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8481 {
8482     /*
8483      * M-profile Loop Clear with Tail Predication. Since our implementation
8484      * doesn't cache branch information, all we need to do is reset
8485      * FPSCR.LTPSIZE to 4.
8486      */
8487 
8488     if (!dc_isar_feature(aa32_lob, s) ||
8489         !dc_isar_feature(aa32_mve, s)) {
8490         return false;
8491     }
8492 
8493     if (!vfp_access_check(s)) {
8494         return true;
8495     }
8496 
8497     store_cpu_field_constant(4, v7m.ltpsize);
8498     return true;
8499 }
8500 
8501 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8502 {
8503     /*
8504      * M-profile Create Vector Tail Predicate. This insn is itself
8505      * predicated and is subject to beatwise execution.
8506      */
8507     TCGv_i32 rn_shifted, masklen;
8508 
8509     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8510         return false;
8511     }
8512 
8513     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8514         return true;
8515     }
8516 
8517     /*
8518      * We pre-calculate the mask length here to avoid having
8519      * to have multiple helpers specialized for size.
8520      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8521      */
8522     rn_shifted = tcg_temp_new_i32();
8523     masklen = load_reg(s, a->rn);
8524     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8525     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8526                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8527                         rn_shifted, tcg_constant_i32(16));
8528     gen_helper_mve_vctp(cpu_env, masklen);
8529     /* This insn updates predication bits */
8530     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8531     mve_update_eci(s);
8532     return true;
8533 }
8534 
8535 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8536 {
8537     TCGv_i32 addr, tmp;
8538 
8539     tmp = load_reg(s, a->rm);
8540     if (half) {
8541         tcg_gen_add_i32(tmp, tmp, tmp);
8542     }
8543     addr = load_reg(s, a->rn);
8544     tcg_gen_add_i32(addr, addr, tmp);
8545 
8546     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8547 
8548     tcg_gen_add_i32(tmp, tmp, tmp);
8549     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8550     tcg_gen_add_i32(tmp, tmp, addr);
8551     store_reg(s, 15, tmp);
8552     return true;
8553 }
8554 
8555 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8556 {
8557     return op_tbranch(s, a, false);
8558 }
8559 
8560 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8561 {
8562     return op_tbranch(s, a, true);
8563 }
8564 
8565 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8566 {
8567     TCGv_i32 tmp = load_reg(s, a->rn);
8568 
8569     arm_gen_condlabel(s);
8570     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8571                         tmp, 0, s->condlabel.label);
8572     gen_jmp(s, jmp_diff(s, a->imm));
8573     return true;
8574 }
8575 
8576 /*
8577  * Supervisor call - both T32 & A32 come here so we need to check
8578  * which mode we are in when checking for semihosting.
8579  */
8580 
8581 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8582 {
8583     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8584 
8585     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8586         semihosting_enabled(s->current_el == 0) &&
8587         (a->imm == semihost_imm)) {
8588         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8589     } else {
8590         if (s->fgt_svc) {
8591             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8592             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8593         } else {
8594             gen_update_pc(s, curr_insn_len(s));
8595             s->svc_imm = a->imm;
8596             s->base.is_jmp = DISAS_SWI;
8597         }
8598     }
8599     return true;
8600 }
8601 
8602 /*
8603  * Unconditional system instructions
8604  */
8605 
8606 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8607 {
8608     static const int8_t pre_offset[4] = {
8609         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8610     };
8611     static const int8_t post_offset[4] = {
8612         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8613     };
8614     TCGv_i32 addr, t1, t2;
8615 
8616     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8617         return false;
8618     }
8619     if (IS_USER(s)) {
8620         unallocated_encoding(s);
8621         return true;
8622     }
8623 
8624     addr = load_reg(s, a->rn);
8625     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8626 
8627     /* Load PC into tmp and CPSR into tmp2.  */
8628     t1 = tcg_temp_new_i32();
8629     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8630     tcg_gen_addi_i32(addr, addr, 4);
8631     t2 = tcg_temp_new_i32();
8632     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8633 
8634     if (a->w) {
8635         /* Base writeback.  */
8636         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8637         store_reg(s, a->rn, addr);
8638     }
8639     gen_rfe(s, t1, t2);
8640     return true;
8641 }
8642 
8643 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8644 {
8645     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8646         return false;
8647     }
8648     gen_srs(s, a->mode, a->pu, a->w);
8649     return true;
8650 }
8651 
8652 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8653 {
8654     uint32_t mask, val;
8655 
8656     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8657         return false;
8658     }
8659     if (IS_USER(s)) {
8660         /* Implemented as NOP in user mode.  */
8661         return true;
8662     }
8663     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8664 
8665     mask = val = 0;
8666     if (a->imod & 2) {
8667         if (a->A) {
8668             mask |= CPSR_A;
8669         }
8670         if (a->I) {
8671             mask |= CPSR_I;
8672         }
8673         if (a->F) {
8674             mask |= CPSR_F;
8675         }
8676         if (a->imod & 1) {
8677             val |= mask;
8678         }
8679     }
8680     if (a->M) {
8681         mask |= CPSR_M;
8682         val |= a->mode;
8683     }
8684     if (mask) {
8685         gen_set_psr_im(s, mask, 0, val);
8686     }
8687     return true;
8688 }
8689 
8690 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8691 {
8692     TCGv_i32 tmp, addr;
8693 
8694     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8695         return false;
8696     }
8697     if (IS_USER(s)) {
8698         /* Implemented as NOP in user mode.  */
8699         return true;
8700     }
8701 
8702     tmp = tcg_constant_i32(a->im);
8703     /* FAULTMASK */
8704     if (a->F) {
8705         addr = tcg_constant_i32(19);
8706         gen_helper_v7m_msr(cpu_env, addr, tmp);
8707     }
8708     /* PRIMASK */
8709     if (a->I) {
8710         addr = tcg_constant_i32(16);
8711         gen_helper_v7m_msr(cpu_env, addr, tmp);
8712     }
8713     gen_rebuild_hflags(s, false);
8714     gen_lookup_tb(s);
8715     return true;
8716 }
8717 
8718 /*
8719  * Clear-Exclusive, Barriers
8720  */
8721 
8722 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8723 {
8724     if (s->thumb
8725         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8726         : !ENABLE_ARCH_6K) {
8727         return false;
8728     }
8729     gen_clrex(s);
8730     return true;
8731 }
8732 
8733 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8734 {
8735     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8736         return false;
8737     }
8738     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8739     return true;
8740 }
8741 
8742 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8743 {
8744     return trans_DSB(s, NULL);
8745 }
8746 
8747 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8748 {
8749     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8750         return false;
8751     }
8752     /*
8753      * We need to break the TB after this insn to execute
8754      * self-modifying code correctly and also to take
8755      * any pending interrupts immediately.
8756      */
8757     s->base.is_jmp = DISAS_TOO_MANY;
8758     return true;
8759 }
8760 
8761 static bool trans_SB(DisasContext *s, arg_SB *a)
8762 {
8763     if (!dc_isar_feature(aa32_sb, s)) {
8764         return false;
8765     }
8766     /*
8767      * TODO: There is no speculation barrier opcode
8768      * for TCG; MB and end the TB instead.
8769      */
8770     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8771     s->base.is_jmp = DISAS_TOO_MANY;
8772     return true;
8773 }
8774 
8775 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8776 {
8777     if (!ENABLE_ARCH_6) {
8778         return false;
8779     }
8780     if (a->E != (s->be_data == MO_BE)) {
8781         gen_helper_setend(cpu_env);
8782         s->base.is_jmp = DISAS_UPDATE_EXIT;
8783     }
8784     return true;
8785 }
8786 
8787 /*
8788  * Preload instructions
8789  * All are nops, contingent on the appropriate arch level.
8790  */
8791 
8792 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8793 {
8794     return ENABLE_ARCH_5TE;
8795 }
8796 
8797 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8798 {
8799     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8800 }
8801 
8802 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8803 {
8804     return ENABLE_ARCH_7;
8805 }
8806 
8807 /*
8808  * If-then
8809  */
8810 
8811 static bool trans_IT(DisasContext *s, arg_IT *a)
8812 {
8813     int cond_mask = a->cond_mask;
8814 
8815     /*
8816      * No actual code generated for this insn, just setup state.
8817      *
8818      * Combinations of firstcond and mask which set up an 0b1111
8819      * condition are UNPREDICTABLE; we take the CONSTRAINED
8820      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8821      * i.e. both meaning "execute always".
8822      */
8823     s->condexec_cond = (cond_mask >> 4) & 0xe;
8824     s->condexec_mask = cond_mask & 0x1f;
8825     return true;
8826 }
8827 
8828 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8829 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8830 {
8831     TCGv_i32 rn, rm;
8832     DisasCompare c;
8833 
8834     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8835         return false;
8836     }
8837 
8838     if (a->rm == 13) {
8839         /* SEE "Related encodings" (MVE shifts) */
8840         return false;
8841     }
8842 
8843     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8844         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8845         return false;
8846     }
8847 
8848     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8849     rn = tcg_temp_new_i32();
8850     rm = tcg_temp_new_i32();
8851     if (a->rn == 15) {
8852         tcg_gen_movi_i32(rn, 0);
8853     } else {
8854         load_reg_var(s, rn, a->rn);
8855     }
8856     if (a->rm == 15) {
8857         tcg_gen_movi_i32(rm, 0);
8858     } else {
8859         load_reg_var(s, rm, a->rm);
8860     }
8861 
8862     switch (a->op) {
8863     case 0: /* CSEL */
8864         break;
8865     case 1: /* CSINC */
8866         tcg_gen_addi_i32(rm, rm, 1);
8867         break;
8868     case 2: /* CSINV */
8869         tcg_gen_not_i32(rm, rm);
8870         break;
8871     case 3: /* CSNEG */
8872         tcg_gen_neg_i32(rm, rm);
8873         break;
8874     default:
8875         g_assert_not_reached();
8876     }
8877 
8878     arm_test_cc(&c, a->fcond);
8879     tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8880 
8881     store_reg(s, a->rd, rn);
8882     return true;
8883 }
8884 
8885 /*
8886  * Legacy decoder.
8887  */
8888 
8889 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8890 {
8891     unsigned int cond = insn >> 28;
8892 
8893     /* M variants do not implement ARM mode; this must raise the INVSTATE
8894      * UsageFault exception.
8895      */
8896     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8897         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8898         return;
8899     }
8900 
8901     if (s->pstate_il) {
8902         /*
8903          * Illegal execution state. This has priority over BTI
8904          * exceptions, but comes after instruction abort exceptions.
8905          */
8906         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8907         return;
8908     }
8909 
8910     if (cond == 0xf) {
8911         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8912          * choose to UNDEF. In ARMv5 and above the space is used
8913          * for miscellaneous unconditional instructions.
8914          */
8915         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8916             unallocated_encoding(s);
8917             return;
8918         }
8919 
8920         /* Unconditional instructions.  */
8921         /* TODO: Perhaps merge these into one decodetree output file.  */
8922         if (disas_a32_uncond(s, insn) ||
8923             disas_vfp_uncond(s, insn) ||
8924             disas_neon_dp(s, insn) ||
8925             disas_neon_ls(s, insn) ||
8926             disas_neon_shared(s, insn)) {
8927             return;
8928         }
8929         /* fall back to legacy decoder */
8930 
8931         if ((insn & 0x0e000f00) == 0x0c000100) {
8932             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8933                 /* iWMMXt register transfer.  */
8934                 if (extract32(s->c15_cpar, 1, 1)) {
8935                     if (!disas_iwmmxt_insn(s, insn)) {
8936                         return;
8937                     }
8938                 }
8939             }
8940         }
8941         goto illegal_op;
8942     }
8943     if (cond != 0xe) {
8944         /* if not always execute, we generate a conditional jump to
8945            next instruction */
8946         arm_skip_unless(s, cond);
8947     }
8948 
8949     /* TODO: Perhaps merge these into one decodetree output file.  */
8950     if (disas_a32(s, insn) ||
8951         disas_vfp(s, insn)) {
8952         return;
8953     }
8954     /* fall back to legacy decoder */
8955     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8956     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8957         if (((insn & 0x0c000e00) == 0x0c000000)
8958             && ((insn & 0x03000000) != 0x03000000)) {
8959             /* Coprocessor insn, coprocessor 0 or 1 */
8960             disas_xscale_insn(s, insn);
8961             return;
8962         }
8963     }
8964 
8965 illegal_op:
8966     unallocated_encoding(s);
8967 }
8968 
8969 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8970 {
8971     /*
8972      * Return true if this is a 16 bit instruction. We must be precise
8973      * about this (matching the decode).
8974      */
8975     if ((insn >> 11) < 0x1d) {
8976         /* Definitely a 16-bit instruction */
8977         return true;
8978     }
8979 
8980     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8981      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8982      * end up actually treating this as two 16-bit insns, though,
8983      * if it's half of a bl/blx pair that might span a page boundary.
8984      */
8985     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8986         arm_dc_feature(s, ARM_FEATURE_M)) {
8987         /* Thumb2 cores (including all M profile ones) always treat
8988          * 32-bit insns as 32-bit.
8989          */
8990         return false;
8991     }
8992 
8993     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8994         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8995          * is not on the next page; we merge this into a 32-bit
8996          * insn.
8997          */
8998         return false;
8999     }
9000     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9001      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9002      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9003      *  -- handle as single 16 bit insn
9004      */
9005     return true;
9006 }
9007 
9008 /* Translate a 32-bit thumb instruction. */
9009 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9010 {
9011     /*
9012      * ARMv6-M supports a limited subset of Thumb2 instructions.
9013      * Other Thumb1 architectures allow only 32-bit
9014      * combined BL/BLX prefix and suffix.
9015      */
9016     if (arm_dc_feature(s, ARM_FEATURE_M) &&
9017         !arm_dc_feature(s, ARM_FEATURE_V7)) {
9018         int i;
9019         bool found = false;
9020         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9021                                                0xf3b08040 /* dsb */,
9022                                                0xf3b08050 /* dmb */,
9023                                                0xf3b08060 /* isb */,
9024                                                0xf3e08000 /* mrs */,
9025                                                0xf000d000 /* bl */};
9026         static const uint32_t armv6m_mask[] = {0xffe0d000,
9027                                                0xfff0d0f0,
9028                                                0xfff0d0f0,
9029                                                0xfff0d0f0,
9030                                                0xffe0d000,
9031                                                0xf800d000};
9032 
9033         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9034             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9035                 found = true;
9036                 break;
9037             }
9038         }
9039         if (!found) {
9040             goto illegal_op;
9041         }
9042     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9043         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9044             unallocated_encoding(s);
9045             return;
9046         }
9047     }
9048 
9049     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9050         /*
9051          * NOCP takes precedence over any UNDEF for (almost) the
9052          * entire wide range of coprocessor-space encodings, so check
9053          * for it first before proceeding to actually decode eg VFP
9054          * insns. This decode also handles the few insns which are
9055          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9056          */
9057         if (disas_m_nocp(s, insn)) {
9058             return;
9059         }
9060     }
9061 
9062     if ((insn & 0xef000000) == 0xef000000) {
9063         /*
9064          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9065          * transform into
9066          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9067          */
9068         uint32_t a32_insn = (insn & 0xe2ffffff) |
9069             ((insn & (1 << 28)) >> 4) | (1 << 28);
9070 
9071         if (disas_neon_dp(s, a32_insn)) {
9072             return;
9073         }
9074     }
9075 
9076     if ((insn & 0xff100000) == 0xf9000000) {
9077         /*
9078          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9079          * transform into
9080          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9081          */
9082         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9083 
9084         if (disas_neon_ls(s, a32_insn)) {
9085             return;
9086         }
9087     }
9088 
9089     /*
9090      * TODO: Perhaps merge these into one decodetree output file.
9091      * Note disas_vfp is written for a32 with cond field in the
9092      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9093      */
9094     if (disas_t32(s, insn) ||
9095         disas_vfp_uncond(s, insn) ||
9096         disas_neon_shared(s, insn) ||
9097         disas_mve(s, insn) ||
9098         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9099         return;
9100     }
9101 
9102 illegal_op:
9103     unallocated_encoding(s);
9104 }
9105 
9106 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9107 {
9108     if (!disas_t16(s, insn)) {
9109         unallocated_encoding(s);
9110     }
9111 }
9112 
9113 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9114 {
9115     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9116      * (False positives are OK, false negatives are not.)
9117      * We know this is a Thumb insn, and our caller ensures we are
9118      * only called if dc->base.pc_next is less than 4 bytes from the page
9119      * boundary, so we cross the page if the first 16 bits indicate
9120      * that this is a 32 bit insn.
9121      */
9122     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9123 
9124     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9125 }
9126 
9127 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9128 {
9129     DisasContext *dc = container_of(dcbase, DisasContext, base);
9130     CPUARMState *env = cs->env_ptr;
9131     ARMCPU *cpu = env_archcpu(env);
9132     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9133     uint32_t condexec, core_mmu_idx;
9134 
9135     dc->isar = &cpu->isar;
9136     dc->condjmp = 0;
9137     dc->pc_save = dc->base.pc_first;
9138     dc->aarch64 = false;
9139     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9140     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9141     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9142     /*
9143      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9144      * is always the IT bits. On M-profile, some of the reserved encodings
9145      * of IT are used instead to indicate either ICI or ECI, which
9146      * indicate partial progress of a restartable insn that was interrupted
9147      * partway through by an exception:
9148      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9149      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9150      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9151      * insn, behave normally".
9152      */
9153     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9154     dc->eci_handled = false;
9155     if (condexec & 0xf) {
9156         dc->condexec_mask = (condexec & 0xf) << 1;
9157         dc->condexec_cond = condexec >> 4;
9158     } else {
9159         if (arm_feature(env, ARM_FEATURE_M)) {
9160             dc->eci = condexec >> 4;
9161         }
9162     }
9163 
9164     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9165     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9166     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9167 #if !defined(CONFIG_USER_ONLY)
9168     dc->user = (dc->current_el == 0);
9169 #endif
9170     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9171     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9172     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9173     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9174     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9175 
9176     if (arm_feature(env, ARM_FEATURE_M)) {
9177         dc->vfp_enabled = 1;
9178         dc->be_data = MO_TE;
9179         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9180         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9181         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9182         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9183         dc->v7m_new_fp_ctxt_needed =
9184             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9185         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9186         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9187     } else {
9188         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9189         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9190         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9191         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9192         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9193             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9194         } else {
9195             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9196             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9197         }
9198         dc->sme_trap_nonstreaming =
9199             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9200     }
9201     dc->lse2 = false; /* applies only to aarch64 */
9202     dc->cp_regs = cpu->cp_regs;
9203     dc->features = env->features;
9204 
9205     /* Single step state. The code-generation logic here is:
9206      *  SS_ACTIVE == 0:
9207      *   generate code with no special handling for single-stepping (except
9208      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9209      *   this happens anyway because those changes are all system register or
9210      *   PSTATE writes).
9211      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9212      *   emit code for one insn
9213      *   emit code to clear PSTATE.SS
9214      *   emit code to generate software step exception for completed step
9215      *   end TB (as usual for having generated an exception)
9216      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9217      *   emit code to generate a software step exception
9218      *   end the TB
9219      */
9220     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9221     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9222     dc->is_ldex = false;
9223 
9224     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9225 
9226     /* If architectural single step active, limit to 1.  */
9227     if (dc->ss_active) {
9228         dc->base.max_insns = 1;
9229     }
9230 
9231     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9232        to those left on the page.  */
9233     if (!dc->thumb) {
9234         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9235         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9236     }
9237 
9238     cpu_V0 = tcg_temp_new_i64();
9239     cpu_V1 = tcg_temp_new_i64();
9240     cpu_M0 = tcg_temp_new_i64();
9241 }
9242 
9243 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9244 {
9245     DisasContext *dc = container_of(dcbase, DisasContext, base);
9246 
9247     /* A note on handling of the condexec (IT) bits:
9248      *
9249      * We want to avoid the overhead of having to write the updated condexec
9250      * bits back to the CPUARMState for every instruction in an IT block. So:
9251      * (1) if the condexec bits are not already zero then we write
9252      * zero back into the CPUARMState now. This avoids complications trying
9253      * to do it at the end of the block. (For example if we don't do this
9254      * it's hard to identify whether we can safely skip writing condexec
9255      * at the end of the TB, which we definitely want to do for the case
9256      * where a TB doesn't do anything with the IT state at all.)
9257      * (2) if we are going to leave the TB then we call gen_set_condexec()
9258      * which will write the correct value into CPUARMState if zero is wrong.
9259      * This is done both for leaving the TB at the end, and for leaving
9260      * it because of an exception we know will happen, which is done in
9261      * gen_exception_insn(). The latter is necessary because we need to
9262      * leave the TB with the PC/IT state just prior to execution of the
9263      * instruction which caused the exception.
9264      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9265      * then the CPUARMState will be wrong and we need to reset it.
9266      * This is handled in the same way as restoration of the
9267      * PC in these situations; we save the value of the condexec bits
9268      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9269      * then uses this to restore them after an exception.
9270      *
9271      * Note that there are no instructions which can read the condexec
9272      * bits, and none which can write non-static values to them, so
9273      * we don't need to care about whether CPUARMState is correct in the
9274      * middle of a TB.
9275      */
9276 
9277     /* Reset the conditional execution bits immediately. This avoids
9278        complications trying to do it at the end of the block.  */
9279     if (dc->condexec_mask || dc->condexec_cond) {
9280         store_cpu_field_constant(0, condexec_bits);
9281     }
9282 }
9283 
9284 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9285 {
9286     DisasContext *dc = container_of(dcbase, DisasContext, base);
9287     /*
9288      * The ECI/ICI bits share PSR bits with the IT bits, so we
9289      * need to reconstitute the bits from the split-out DisasContext
9290      * fields here.
9291      */
9292     uint32_t condexec_bits;
9293     target_ulong pc_arg = dc->base.pc_next;
9294 
9295     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9296         pc_arg &= ~TARGET_PAGE_MASK;
9297     }
9298     if (dc->eci) {
9299         condexec_bits = dc->eci << 4;
9300     } else {
9301         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9302     }
9303     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9304     dc->insn_start = tcg_last_op();
9305 }
9306 
9307 static bool arm_check_kernelpage(DisasContext *dc)
9308 {
9309 #ifdef CONFIG_USER_ONLY
9310     /* Intercept jump to the magic kernel page.  */
9311     if (dc->base.pc_next >= 0xffff0000) {
9312         /* We always get here via a jump, so know we are not in a
9313            conditional execution block.  */
9314         gen_exception_internal(EXCP_KERNEL_TRAP);
9315         dc->base.is_jmp = DISAS_NORETURN;
9316         return true;
9317     }
9318 #endif
9319     return false;
9320 }
9321 
9322 static bool arm_check_ss_active(DisasContext *dc)
9323 {
9324     if (dc->ss_active && !dc->pstate_ss) {
9325         /* Singlestep state is Active-pending.
9326          * If we're in this state at the start of a TB then either
9327          *  a) we just took an exception to an EL which is being debugged
9328          *     and this is the first insn in the exception handler
9329          *  b) debug exceptions were masked and we just unmasked them
9330          *     without changing EL (eg by clearing PSTATE.D)
9331          * In either case we're going to take a swstep exception in the
9332          * "did not step an insn" case, and so the syndrome ISV and EX
9333          * bits should be zero.
9334          */
9335         assert(dc->base.num_insns == 1);
9336         gen_swstep_exception(dc, 0, 0);
9337         dc->base.is_jmp = DISAS_NORETURN;
9338         return true;
9339     }
9340 
9341     return false;
9342 }
9343 
9344 static void arm_post_translate_insn(DisasContext *dc)
9345 {
9346     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9347         if (dc->pc_save != dc->condlabel.pc_save) {
9348             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9349         }
9350         gen_set_label(dc->condlabel.label);
9351         dc->condjmp = 0;
9352     }
9353 }
9354 
9355 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9356 {
9357     DisasContext *dc = container_of(dcbase, DisasContext, base);
9358     CPUARMState *env = cpu->env_ptr;
9359     uint32_t pc = dc->base.pc_next;
9360     unsigned int insn;
9361 
9362     /* Singlestep exceptions have the highest priority. */
9363     if (arm_check_ss_active(dc)) {
9364         dc->base.pc_next = pc + 4;
9365         return;
9366     }
9367 
9368     if (pc & 3) {
9369         /*
9370          * PC alignment fault.  This has priority over the instruction abort
9371          * that we would receive from a translation fault via arm_ldl_code
9372          * (or the execution of the kernelpage entrypoint). This should only
9373          * be possible after an indirect branch, at the start of the TB.
9374          */
9375         assert(dc->base.num_insns == 1);
9376         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9377         dc->base.is_jmp = DISAS_NORETURN;
9378         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9379         return;
9380     }
9381 
9382     if (arm_check_kernelpage(dc)) {
9383         dc->base.pc_next = pc + 4;
9384         return;
9385     }
9386 
9387     dc->pc_curr = pc;
9388     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9389     dc->insn = insn;
9390     dc->base.pc_next = pc + 4;
9391     disas_arm_insn(dc, insn);
9392 
9393     arm_post_translate_insn(dc);
9394 
9395     /* ARM is a fixed-length ISA.  We performed the cross-page check
9396        in init_disas_context by adjusting max_insns.  */
9397 }
9398 
9399 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9400 {
9401     /* Return true if this Thumb insn is always unconditional,
9402      * even inside an IT block. This is true of only a very few
9403      * instructions: BKPT, HLT, and SG.
9404      *
9405      * A larger class of instructions are UNPREDICTABLE if used
9406      * inside an IT block; we do not need to detect those here, because
9407      * what we do by default (perform the cc check and update the IT
9408      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9409      * choice for those situations.
9410      *
9411      * insn is either a 16-bit or a 32-bit instruction; the two are
9412      * distinguishable because for the 16-bit case the top 16 bits
9413      * are zeroes, and that isn't a valid 32-bit encoding.
9414      */
9415     if ((insn & 0xffffff00) == 0xbe00) {
9416         /* BKPT */
9417         return true;
9418     }
9419 
9420     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9421         !arm_dc_feature(s, ARM_FEATURE_M)) {
9422         /* HLT: v8A only. This is unconditional even when it is going to
9423          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9424          * For v7 cores this was a plain old undefined encoding and so
9425          * honours its cc check. (We might be using the encoding as
9426          * a semihosting trap, but we don't change the cc check behaviour
9427          * on that account, because a debugger connected to a real v7A
9428          * core and emulating semihosting traps by catching the UNDEF
9429          * exception would also only see cases where the cc check passed.
9430          * No guest code should be trying to do a HLT semihosting trap
9431          * in an IT block anyway.
9432          */
9433         return true;
9434     }
9435 
9436     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9437         arm_dc_feature(s, ARM_FEATURE_M)) {
9438         /* SG: v8M only */
9439         return true;
9440     }
9441 
9442     return false;
9443 }
9444 
9445 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9446 {
9447     DisasContext *dc = container_of(dcbase, DisasContext, base);
9448     CPUARMState *env = cpu->env_ptr;
9449     uint32_t pc = dc->base.pc_next;
9450     uint32_t insn;
9451     bool is_16bit;
9452     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9453     TCGOp *insn_eci_rewind = NULL;
9454     target_ulong insn_eci_pc_save = -1;
9455 
9456     /* Misaligned thumb PC is architecturally impossible. */
9457     assert((dc->base.pc_next & 1) == 0);
9458 
9459     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9460         dc->base.pc_next = pc + 2;
9461         return;
9462     }
9463 
9464     dc->pc_curr = pc;
9465     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9466     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9467     pc += 2;
9468     if (!is_16bit) {
9469         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9470         insn = insn << 16 | insn2;
9471         pc += 2;
9472     }
9473     dc->base.pc_next = pc;
9474     dc->insn = insn;
9475 
9476     if (dc->pstate_il) {
9477         /*
9478          * Illegal execution state. This has priority over BTI
9479          * exceptions, but comes after instruction abort exceptions.
9480          */
9481         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9482         return;
9483     }
9484 
9485     if (dc->eci) {
9486         /*
9487          * For M-profile continuable instructions, ECI/ICI handling
9488          * falls into these cases:
9489          *  - interrupt-continuable instructions
9490          *     These are the various load/store multiple insns (both
9491          *     integer and fp). The ICI bits indicate the register
9492          *     where the load/store can resume. We make the IMPDEF
9493          *     choice to always do "instruction restart", ie ignore
9494          *     the ICI value and always execute the ldm/stm from the
9495          *     start. So all we need to do is zero PSR.ICI if the
9496          *     insn executes.
9497          *  - MVE instructions subject to beat-wise execution
9498          *     Here the ECI bits indicate which beats have already been
9499          *     executed, and we must honour this. Each insn of this
9500          *     type will handle it correctly. We will update PSR.ECI
9501          *     in the helper function for the insn (some ECI values
9502          *     mean that the following insn also has been partially
9503          *     executed).
9504          *  - Special cases which don't advance ECI
9505          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9506          *     bits untouched.
9507          *  - all other insns (the common case)
9508          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9509          *     We place a rewind-marker here. Insns in the previous
9510          *     three categories will set a flag in the DisasContext.
9511          *     If the flag isn't set after we call disas_thumb_insn()
9512          *     or disas_thumb2_insn() then we know we have a "some other
9513          *     insn" case. We will rewind to the marker (ie throwing away
9514          *     all the generated code) and instead emit "take exception".
9515          */
9516         insn_eci_rewind = tcg_last_op();
9517         insn_eci_pc_save = dc->pc_save;
9518     }
9519 
9520     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9521         uint32_t cond = dc->condexec_cond;
9522 
9523         /*
9524          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9525          * "always"; 0xf is not "never".
9526          */
9527         if (cond < 0x0e) {
9528             arm_skip_unless(dc, cond);
9529         }
9530     }
9531 
9532     if (is_16bit) {
9533         disas_thumb_insn(dc, insn);
9534     } else {
9535         disas_thumb2_insn(dc, insn);
9536     }
9537 
9538     /* Advance the Thumb condexec condition.  */
9539     if (dc->condexec_mask) {
9540         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9541                              ((dc->condexec_mask >> 4) & 1));
9542         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9543         if (dc->condexec_mask == 0) {
9544             dc->condexec_cond = 0;
9545         }
9546     }
9547 
9548     if (dc->eci && !dc->eci_handled) {
9549         /*
9550          * Insn wasn't valid for ECI/ICI at all: undo what we
9551          * just generated and instead emit an exception
9552          */
9553         tcg_remove_ops_after(insn_eci_rewind);
9554         dc->pc_save = insn_eci_pc_save;
9555         dc->condjmp = 0;
9556         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9557     }
9558 
9559     arm_post_translate_insn(dc);
9560 
9561     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9562      * will touch a new page.  This ensures that prefetch aborts occur at
9563      * the right place.
9564      *
9565      * We want to stop the TB if the next insn starts in a new page,
9566      * or if it spans between this page and the next. This means that
9567      * if we're looking at the last halfword in the page we need to
9568      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9569      * or a 32-bit Thumb insn (which won't).
9570      * This is to avoid generating a silly TB with a single 16-bit insn
9571      * in it at the end of this page (which would execute correctly
9572      * but isn't very efficient).
9573      */
9574     if (dc->base.is_jmp == DISAS_NEXT
9575         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9576             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9577                 && insn_crosses_page(env, dc)))) {
9578         dc->base.is_jmp = DISAS_TOO_MANY;
9579     }
9580 }
9581 
9582 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9583 {
9584     DisasContext *dc = container_of(dcbase, DisasContext, base);
9585 
9586     /* At this stage dc->condjmp will only be set when the skipped
9587        instruction was a conditional branch or trap, and the PC has
9588        already been written.  */
9589     gen_set_condexec(dc);
9590     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9591         /* Exception return branches need some special case code at the
9592          * end of the TB, which is complex enough that it has to
9593          * handle the single-step vs not and the condition-failed
9594          * insn codepath itself.
9595          */
9596         gen_bx_excret_final_code(dc);
9597     } else if (unlikely(dc->ss_active)) {
9598         /* Unconditional and "condition passed" instruction codepath. */
9599         switch (dc->base.is_jmp) {
9600         case DISAS_SWI:
9601             gen_ss_advance(dc);
9602             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9603             break;
9604         case DISAS_HVC:
9605             gen_ss_advance(dc);
9606             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9607             break;
9608         case DISAS_SMC:
9609             gen_ss_advance(dc);
9610             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9611             break;
9612         case DISAS_NEXT:
9613         case DISAS_TOO_MANY:
9614         case DISAS_UPDATE_EXIT:
9615         case DISAS_UPDATE_NOCHAIN:
9616             gen_update_pc(dc, curr_insn_len(dc));
9617             /* fall through */
9618         default:
9619             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9620             gen_singlestep_exception(dc);
9621             break;
9622         case DISAS_NORETURN:
9623             break;
9624         }
9625     } else {
9626         /* While branches must always occur at the end of an IT block,
9627            there are a few other things that can cause us to terminate
9628            the TB in the middle of an IT block:
9629             - Exception generating instructions (bkpt, swi, undefined).
9630             - Page boundaries.
9631             - Hardware watchpoints.
9632            Hardware breakpoints have already been handled and skip this code.
9633          */
9634         switch (dc->base.is_jmp) {
9635         case DISAS_NEXT:
9636         case DISAS_TOO_MANY:
9637             gen_goto_tb(dc, 1, curr_insn_len(dc));
9638             break;
9639         case DISAS_UPDATE_NOCHAIN:
9640             gen_update_pc(dc, curr_insn_len(dc));
9641             /* fall through */
9642         case DISAS_JUMP:
9643             gen_goto_ptr();
9644             break;
9645         case DISAS_UPDATE_EXIT:
9646             gen_update_pc(dc, curr_insn_len(dc));
9647             /* fall through */
9648         default:
9649             /* indicate that the hash table must be used to find the next TB */
9650             tcg_gen_exit_tb(NULL, 0);
9651             break;
9652         case DISAS_NORETURN:
9653             /* nothing more to generate */
9654             break;
9655         case DISAS_WFI:
9656             gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9657             /*
9658              * The helper doesn't necessarily throw an exception, but we
9659              * must go back to the main loop to check for interrupts anyway.
9660              */
9661             tcg_gen_exit_tb(NULL, 0);
9662             break;
9663         case DISAS_WFE:
9664             gen_helper_wfe(cpu_env);
9665             break;
9666         case DISAS_YIELD:
9667             gen_helper_yield(cpu_env);
9668             break;
9669         case DISAS_SWI:
9670             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9671             break;
9672         case DISAS_HVC:
9673             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9674             break;
9675         case DISAS_SMC:
9676             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9677             break;
9678         }
9679     }
9680 
9681     if (dc->condjmp) {
9682         /* "Condition failed" instruction codepath for the branch/trap insn */
9683         set_disas_label(dc, dc->condlabel);
9684         gen_set_condexec(dc);
9685         if (unlikely(dc->ss_active)) {
9686             gen_update_pc(dc, curr_insn_len(dc));
9687             gen_singlestep_exception(dc);
9688         } else {
9689             gen_goto_tb(dc, 1, curr_insn_len(dc));
9690         }
9691     }
9692 }
9693 
9694 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9695                              CPUState *cpu, FILE *logfile)
9696 {
9697     DisasContext *dc = container_of(dcbase, DisasContext, base);
9698 
9699     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9700     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9701 }
9702 
9703 static const TranslatorOps arm_translator_ops = {
9704     .init_disas_context = arm_tr_init_disas_context,
9705     .tb_start           = arm_tr_tb_start,
9706     .insn_start         = arm_tr_insn_start,
9707     .translate_insn     = arm_tr_translate_insn,
9708     .tb_stop            = arm_tr_tb_stop,
9709     .disas_log          = arm_tr_disas_log,
9710 };
9711 
9712 static const TranslatorOps thumb_translator_ops = {
9713     .init_disas_context = arm_tr_init_disas_context,
9714     .tb_start           = arm_tr_tb_start,
9715     .insn_start         = arm_tr_insn_start,
9716     .translate_insn     = thumb_tr_translate_insn,
9717     .tb_stop            = arm_tr_tb_stop,
9718     .disas_log          = arm_tr_disas_log,
9719 };
9720 
9721 /* generate intermediate code for basic block 'tb'.  */
9722 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9723                            target_ulong pc, void *host_pc)
9724 {
9725     DisasContext dc = { };
9726     const TranslatorOps *ops = &arm_translator_ops;
9727     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9728 
9729     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9730         ops = &thumb_translator_ops;
9731     }
9732 #ifdef TARGET_AARCH64
9733     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9734         ops = &aarch64_translator_ops;
9735     }
9736 #endif
9737 
9738     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9739 }
9740