xref: /openbmc/qemu/target/arm/tcg/translate.c (revision ad66b5cb)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "semihosting/semihost.h"
33 #include "exec/helper-proto.h"
34 #include "exec/helper-gen.h"
35 #include "exec/log.h"
36 #include "cpregs.h"
37 
38 
/*
 * Architecture feature tests.  Each macro expands to an expression that
 * uses a variable named 's', so it can only be used where such a variable
 * (the DisasContext pointer in the functions below) is in scope.
 */
#define ENABLE_ARCH_4T    (arm_dc_feature(s, ARM_FEATURE_V4T))
#define ENABLE_ARCH_5     (arm_dc_feature(s, ARM_FEATURE_V5))
/* Every v5 core emulated at present is also v5TE, so reuse the V5 test. */
#define ENABLE_ARCH_5TE   (arm_dc_feature(s, ARM_FEATURE_V5))
#define ENABLE_ARCH_5J    (dc_isar_feature(aa32_jazelle, s))
#define ENABLE_ARCH_6     (arm_dc_feature(s, ARM_FEATURE_V6))
#define ENABLE_ARCH_6K    (arm_dc_feature(s, ARM_FEATURE_V6K))
#define ENABLE_ARCH_6T2   (arm_dc_feature(s, ARM_FEATURE_THUMB2))
#define ENABLE_ARCH_7     (arm_dc_feature(s, ARM_FEATURE_V7))
#define ENABLE_ARCH_8     (arm_dc_feature(s, ARM_FEATURE_V8))
49 
50 #include "translate.h"
51 #include "translate-a32.h"
52 
53 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
54 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
55 /* These are TCG globals which alias CPUARMState fields */
56 static TCGv_i32 cpu_R[16];
57 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
58 TCGv_i64 cpu_exclusive_addr;
59 TCGv_i64 cpu_exclusive_val;
60 
61 #include "exec/gen-icount.h"
62 
/* Names given to the cpu_R[] TCG globals, indexed by register number. */
static const char * const regnames[] = {
    "r0",  "r1",  "r2",  "r3",
    "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "pc",
};
66 
67 
68 /* initialize TCG globals.  */
69 void arm_translate_init(void)
70 {
71     int i;
72 
73     for (i = 0; i < 16; i++) {
74         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
75                                           offsetof(CPUARMState, regs[i]),
76                                           regnames[i]);
77     }
78     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
79     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
80     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
81     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
82 
83     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
84         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
85     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
86         offsetof(CPUARMState, exclusive_val), "exclusive_val");
87 
88     a64_translate_init();
89 }
90 
91 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
92 {
93     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
94     switch (cmode) {
95     case 0: case 1:
96         /* no-op */
97         break;
98     case 2: case 3:
99         imm <<= 8;
100         break;
101     case 4: case 5:
102         imm <<= 16;
103         break;
104     case 6: case 7:
105         imm <<= 24;
106         break;
107     case 8: case 9:
108         imm |= imm << 16;
109         break;
110     case 10: case 11:
111         imm = (imm << 8) | (imm << 24);
112         break;
113     case 12:
114         imm = (imm << 8) | 0xff;
115         break;
116     case 13:
117         imm = (imm << 16) | 0xffff;
118         break;
119     case 14:
120         if (op) {
121             /*
122              * This and cmode == 15 op == 1 are the only cases where
123              * the top and bottom 32 bits of the encoded constant differ.
124              */
125             uint64_t imm64 = 0;
126             int n;
127 
128             for (n = 0; n < 8; n++) {
129                 if (imm & (1 << n)) {
130                     imm64 |= (0xffULL << (n * 8));
131                 }
132             }
133             return imm64;
134         }
135         imm |= (imm << 8) | (imm << 16) | (imm << 24);
136         break;
137     case 15:
138         if (op) {
139             /* Reserved encoding for AArch32; valid for AArch64 */
140             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
141             if (imm & 0x80) {
142                 imm64 |= 0x8000000000000000ULL;
143             }
144             if (imm & 0x40) {
145                 imm64 |= 0x3fc0000000000000ULL;
146             } else {
147                 imm64 |= 0x4000000000000000ULL;
148             }
149             return imm64;
150         }
151         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
152             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
153         break;
154     }
155     if (op) {
156         imm = ~imm;
157     }
158     return dup_const(MO_32, imm);
159 }
160 
161 /* Generate a label used for skipping this instruction */
162 void arm_gen_condlabel(DisasContext *s)
163 {
164     if (!s->condjmp) {
165         s->condlabel = gen_disas_label(s);
166         s->condjmp = 1;
167     }
168 }
169 
/*
 * Encoding of the disas_set_da_iss() info argument: the low five bits
 * hold the Rt register number, the bits above them are flags.
 */
typedef enum ISSInfo {
    ISSNone     = 0x000,
    ISSRegMask  = 0x01f,    /* mask for the Rt register number */
    ISSInvalid  = 0x020,    /* do not record any ISS information */
    ISSIsAcqRel = 0x040,
    ISSIsWrite  = 0x080,
    ISSIs16Bit  = 0x100,
} ISSInfo;
181 
182 /*
183  * Store var into env + offset to a member with size bytes.
184  * Free var after use.
185  */
186 void store_cpu_offset(TCGv_i32 var, int offset, int size)
187 {
188     switch (size) {
189     case 1:
190         tcg_gen_st8_i32(var, cpu_env, offset);
191         break;
192     case 4:
193         tcg_gen_st_i32(var, cpu_env, offset);
194         break;
195     default:
196         g_assert_not_reached();
197     }
198 }
199 
200 /* Save the syndrome information for a Data Abort */
201 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
202 {
203     uint32_t syn;
204     int sas = memop & MO_SIZE;
205     bool sse = memop & MO_SIGN;
206     bool is_acqrel = issinfo & ISSIsAcqRel;
207     bool is_write = issinfo & ISSIsWrite;
208     bool is_16bit = issinfo & ISSIs16Bit;
209     int srt = issinfo & ISSRegMask;
210 
211     if (issinfo & ISSInvalid) {
212         /* Some callsites want to conditionally provide ISS info,
213          * eg "only if this was not a writeback"
214          */
215         return;
216     }
217 
218     if (srt == 15) {
219         /* For AArch32, insns where the src/dest is R15 never generate
220          * ISS information. Catching that here saves checking at all
221          * the call sites.
222          */
223         return;
224     }
225 
226     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
227                                   0, 0, 0, is_write, 0, is_16bit);
228     disas_set_insn_syndrome(s, syn);
229 }
230 
231 static inline int get_a32_user_mem_index(DisasContext *s)
232 {
233     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
234      * insns:
235      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
236      *  otherwise, access as if at PL0.
237      */
238     switch (s->mmu_idx) {
239     case ARMMMUIdx_E3:
240     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
241     case ARMMMUIdx_E10_0:
242     case ARMMMUIdx_E10_1:
243     case ARMMMUIdx_E10_1_PAN:
244         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
245     case ARMMMUIdx_MUser:
246     case ARMMMUIdx_MPriv:
247         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
248     case ARMMMUIdx_MUserNegPri:
249     case ARMMMUIdx_MPrivNegPri:
250         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
251     case ARMMMUIdx_MSUser:
252     case ARMMMUIdx_MSPriv:
253         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
254     case ARMMMUIdx_MSUserNegPri:
255     case ARMMMUIdx_MSPrivNegPri:
256         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
257     default:
258         g_assert_not_reached();
259     }
260 }
261 
262 /* The pc_curr difference for an architectural jump. */
263 static target_long jmp_diff(DisasContext *s, target_long diff)
264 {
265     return diff + (s->thumb ? 4 : 8);
266 }
267 
268 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
269 {
270     assert(s->pc_save != -1);
271     if (tb_cflags(s->base.tb) & CF_PCREL) {
272         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
273     } else {
274         tcg_gen_movi_i32(var, s->pc_curr + diff);
275     }
276 }
277 
278 /* Set a variable to the value of a CPU register.  */
279 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
280 {
281     if (reg == 15) {
282         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
283     } else {
284         tcg_gen_mov_i32(var, cpu_R[reg]);
285     }
286 }
287 
288 /*
289  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
290  * This is used for load/store for which use of PC implies (literal),
291  * or ADD that implies ADR.
292  */
293 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
294 {
295     TCGv_i32 tmp = tcg_temp_new_i32();
296 
297     if (reg == 15) {
298         /*
299          * This address is computed from an aligned PC:
300          * subtract off the low bits.
301          */
302         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
303     } else {
304         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
305     }
306     return tmp;
307 }
308 
309 /* Set a CPU register.  The source must be a temporary and will be
310    marked as dead.  */
311 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
312 {
313     if (reg == 15) {
314         /* In Thumb mode, we must ignore bit 0.
315          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
316          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
317          * We choose to ignore [1:0] in ARM mode for all architecture versions.
318          */
319         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
320         s->base.is_jmp = DISAS_JUMP;
321         s->pc_save = -1;
322     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
323         /* For M-profile SP bits [1:0] are always zero */
324         tcg_gen_andi_i32(var, var, ~3);
325     }
326     tcg_gen_mov_i32(cpu_R[reg], var);
327 }
328 
329 /*
330  * Variant of store_reg which applies v8M stack-limit checks before updating
331  * SP. If the check fails this will result in an exception being taken.
332  * We disable the stack checks for CONFIG_USER_ONLY because we have
333  * no idea what the stack limits should be in that case.
334  * If stack checking is not being done this just acts like store_reg().
335  */
336 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
337 {
338 #ifndef CONFIG_USER_ONLY
339     if (s->v8m_stackcheck) {
340         gen_helper_v8m_stackcheck(cpu_env, var);
341     }
342 #endif
343     store_reg(s, 13, var);
344 }
345 
/* Value extensions, each performed in place on var. */
#define gen_uxtb(var)   tcg_gen_ext8u_i32((var), (var))
#define gen_uxth(var)   tcg_gen_ext16u_i32((var), (var))
#define gen_sxtb(var)   tcg_gen_ext8s_i32((var), (var))
#define gen_sxth(var)   tcg_gen_ext16s_i32((var), (var))

/* The dual-byte variants are implemented by helper calls. */
#define gen_sxtb16(var) gen_helper_sxtb16((var), (var))
#define gen_uxtb16(var) gen_helper_uxtb16((var), (var))
354 
355 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
356 {
357     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
358 }
359 
360 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
361 {
362     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
363 
364     if (new_el) {
365         if (m_profile) {
366             gen_helper_rebuild_hflags_m32_newel(cpu_env);
367         } else {
368             gen_helper_rebuild_hflags_a32_newel(cpu_env);
369         }
370     } else {
371         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
372         if (m_profile) {
373             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
374         } else {
375             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
376         }
377     }
378 }
379 
380 static void gen_exception_internal(int excp)
381 {
382     assert(excp_is_internal(excp));
383     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
384 }
385 
386 static void gen_singlestep_exception(DisasContext *s)
387 {
388     /* We just completed step of an insn. Move from Active-not-pending
389      * to Active-pending, and then also take the swstep exception.
390      * This corresponds to making the (IMPDEF) choice to prioritize
391      * swstep exceptions over asynchronous exceptions taken to an exception
392      * level where debug is disabled. This choice has the advantage that
393      * we do not need to maintain internal state corresponding to the
394      * ISV/EX syndrome bits between completion of the step and generation
395      * of the exception, and our syndrome information is always correct.
396      */
397     gen_ss_advance(s);
398     gen_swstep_exception(s, 1, s->is_ldex);
399     s->base.is_jmp = DISAS_NORETURN;
400 }
401 
402 void clear_eci_state(DisasContext *s)
403 {
404     /*
405      * Clear any ECI/ICI state: used when a load multiple/store
406      * multiple insn executes.
407      */
408     if (s->eci) {
409         store_cpu_field_constant(0, condexec_bits);
410         s->eci = 0;
411     }
412 }
413 
414 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
415 {
416     TCGv_i32 tmp1 = tcg_temp_new_i32();
417     TCGv_i32 tmp2 = tcg_temp_new_i32();
418     tcg_gen_ext16s_i32(tmp1, a);
419     tcg_gen_ext16s_i32(tmp2, b);
420     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
421     tcg_gen_sari_i32(a, a, 16);
422     tcg_gen_sari_i32(b, b, 16);
423     tcg_gen_mul_i32(b, b, a);
424     tcg_gen_mov_i32(a, tmp1);
425 }
426 
427 /* Byteswap each halfword.  */
428 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
429 {
430     TCGv_i32 tmp = tcg_temp_new_i32();
431     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
432     tcg_gen_shri_i32(tmp, var, 8);
433     tcg_gen_and_i32(tmp, tmp, mask);
434     tcg_gen_and_i32(var, var, mask);
435     tcg_gen_shli_i32(var, var, 8);
436     tcg_gen_or_i32(dest, var, tmp);
437 }
438 
439 /* Byteswap low halfword and sign extend.  */
440 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
441 {
442     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
443 }
444 
445 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
446     tmp = (t0 ^ t1) & 0x8000;
447     t0 &= ~0x8000;
448     t1 &= ~0x8000;
449     t0 = (t0 + t1) ^ tmp;
450  */
451 
452 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
453 {
454     TCGv_i32 tmp = tcg_temp_new_i32();
455     tcg_gen_xor_i32(tmp, t0, t1);
456     tcg_gen_andi_i32(tmp, tmp, 0x8000);
457     tcg_gen_andi_i32(t0, t0, ~0x8000);
458     tcg_gen_andi_i32(t1, t1, ~0x8000);
459     tcg_gen_add_i32(t0, t0, t1);
460     tcg_gen_xor_i32(dest, t0, tmp);
461 }
462 
463 /* Set N and Z flags from var.  */
464 static inline void gen_logic_CC(TCGv_i32 var)
465 {
466     tcg_gen_mov_i32(cpu_NF, var);
467     tcg_gen_mov_i32(cpu_ZF, var);
468 }
469 
470 /* dest = T0 + T1 + CF. */
471 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
472 {
473     tcg_gen_add_i32(dest, t0, t1);
474     tcg_gen_add_i32(dest, dest, cpu_CF);
475 }
476 
477 /* dest = T0 - T1 + CF - 1.  */
478 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
479 {
480     tcg_gen_sub_i32(dest, t0, t1);
481     tcg_gen_add_i32(dest, dest, cpu_CF);
482     tcg_gen_subi_i32(dest, dest, 1);
483 }
484 
485 /* dest = T0 + T1. Compute C, N, V and Z flags */
486 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
487 {
488     TCGv_i32 tmp = tcg_temp_new_i32();
489     tcg_gen_movi_i32(tmp, 0);
490     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
491     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
492     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
493     tcg_gen_xor_i32(tmp, t0, t1);
494     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
495     tcg_gen_mov_i32(dest, cpu_NF);
496 }
497 
498 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
499 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
500 {
501     TCGv_i32 tmp = tcg_temp_new_i32();
502     if (TCG_TARGET_HAS_add2_i32) {
503         tcg_gen_movi_i32(tmp, 0);
504         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
505         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
506     } else {
507         TCGv_i64 q0 = tcg_temp_new_i64();
508         TCGv_i64 q1 = tcg_temp_new_i64();
509         tcg_gen_extu_i32_i64(q0, t0);
510         tcg_gen_extu_i32_i64(q1, t1);
511         tcg_gen_add_i64(q0, q0, q1);
512         tcg_gen_extu_i32_i64(q1, cpu_CF);
513         tcg_gen_add_i64(q0, q0, q1);
514         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
515     }
516     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
517     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
518     tcg_gen_xor_i32(tmp, t0, t1);
519     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
520     tcg_gen_mov_i32(dest, cpu_NF);
521 }
522 
523 /* dest = T0 - T1. Compute C, N, V and Z flags */
524 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
525 {
526     TCGv_i32 tmp;
527     tcg_gen_sub_i32(cpu_NF, t0, t1);
528     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
529     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
530     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
531     tmp = tcg_temp_new_i32();
532     tcg_gen_xor_i32(tmp, t0, t1);
533     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
534     tcg_gen_mov_i32(dest, cpu_NF);
535 }
536 
537 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
538 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
539 {
540     TCGv_i32 tmp = tcg_temp_new_i32();
541     tcg_gen_not_i32(tmp, t1);
542     gen_adc_CC(dest, t0, tmp);
543 }
544 
545 #define GEN_SHIFT(name)                                               \
546 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
547 {                                                                     \
548     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
549     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
550     TCGv_i32 zero = tcg_constant_i32(0);                              \
551     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
552     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
553     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
554     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
555 }
556 GEN_SHIFT(shl)
557 GEN_SHIFT(shr)
558 #undef GEN_SHIFT
559 
560 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
561 {
562     TCGv_i32 tmp1 = tcg_temp_new_i32();
563 
564     tcg_gen_andi_i32(tmp1, t1, 0xff);
565     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
566     tcg_gen_sar_i32(dest, t0, tmp1);
567 }
568 
569 static void shifter_out_im(TCGv_i32 var, int shift)
570 {
571     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
572 }
573 
574 /* Shift by immediate.  Includes special handling for shift == 0.  */
575 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
576                                     int shift, int flags)
577 {
578     switch (shiftop) {
579     case 0: /* LSL */
580         if (shift != 0) {
581             if (flags)
582                 shifter_out_im(var, 32 - shift);
583             tcg_gen_shli_i32(var, var, shift);
584         }
585         break;
586     case 1: /* LSR */
587         if (shift == 0) {
588             if (flags) {
589                 tcg_gen_shri_i32(cpu_CF, var, 31);
590             }
591             tcg_gen_movi_i32(var, 0);
592         } else {
593             if (flags)
594                 shifter_out_im(var, shift - 1);
595             tcg_gen_shri_i32(var, var, shift);
596         }
597         break;
598     case 2: /* ASR */
599         if (shift == 0)
600             shift = 32;
601         if (flags)
602             shifter_out_im(var, shift - 1);
603         if (shift == 32)
604           shift = 31;
605         tcg_gen_sari_i32(var, var, shift);
606         break;
607     case 3: /* ROR/RRX */
608         if (shift != 0) {
609             if (flags)
610                 shifter_out_im(var, shift - 1);
611             tcg_gen_rotri_i32(var, var, shift); break;
612         } else {
613             TCGv_i32 tmp = tcg_temp_new_i32();
614             tcg_gen_shli_i32(tmp, cpu_CF, 31);
615             if (flags)
616                 shifter_out_im(var, 0);
617             tcg_gen_shri_i32(var, var, 1);
618             tcg_gen_or_i32(var, var, tmp);
619         }
620     }
621 };
622 
623 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
624                                      TCGv_i32 shift, int flags)
625 {
626     if (flags) {
627         switch (shiftop) {
628         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
629         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
630         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
631         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
632         }
633     } else {
634         switch (shiftop) {
635         case 0:
636             gen_shl(var, var, shift);
637             break;
638         case 1:
639             gen_shr(var, var, shift);
640             break;
641         case 2:
642             gen_sar(var, var, shift);
643             break;
644         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
645                 tcg_gen_rotr_i32(var, var, shift); break;
646         }
647     }
648 }
649 
650 /*
651  * Generate a conditional based on ARM condition code cc.
652  * This is common between ARM and Aarch64 targets.
653  */
654 void arm_test_cc(DisasCompare *cmp, int cc)
655 {
656     TCGv_i32 value;
657     TCGCond cond;
658 
659     switch (cc) {
660     case 0: /* eq: Z */
661     case 1: /* ne: !Z */
662         cond = TCG_COND_EQ;
663         value = cpu_ZF;
664         break;
665 
666     case 2: /* cs: C */
667     case 3: /* cc: !C */
668         cond = TCG_COND_NE;
669         value = cpu_CF;
670         break;
671 
672     case 4: /* mi: N */
673     case 5: /* pl: !N */
674         cond = TCG_COND_LT;
675         value = cpu_NF;
676         break;
677 
678     case 6: /* vs: V */
679     case 7: /* vc: !V */
680         cond = TCG_COND_LT;
681         value = cpu_VF;
682         break;
683 
684     case 8: /* hi: C && !Z */
685     case 9: /* ls: !C || Z -> !(C && !Z) */
686         cond = TCG_COND_NE;
687         value = tcg_temp_new_i32();
688         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
689            ZF is non-zero for !Z; so AND the two subexpressions.  */
690         tcg_gen_neg_i32(value, cpu_CF);
691         tcg_gen_and_i32(value, value, cpu_ZF);
692         break;
693 
694     case 10: /* ge: N == V -> N ^ V == 0 */
695     case 11: /* lt: N != V -> N ^ V != 0 */
696         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
697         cond = TCG_COND_GE;
698         value = tcg_temp_new_i32();
699         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
700         break;
701 
702     case 12: /* gt: !Z && N == V */
703     case 13: /* le: Z || N != V */
704         cond = TCG_COND_NE;
705         value = tcg_temp_new_i32();
706         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
707          * the sign bit then AND with ZF to yield the result.  */
708         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
709         tcg_gen_sari_i32(value, value, 31);
710         tcg_gen_andc_i32(value, cpu_ZF, value);
711         break;
712 
713     case 14: /* always */
714     case 15: /* always */
715         /* Use the ALWAYS condition, which will fold early.
716          * It doesn't matter what we use for the value.  */
717         cond = TCG_COND_ALWAYS;
718         value = cpu_ZF;
719         goto no_invert;
720 
721     default:
722         fprintf(stderr, "Bad condition code 0x%x\n", cc);
723         abort();
724     }
725 
726     if (cc & 1) {
727         cond = tcg_invert_cond(cond);
728     }
729 
730  no_invert:
731     cmp->cond = cond;
732     cmp->value = value;
733 }
734 
735 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
736 {
737     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
738 }
739 
740 void arm_gen_test_cc(int cc, TCGLabel *label)
741 {
742     DisasCompare cmp;
743     arm_test_cc(&cmp, cc);
744     arm_jump_cc(&cmp, label);
745 }
746 
747 void gen_set_condexec(DisasContext *s)
748 {
749     if (s->condexec_mask) {
750         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
751 
752         store_cpu_field_constant(val, condexec_bits);
753     }
754 }
755 
756 void gen_update_pc(DisasContext *s, target_long diff)
757 {
758     gen_pc_plus_diff(s, cpu_R[15], diff);
759     s->pc_save = s->pc_curr + diff;
760 }
761 
762 /* Set PC and Thumb state from var.  var is marked as dead.  */
763 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
764 {
765     s->base.is_jmp = DISAS_JUMP;
766     tcg_gen_andi_i32(cpu_R[15], var, ~1);
767     tcg_gen_andi_i32(var, var, 1);
768     store_cpu_field(var, thumb);
769     s->pc_save = -1;
770 }
771 
772 /*
773  * Set PC and Thumb state from var. var is marked as dead.
774  * For M-profile CPUs, include logic to detect exception-return
775  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
776  * and BX reg, and no others, and happens only for code in Handler mode.
777  * The Security Extension also requires us to check for the FNC_RETURN
778  * which signals a function return from non-secure state; this can happen
779  * in both Handler and Thread mode.
780  * To avoid having to do multiple comparisons in inline generated code,
781  * we make the check we do here loose, so it will match for EXC_RETURN
782  * in Thread mode. For system emulation do_v7m_exception_exit() checks
783  * for these spurious cases and returns without doing anything (giving
784  * the same behaviour as for a branch to a non-magic address).
785  *
786  * In linux-user mode it is unclear what the right behaviour for an
787  * attempted FNC_RETURN should be, because in real hardware this will go
788  * directly to Secure code (ie not the Linux kernel) which will then treat
789  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
790  * attempt behave the way it would on a CPU without the security extension,
791  * which is to say "like a normal branch". That means we can simply treat
792  * all branches as normal with no magic address behaviour.
793  */
794 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
795 {
796     /* Generate the same code here as for a simple bx, but flag via
797      * s->base.is_jmp that we need to do the rest of the work later.
798      */
799     gen_bx(s, var);
800 #ifndef CONFIG_USER_ONLY
801     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
802         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
803         s->base.is_jmp = DISAS_BX_EXCRET;
804     }
805 #endif
806 }
807 
808 static inline void gen_bx_excret_final_code(DisasContext *s)
809 {
810     /* Generate the code to finish possible exception return and end the TB */
811     DisasLabel excret_label = gen_disas_label(s);
812     uint32_t min_magic;
813 
814     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
815         /* Covers FNC_RETURN and EXC_RETURN magic */
816         min_magic = FNC_RETURN_MIN_MAGIC;
817     } else {
818         /* EXC_RETURN magic only */
819         min_magic = EXC_RETURN_MIN_MAGIC;
820     }
821 
822     /* Is the new PC value in the magic range indicating exception return? */
823     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
824     /* No: end the TB as we would for a DISAS_JMP */
825     if (s->ss_active) {
826         gen_singlestep_exception(s);
827     } else {
828         tcg_gen_exit_tb(NULL, 0);
829     }
830     set_disas_label(s, excret_label);
831     /* Yes: this is an exception return.
832      * At this point in runtime env->regs[15] and env->thumb will hold
833      * the exception-return magic number, which do_v7m_exception_exit()
834      * will read. Nothing else will be able to see those values because
835      * the cpu-exec main loop guarantees that we will always go straight
836      * from raising the exception to the exception-handling code.
837      *
838      * gen_ss_advance(s) does nothing on M profile currently but
839      * calling it is conceptually the right thing as we have executed
840      * this instruction (compare SWI, HVC, SMC handling).
841      */
842     gen_ss_advance(s);
843     gen_exception_internal(EXCP_EXCEPTION_EXIT);
844 }
845 
846 static inline void gen_bxns(DisasContext *s, int rm)
847 {
848     TCGv_i32 var = load_reg(s, rm);
849 
850     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
851      * we need to sync state before calling it, but:
852      *  - we don't need to do gen_update_pc() because the bxns helper will
853      *    always set the PC itself
854      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
855      *    unless it's outside an IT block or the last insn in an IT block,
856      *    so we know that condexec == 0 (already set at the top of the TB)
857      *    is correct in the non-UNPREDICTABLE cases, and we can choose
858      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
859      */
860     gen_helper_v7m_bxns(cpu_env, var);
861     s->base.is_jmp = DISAS_EXIT;
862 }
863 
864 static inline void gen_blxns(DisasContext *s, int rm)
865 {
866     TCGv_i32 var = load_reg(s, rm);
867 
868     /* We don't need to sync condexec state, for the same reason as bxns.
869      * We do however need to set the PC, because the blxns helper reads it.
870      * The blxns helper may throw an exception.
871      */
872     gen_update_pc(s, curr_insn_len(s));
873     gen_helper_v7m_blxns(cpu_env, var);
874     s->base.is_jmp = DISAS_EXIT;
875 }
876 
877 /* Variant of store_reg which uses branch&exchange logic when storing
878    to r15 in ARM architecture v7 and above. The source must be a temporary
879    and will be marked as dead. */
880 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
881 {
882     if (reg == 15 && ENABLE_ARCH_7) {
883         gen_bx(s, var);
884     } else {
885         store_reg(s, reg, var);
886     }
887 }
888 
889 /* Variant of store_reg which uses branch&exchange logic when storing
890  * to r15 in ARM architecture v5T and above. This is used for storing
891  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
892  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
893 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
894 {
895     if (reg == 15 && ENABLE_ARCH_5) {
896         gen_bx_excret(s, var);
897     } else {
898         store_reg(s, reg, var);
899     }
900 }
901 
/* CONFIG_USER_ONLY as a 0/1 value usable in ordinary C conditions. */
#ifndef CONFIG_USER_ONLY
#define IS_USER_ONLY 0
#else
#define IS_USER_ONLY 1
#endif
907 
908 MemOp pow2_align(unsigned i)
909 {
910     static const MemOp mop_align[] = {
911         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
912         /*
913          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
914          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
915          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
916          */
917         MO_ALIGN_16
918     };
919     g_assert(i < ARRAY_SIZE(mop_align));
920     return mop_align[i];
921 }
922 
923 /*
924  * Abstractions of "generate code to do a guest load/store for
925  * AArch32", where a vaddr is always 32 bits (and is zero
926  * extended if we're a 64 bit core) and  data is also
927  * 32 bits unless specifically doing a 64 bit access.
928  * These functions work like tcg_gen_qemu_{ld,st}* except
929  * that the address argument is TCGv_i32 rather than TCGv.
930  */
931 
932 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
933 {
934     TCGv addr = tcg_temp_new();
935     tcg_gen_extu_i32_tl(addr, a32);
936 
937     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
938     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
939         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
940     }
941     return addr;
942 }
943 
/*
 * Internal routines are used for NEON cases where the endianness
 * and/or alignment has already been taken into account and manipulated.
 */

/*
 * Emit a guest load into the 32-bit value @val from the AArch32 address
 * @a32, using MMU index @index and memop @opc exactly as given (no
 * finalize_memop() is applied here).
 */
void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
                              TCGv_i32 a32, int index, MemOp opc)
{
    TCGv addr = gen_aa32_addr(s, a32, opc);
    tcg_gen_qemu_ld_i32(val, addr, index, opc);
}
954 
/*
 * Emit a guest store of the 32-bit value @val to the AArch32 address
 * @a32, using MMU index @index and memop @opc exactly as given (no
 * finalize_memop() is applied here).
 */
void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
                              TCGv_i32 a32, int index, MemOp opc)
{
    TCGv addr = gen_aa32_addr(s, a32, opc);
    tcg_gen_qemu_st_i32(val, addr, index, opc);
}
961 
962 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
963                               TCGv_i32 a32, int index, MemOp opc)
964 {
965     TCGv addr = gen_aa32_addr(s, a32, opc);
966 
967     tcg_gen_qemu_ld_i64(val, addr, index, opc);
968 
969     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
970     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
971         tcg_gen_rotri_i64(val, val, 32);
972     }
973 }
974 
975 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
976                               TCGv_i32 a32, int index, MemOp opc)
977 {
978     TCGv addr = gen_aa32_addr(s, a32, opc);
979 
980     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
981     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
982         TCGv_i64 tmp = tcg_temp_new_i64();
983         tcg_gen_rotri_i64(tmp, val, 32);
984         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
985     } else {
986         tcg_gen_qemu_st_i64(val, addr, index, opc);
987     }
988 }
989 
990 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
991                      int index, MemOp opc)
992 {
993     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
994 }
995 
996 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
997                      int index, MemOp opc)
998 {
999     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1000 }
1001 
1002 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1003                      int index, MemOp opc)
1004 {
1005     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1006 }
1007 
1008 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1009                      int index, MemOp opc)
1010 {
1011     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1012 }
1013 
/*
 * DO_GEN_LD(SUFF, OPC) defines a static inline gen_aa32_ld<SUFF>() which
 * calls gen_aa32_ld_i32() with the memop fixed to OPC.
 */
#define DO_GEN_LD(SUFF, OPC)                                            \
    static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
                                         TCGv_i32 a32, int index)       \
    {                                                                   \
        gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
    }
1020 
/*
 * DO_GEN_ST(SUFF, OPC) defines a static inline gen_aa32_st<SUFF>() which
 * calls gen_aa32_st_i32() with the memop fixed to OPC.
 */
#define DO_GEN_ST(SUFF, OPC)                                            \
    static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
                                         TCGv_i32 a32, int index)       \
    {                                                                   \
        gen_aa32_st_i32(s, val, a32, index, OPC);                       \
    }
1027 
/*
 * Emit code for an HVC instruction with immediate @imm16.
 *
 * The PC is written twice: once pointing at this insn before the pre-HVC
 * helper call, and once pointing past it before the TB ends with
 * DISAS_HVC.  The immediate is stashed in s->svc_imm.
 */
static inline void gen_hvc(DisasContext *s, int imm16)
{
    /*
     * The pre HVC helper handles cases when HVC gets trapped
     * as an undefined insn by runtime configuration (ie before
     * the insn really executes).
     */
    gen_update_pc(s, 0);
    gen_helper_pre_hvc(cpu_env);
    /*
     * Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    gen_update_pc(s, curr_insn_len(s));
    s->base.is_jmp = DISAS_HVC;
}
1045 
/*
 * Emit code for an SMC instruction.
 *
 * The PC points at this insn while the pre-SMC helper runs (it is passed
 * the AArch32 SMC syndrome), then is advanced past the insn before the
 * TB ends with DISAS_SMC.
 */
static inline void gen_smc(DisasContext *s)
{
    /*
     * As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    gen_update_pc(s, 0);
    gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
    gen_update_pc(s, curr_insn_len(s));
    s->base.is_jmp = DISAS_SMC;
}
1056 
/*
 * Raise the QEMU-internal exception @excp at the current instruction:
 * sync condexec state, set the PC to this insn (pc_diff 0), emit the
 * exception, and mark the TB as not returning.
 */
static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_set_condexec(s);
    gen_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}
1064 
1065 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1066 {
1067     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1068                                           tcg_constant_i32(syndrome), tcg_el);
1069 }
1070 
1071 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1072 {
1073     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1074 }
1075 
1076 static void gen_exception(int excp, uint32_t syndrome)
1077 {
1078     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1079                                        tcg_constant_i32(syndrome));
1080 }
1081 
1082 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1083                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1084 {
1085     if (s->aarch64) {
1086         gen_a64_update_pc(s, pc_diff);
1087     } else {
1088         gen_set_condexec(s);
1089         gen_update_pc(s, pc_diff);
1090     }
1091     gen_exception_el_v(excp, syn, tcg_el);
1092     s->base.is_jmp = DISAS_NORETURN;
1093 }
1094 
1095 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1096                            uint32_t syn, uint32_t target_el)
1097 {
1098     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1099                             tcg_constant_i32(target_el));
1100 }
1101 
1102 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1103                         int excp, uint32_t syn)
1104 {
1105     if (s->aarch64) {
1106         gen_a64_update_pc(s, pc_diff);
1107     } else {
1108         gen_set_condexec(s);
1109         gen_update_pc(s, pc_diff);
1110     }
1111     gen_exception(excp, syn);
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1116 {
1117     gen_set_condexec(s);
1118     gen_update_pc(s, 0);
1119     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1120     s->base.is_jmp = DISAS_NORETURN;
1121 }
1122 
1123 void unallocated_encoding(DisasContext *s)
1124 {
1125     /* Unallocated and reserved encodings are uncategorized */
1126     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1127 }
1128 
/*
 * Force a TB lookup after an instruction that changes the CPU state.
 *
 * Writes the address of the next instruction (current insn plus its
 * length) into cpu_R[15] and ends the TB with DISAS_EXIT.
 */
void gen_lookup_tb(DisasContext *s)
{
    gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
    s->base.is_jmp = DISAS_EXIT;
}
1135 
1136 static inline void gen_hlt(DisasContext *s, int imm)
1137 {
1138     /* HLT. This has two purposes.
1139      * Architecturally, it is an external halting debug instruction.
1140      * Since QEMU doesn't implement external debug, we treat this as
1141      * it is required for halting debug disabled: it will UNDEF.
1142      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1143      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1144      * must trigger semihosting even for ARMv7 and earlier, where
1145      * HLT was an undefined encoding.
1146      * In system mode, we don't allow userspace access to
1147      * semihosting, to provide some semblance of security
1148      * (and for consistency with our 32-bit semihosting).
1149      */
1150     if (semihosting_enabled(s->current_el == 0) &&
1151         (imm == (s->thumb ? 0x3c : 0xf000))) {
1152         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1153         return;
1154     }
1155 
1156     unallocated_encoding(s);
1157 }
1158 
1159 /*
1160  * Return the offset of a "full" NEON Dreg.
1161  */
1162 long neon_full_reg_offset(unsigned reg)
1163 {
1164     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1165 }
1166 
1167 /*
1168  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1169  * where 0 is the least significant end of the register.
1170  */
1171 long neon_element_offset(int reg, int element, MemOp memop)
1172 {
1173     int element_size = 1 << (memop & MO_SIZE);
1174     int ofs = element * element_size;
1175 #if HOST_BIG_ENDIAN
1176     /*
1177      * Calculate the offset assuming fully little-endian,
1178      * then XOR to account for the order of the 8-byte units.
1179      */
1180     if (element_size < 8) {
1181         ofs ^= 8 - element_size;
1182     }
1183 #endif
1184     return neon_full_reg_offset(reg) + ofs;
1185 }
1186 
1187 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1188 long vfp_reg_offset(bool dp, unsigned reg)
1189 {
1190     if (dp) {
1191         return neon_element_offset(reg, 0, MO_64);
1192     } else {
1193         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1194     }
1195 }
1196 
1197 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1198 {
1199     long off = neon_element_offset(reg, ele, memop);
1200 
1201     switch (memop) {
1202     case MO_SB:
1203         tcg_gen_ld8s_i32(dest, cpu_env, off);
1204         break;
1205     case MO_UB:
1206         tcg_gen_ld8u_i32(dest, cpu_env, off);
1207         break;
1208     case MO_SW:
1209         tcg_gen_ld16s_i32(dest, cpu_env, off);
1210         break;
1211     case MO_UW:
1212         tcg_gen_ld16u_i32(dest, cpu_env, off);
1213         break;
1214     case MO_UL:
1215     case MO_SL:
1216         tcg_gen_ld_i32(dest, cpu_env, off);
1217         break;
1218     default:
1219         g_assert_not_reached();
1220     }
1221 }
1222 
1223 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1224 {
1225     long off = neon_element_offset(reg, ele, memop);
1226 
1227     switch (memop) {
1228     case MO_SL:
1229         tcg_gen_ld32s_i64(dest, cpu_env, off);
1230         break;
1231     case MO_UL:
1232         tcg_gen_ld32u_i64(dest, cpu_env, off);
1233         break;
1234     case MO_UQ:
1235         tcg_gen_ld_i64(dest, cpu_env, off);
1236         break;
1237     default:
1238         g_assert_not_reached();
1239     }
1240 }
1241 
1242 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1243 {
1244     long off = neon_element_offset(reg, ele, memop);
1245 
1246     switch (memop) {
1247     case MO_8:
1248         tcg_gen_st8_i32(src, cpu_env, off);
1249         break;
1250     case MO_16:
1251         tcg_gen_st16_i32(src, cpu_env, off);
1252         break;
1253     case MO_32:
1254         tcg_gen_st_i32(src, cpu_env, off);
1255         break;
1256     default:
1257         g_assert_not_reached();
1258     }
1259 }
1260 
1261 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1262 {
1263     long off = neon_element_offset(reg, ele, memop);
1264 
1265     switch (memop) {
1266     case MO_32:
1267         tcg_gen_st32_i64(src, cpu_env, off);
1268         break;
1269     case MO_64:
1270         tcg_gen_st_i64(src, cpu_env, off);
1271         break;
1272     default:
1273         g_assert_not_reached();
1274     }
1275 }
1276 
/*
 * Bit 20 of a coprocessor instruction word.  In the iwMMXt decoder in
 * this file it selects the TMRRC/WLDR forms when set and the TMCRR/WSTR
 * forms when clear.
 */
#define ARM_CP_RW_BIT   (1 << 20)
1278 
1279 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1280 {
1281     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1282 }
1283 
1284 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1285 {
1286     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1287 }
1288 
1289 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1290 {
1291     TCGv_i32 var = tcg_temp_new_i32();
1292     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1293     return var;
1294 }
1295 
1296 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1297 {
1298     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1299 }
1300 
/* wR[rn] = M0: write the cpu_M0 working value back to iwMMXt register rn. */
static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}
1305 
/* M0 = wR[rn]: load iwMMXt register rn into the cpu_M0 working value. */
static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}
1310 
/* M0 |= wR[rn] (64-bit); cpu_V1 is overwritten with wR[rn] as scratch. */
static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}
1316 
/* M0 &= wR[rn] (64-bit); cpu_V1 is overwritten with wR[rn] as scratch. */
static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}
1322 
/* M0 ^= wR[rn] (64-bit); cpu_V1 is overwritten with wR[rn] as scratch. */
static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}
1328 
/*
 * IWMMXT_OP(name) defines gen_op_iwmmxt_<name>_M0_wRn(rn), which loads
 * wR[rn] into cpu_V1 and sets M0 = helper_iwmmxt_<name>(M0, V1).
 * The helper is not passed cpu_env.
 */
#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}
1335 
/*
 * IWMMXT_OP_ENV(name) is like IWMMXT_OP(name), except that the helper is
 * additionally passed cpu_env: M0 = helper_iwmmxt_<name>(env, M0, V1).
 */
#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}
1342 
/*
 * IWMMXT_OP_ENV_SIZE(name) instantiates IWMMXT_OP_ENV three times, for
 * the <name>b, <name>w and <name>l variants.
 */
#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)
1347 
/*
 * IWMMXT_OP_ENV1(name) defines gen_op_iwmmxt_<name>_M0(void), a
 * single-operand op: M0 = helper_iwmmxt_<name>(env, M0).
 */
#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}
1353 
/* Two-operand ops whose helpers do not take cpu_env. */
IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

/* Two-operand ops with cpu_env, generated in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

/* Single-operand ops with cpu_env (operate on M0 only). */
IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

/* Two-operand ops with cpu_env, generated in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

/* Two-operand ops with cpu_env, individually named variants. */
IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)
1408 
1409 static void gen_op_iwmmxt_set_mup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 2);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_set_cup(void)
1418 {
1419     TCGv_i32 tmp;
1420     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1421     tcg_gen_ori_i32(tmp, tmp, 1);
1422     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1423 }
1424 
1425 static void gen_op_iwmmxt_setpsr_nz(void)
1426 {
1427     TCGv_i32 tmp = tcg_temp_new_i32();
1428     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1429     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1430 }
1431 
/*
 * M0 += zero-extended low 32 bits of wR[rn].
 * cpu_V1 is used as scratch and is overwritten.
 */
static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}
1438 
1439 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1440                                      TCGv_i32 dest)
1441 {
1442     int rd;
1443     uint32_t offset;
1444     TCGv_i32 tmp;
1445 
1446     rd = (insn >> 16) & 0xf;
1447     tmp = load_reg(s, rd);
1448 
1449     offset = (insn & 0xff) << ((insn >> 7) & 2);
1450     if (insn & (1 << 24)) {
1451         /* Pre indexed */
1452         if (insn & (1 << 23))
1453             tcg_gen_addi_i32(tmp, tmp, offset);
1454         else
1455             tcg_gen_addi_i32(tmp, tmp, -offset);
1456         tcg_gen_mov_i32(dest, tmp);
1457         if (insn & (1 << 21)) {
1458             store_reg(s, rd, tmp);
1459         }
1460     } else if (insn & (1 << 21)) {
1461         /* Post indexed */
1462         tcg_gen_mov_i32(dest, tmp);
1463         if (insn & (1 << 23))
1464             tcg_gen_addi_i32(tmp, tmp, offset);
1465         else
1466             tcg_gen_addi_i32(tmp, tmp, -offset);
1467         store_reg(s, rd, tmp);
1468     } else if (!(insn & (1 << 23)))
1469         return 1;
1470     return 0;
1471 }
1472 
1473 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1474 {
1475     int rd = (insn >> 0) & 0xf;
1476     TCGv_i32 tmp;
1477 
1478     if (insn & (1 << 8)) {
1479         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1480             return 1;
1481         } else {
1482             tmp = iwmmxt_load_creg(rd);
1483         }
1484     } else {
1485         tmp = tcg_temp_new_i32();
1486         iwmmxt_load_reg(cpu_V0, rd);
1487         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1488     }
1489     tcg_gen_andi_i32(tmp, tmp, mask);
1490     tcg_gen_mov_i32(dest, tmp);
1491     return 0;
1492 }
1493 
1494 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1495    (ie. an undefined instruction).  */
1496 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1497 {
1498     int rd, wrd;
1499     int rdhi, rdlo, rd0, rd1, i;
1500     TCGv_i32 addr;
1501     TCGv_i32 tmp, tmp2, tmp3;
1502 
1503     if ((insn & 0x0e000e00) == 0x0c000000) {
1504         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1505             wrd = insn & 0xf;
1506             rdlo = (insn >> 12) & 0xf;
1507             rdhi = (insn >> 16) & 0xf;
1508             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1509                 iwmmxt_load_reg(cpu_V0, wrd);
1510                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1511                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1512             } else {                                    /* TMCRR */
1513                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1514                 iwmmxt_store_reg(cpu_V0, wrd);
1515                 gen_op_iwmmxt_set_mup();
1516             }
1517             return 0;
1518         }
1519 
1520         wrd = (insn >> 12) & 0xf;
1521         addr = tcg_temp_new_i32();
1522         if (gen_iwmmxt_address(s, insn, addr)) {
1523             return 1;
1524         }
1525         if (insn & ARM_CP_RW_BIT) {
1526             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1527                 tmp = tcg_temp_new_i32();
1528                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1529                 iwmmxt_store_creg(wrd, tmp);
1530             } else {
1531                 i = 1;
1532                 if (insn & (1 << 8)) {
1533                     if (insn & (1 << 22)) {             /* WLDRD */
1534                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1535                         i = 0;
1536                     } else {                            /* WLDRW wRd */
1537                         tmp = tcg_temp_new_i32();
1538                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1539                     }
1540                 } else {
1541                     tmp = tcg_temp_new_i32();
1542                     if (insn & (1 << 22)) {             /* WLDRH */
1543                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1544                     } else {                            /* WLDRB */
1545                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1546                     }
1547                 }
1548                 if (i) {
1549                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1550                 }
1551                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1552             }
1553         } else {
1554             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1555                 tmp = iwmmxt_load_creg(wrd);
1556                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1557             } else {
1558                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1559                 tmp = tcg_temp_new_i32();
1560                 if (insn & (1 << 8)) {
1561                     if (insn & (1 << 22)) {             /* WSTRD */
1562                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1563                     } else {                            /* WSTRW wRd */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 } else {
1568                     if (insn & (1 << 22)) {             /* WSTRH */
1569                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1570                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1571                     } else {                            /* WSTRB */
1572                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1573                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1574                     }
1575                 }
1576             }
1577         }
1578         return 0;
1579     }
1580 
1581     if ((insn & 0x0f000000) != 0x0e000000)
1582         return 1;
1583 
1584     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1585     case 0x000:                                                 /* WOR */
1586         wrd = (insn >> 12) & 0xf;
1587         rd0 = (insn >> 0) & 0xf;
1588         rd1 = (insn >> 16) & 0xf;
1589         gen_op_iwmmxt_movq_M0_wRn(rd0);
1590         gen_op_iwmmxt_orq_M0_wRn(rd1);
1591         gen_op_iwmmxt_setpsr_nz();
1592         gen_op_iwmmxt_movq_wRn_M0(wrd);
1593         gen_op_iwmmxt_set_mup();
1594         gen_op_iwmmxt_set_cup();
1595         break;
1596     case 0x011:                                                 /* TMCR */
1597         if (insn & 0xf)
1598             return 1;
1599         rd = (insn >> 12) & 0xf;
1600         wrd = (insn >> 16) & 0xf;
1601         switch (wrd) {
1602         case ARM_IWMMXT_wCID:
1603         case ARM_IWMMXT_wCASF:
1604             break;
1605         case ARM_IWMMXT_wCon:
1606             gen_op_iwmmxt_set_cup();
1607             /* Fall through.  */
1608         case ARM_IWMMXT_wCSSF:
1609             tmp = iwmmxt_load_creg(wrd);
1610             tmp2 = load_reg(s, rd);
1611             tcg_gen_andc_i32(tmp, tmp, tmp2);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         case ARM_IWMMXT_wCGR0:
1615         case ARM_IWMMXT_wCGR1:
1616         case ARM_IWMMXT_wCGR2:
1617         case ARM_IWMMXT_wCGR3:
1618             gen_op_iwmmxt_set_cup();
1619             tmp = load_reg(s, rd);
1620             iwmmxt_store_creg(wrd, tmp);
1621             break;
1622         default:
1623             return 1;
1624         }
1625         break;
1626     case 0x100:                                                 /* WXOR */
1627         wrd = (insn >> 12) & 0xf;
1628         rd0 = (insn >> 0) & 0xf;
1629         rd1 = (insn >> 16) & 0xf;
1630         gen_op_iwmmxt_movq_M0_wRn(rd0);
1631         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1632         gen_op_iwmmxt_setpsr_nz();
1633         gen_op_iwmmxt_movq_wRn_M0(wrd);
1634         gen_op_iwmmxt_set_mup();
1635         gen_op_iwmmxt_set_cup();
1636         break;
1637     case 0x111:                                                 /* TMRC */
1638         if (insn & 0xf)
1639             return 1;
1640         rd = (insn >> 12) & 0xf;
1641         wrd = (insn >> 16) & 0xf;
1642         tmp = iwmmxt_load_creg(wrd);
1643         store_reg(s, rd, tmp);
1644         break;
1645     case 0x300:                                                 /* WANDN */
1646         wrd = (insn >> 12) & 0xf;
1647         rd0 = (insn >> 0) & 0xf;
1648         rd1 = (insn >> 16) & 0xf;
1649         gen_op_iwmmxt_movq_M0_wRn(rd0);
1650         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1651         gen_op_iwmmxt_andq_M0_wRn(rd1);
1652         gen_op_iwmmxt_setpsr_nz();
1653         gen_op_iwmmxt_movq_wRn_M0(wrd);
1654         gen_op_iwmmxt_set_mup();
1655         gen_op_iwmmxt_set_cup();
1656         break;
1657     case 0x200:                                                 /* WAND */
1658         wrd = (insn >> 12) & 0xf;
1659         rd0 = (insn >> 0) & 0xf;
1660         rd1 = (insn >> 16) & 0xf;
1661         gen_op_iwmmxt_movq_M0_wRn(rd0);
1662         gen_op_iwmmxt_andq_M0_wRn(rd1);
1663         gen_op_iwmmxt_setpsr_nz();
1664         gen_op_iwmmxt_movq_wRn_M0(wrd);
1665         gen_op_iwmmxt_set_mup();
1666         gen_op_iwmmxt_set_cup();
1667         break;
1668     case 0x810: case 0xa10:                             /* WMADD */
1669         wrd = (insn >> 12) & 0xf;
1670         rd0 = (insn >> 0) & 0xf;
1671         rd1 = (insn >> 16) & 0xf;
1672         gen_op_iwmmxt_movq_M0_wRn(rd0);
1673         if (insn & (1 << 21))
1674             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1675         else
1676             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1677         gen_op_iwmmxt_movq_wRn_M0(wrd);
1678         gen_op_iwmmxt_set_mup();
1679         break;
1680     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1681         wrd = (insn >> 12) & 0xf;
1682         rd0 = (insn >> 16) & 0xf;
1683         rd1 = (insn >> 0) & 0xf;
1684         gen_op_iwmmxt_movq_M0_wRn(rd0);
1685         switch ((insn >> 22) & 3) {
1686         case 0:
1687             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1688             break;
1689         case 1:
1690             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1691             break;
1692         case 2:
1693             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1694             break;
1695         case 3:
1696             return 1;
1697         }
1698         gen_op_iwmmxt_movq_wRn_M0(wrd);
1699         gen_op_iwmmxt_set_mup();
1700         gen_op_iwmmxt_set_cup();
1701         break;
1702     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1703         wrd = (insn >> 12) & 0xf;
1704         rd0 = (insn >> 16) & 0xf;
1705         rd1 = (insn >> 0) & 0xf;
1706         gen_op_iwmmxt_movq_M0_wRn(rd0);
1707         switch ((insn >> 22) & 3) {
1708         case 0:
1709             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1710             break;
1711         case 1:
1712             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1713             break;
1714         case 2:
1715             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1716             break;
1717         case 3:
1718             return 1;
1719         }
1720         gen_op_iwmmxt_movq_wRn_M0(wrd);
1721         gen_op_iwmmxt_set_mup();
1722         gen_op_iwmmxt_set_cup();
1723         break;
1724     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1725         wrd = (insn >> 12) & 0xf;
1726         rd0 = (insn >> 16) & 0xf;
1727         rd1 = (insn >> 0) & 0xf;
1728         gen_op_iwmmxt_movq_M0_wRn(rd0);
1729         if (insn & (1 << 22))
1730             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1731         else
1732             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1733         if (!(insn & (1 << 20)))
1734             gen_op_iwmmxt_addl_M0_wRn(wrd);
1735         gen_op_iwmmxt_movq_wRn_M0(wrd);
1736         gen_op_iwmmxt_set_mup();
1737         break;
1738     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1739         wrd = (insn >> 12) & 0xf;
1740         rd0 = (insn >> 16) & 0xf;
1741         rd1 = (insn >> 0) & 0xf;
1742         gen_op_iwmmxt_movq_M0_wRn(rd0);
1743         if (insn & (1 << 21)) {
1744             if (insn & (1 << 20))
1745                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1746             else
1747                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1748         } else {
1749             if (insn & (1 << 20))
1750                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1751             else
1752                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1753         }
1754         gen_op_iwmmxt_movq_wRn_M0(wrd);
1755         gen_op_iwmmxt_set_mup();
1756         break;
1757     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1758         wrd = (insn >> 12) & 0xf;
1759         rd0 = (insn >> 16) & 0xf;
1760         rd1 = (insn >> 0) & 0xf;
1761         gen_op_iwmmxt_movq_M0_wRn(rd0);
1762         if (insn & (1 << 21))
1763             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1764         else
1765             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1766         if (!(insn & (1 << 20))) {
1767             iwmmxt_load_reg(cpu_V1, wrd);
1768             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1769         }
1770         gen_op_iwmmxt_movq_wRn_M0(wrd);
1771         gen_op_iwmmxt_set_mup();
1772         break;
1773     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1774         wrd = (insn >> 12) & 0xf;
1775         rd0 = (insn >> 16) & 0xf;
1776         rd1 = (insn >> 0) & 0xf;
1777         gen_op_iwmmxt_movq_M0_wRn(rd0);
1778         switch ((insn >> 22) & 3) {
1779         case 0:
1780             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1781             break;
1782         case 1:
1783             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1784             break;
1785         case 2:
1786             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1787             break;
1788         case 3:
1789             return 1;
1790         }
1791         gen_op_iwmmxt_movq_wRn_M0(wrd);
1792         gen_op_iwmmxt_set_mup();
1793         gen_op_iwmmxt_set_cup();
1794         break;
1795     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1796         wrd = (insn >> 12) & 0xf;
1797         rd0 = (insn >> 16) & 0xf;
1798         rd1 = (insn >> 0) & 0xf;
1799         gen_op_iwmmxt_movq_M0_wRn(rd0);
1800         if (insn & (1 << 22)) {
1801             if (insn & (1 << 20))
1802                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1803             else
1804                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1805         } else {
1806             if (insn & (1 << 20))
1807                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1808             else
1809                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1810         }
1811         gen_op_iwmmxt_movq_wRn_M0(wrd);
1812         gen_op_iwmmxt_set_mup();
1813         gen_op_iwmmxt_set_cup();
1814         break;
1815     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1816         wrd = (insn >> 12) & 0xf;
1817         rd0 = (insn >> 16) & 0xf;
1818         rd1 = (insn >> 0) & 0xf;
1819         gen_op_iwmmxt_movq_M0_wRn(rd0);
1820         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1821         tcg_gen_andi_i32(tmp, tmp, 7);
1822         iwmmxt_load_reg(cpu_V1, rd1);
1823         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1824         gen_op_iwmmxt_movq_wRn_M0(wrd);
1825         gen_op_iwmmxt_set_mup();
1826         break;
1827     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1828         if (((insn >> 6) & 3) == 3)
1829             return 1;
1830         rd = (insn >> 12) & 0xf;
1831         wrd = (insn >> 16) & 0xf;
1832         tmp = load_reg(s, rd);
1833         gen_op_iwmmxt_movq_M0_wRn(wrd);
1834         switch ((insn >> 6) & 3) {
1835         case 0:
1836             tmp2 = tcg_constant_i32(0xff);
1837             tmp3 = tcg_constant_i32((insn & 7) << 3);
1838             break;
1839         case 1:
1840             tmp2 = tcg_constant_i32(0xffff);
1841             tmp3 = tcg_constant_i32((insn & 3) << 4);
1842             break;
1843         case 2:
1844             tmp2 = tcg_constant_i32(0xffffffff);
1845             tmp3 = tcg_constant_i32((insn & 1) << 5);
1846             break;
1847         default:
1848             g_assert_not_reached();
1849         }
1850         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1851         gen_op_iwmmxt_movq_wRn_M0(wrd);
1852         gen_op_iwmmxt_set_mup();
1853         break;
1854     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1855         rd = (insn >> 12) & 0xf;
1856         wrd = (insn >> 16) & 0xf;
1857         if (rd == 15 || ((insn >> 22) & 3) == 3)
1858             return 1;
1859         gen_op_iwmmxt_movq_M0_wRn(wrd);
1860         tmp = tcg_temp_new_i32();
1861         switch ((insn >> 22) & 3) {
1862         case 0:
1863             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1864             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1865             if (insn & 8) {
1866                 tcg_gen_ext8s_i32(tmp, tmp);
1867             } else {
1868                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1869             }
1870             break;
1871         case 1:
1872             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1873             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1874             if (insn & 8) {
1875                 tcg_gen_ext16s_i32(tmp, tmp);
1876             } else {
1877                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1878             }
1879             break;
1880         case 2:
1881             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1882             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1883             break;
1884         }
1885         store_reg(s, rd, tmp);
1886         break;
1887     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1888         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1889             return 1;
1890         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1891         switch ((insn >> 22) & 3) {
1892         case 0:
1893             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1894             break;
1895         case 1:
1896             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1897             break;
1898         case 2:
1899             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1900             break;
1901         }
1902         tcg_gen_shli_i32(tmp, tmp, 28);
1903         gen_set_nzcv(tmp);
1904         break;
1905     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1906         if (((insn >> 6) & 3) == 3)
1907             return 1;
1908         rd = (insn >> 12) & 0xf;
1909         wrd = (insn >> 16) & 0xf;
1910         tmp = load_reg(s, rd);
1911         switch ((insn >> 6) & 3) {
1912         case 0:
1913             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1914             break;
1915         case 1:
1916             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1917             break;
1918         case 2:
1919             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1920             break;
1921         }
1922         gen_op_iwmmxt_movq_wRn_M0(wrd);
1923         gen_op_iwmmxt_set_mup();
1924         break;
1925     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1926         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1927             return 1;
1928         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1929         tmp2 = tcg_temp_new_i32();
1930         tcg_gen_mov_i32(tmp2, tmp);
1931         switch ((insn >> 22) & 3) {
1932         case 0:
1933             for (i = 0; i < 7; i ++) {
1934                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1935                 tcg_gen_and_i32(tmp, tmp, tmp2);
1936             }
1937             break;
1938         case 1:
1939             for (i = 0; i < 3; i ++) {
1940                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1941                 tcg_gen_and_i32(tmp, tmp, tmp2);
1942             }
1943             break;
1944         case 2:
1945             tcg_gen_shli_i32(tmp2, tmp2, 16);
1946             tcg_gen_and_i32(tmp, tmp, tmp2);
1947             break;
1948         }
1949         gen_set_nzcv(tmp);
1950         break;
1951     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1952         wrd = (insn >> 12) & 0xf;
1953         rd0 = (insn >> 16) & 0xf;
1954         gen_op_iwmmxt_movq_M0_wRn(rd0);
1955         switch ((insn >> 22) & 3) {
1956         case 0:
1957             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1958             break;
1959         case 1:
1960             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1961             break;
1962         case 2:
1963             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1964             break;
1965         case 3:
1966             return 1;
1967         }
1968         gen_op_iwmmxt_movq_wRn_M0(wrd);
1969         gen_op_iwmmxt_set_mup();
1970         break;
1971     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1972         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1973             return 1;
1974         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1975         tmp2 = tcg_temp_new_i32();
1976         tcg_gen_mov_i32(tmp2, tmp);
1977         switch ((insn >> 22) & 3) {
1978         case 0:
1979             for (i = 0; i < 7; i ++) {
1980                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1981                 tcg_gen_or_i32(tmp, tmp, tmp2);
1982             }
1983             break;
1984         case 1:
1985             for (i = 0; i < 3; i ++) {
1986                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1987                 tcg_gen_or_i32(tmp, tmp, tmp2);
1988             }
1989             break;
1990         case 2:
1991             tcg_gen_shli_i32(tmp2, tmp2, 16);
1992             tcg_gen_or_i32(tmp, tmp, tmp2);
1993             break;
1994         }
1995         gen_set_nzcv(tmp);
1996         break;
1997     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1998         rd = (insn >> 12) & 0xf;
1999         rd0 = (insn >> 16) & 0xf;
2000         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2001             return 1;
2002         gen_op_iwmmxt_movq_M0_wRn(rd0);
2003         tmp = tcg_temp_new_i32();
2004         switch ((insn >> 22) & 3) {
2005         case 0:
2006             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2007             break;
2008         case 1:
2009             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2010             break;
2011         case 2:
2012             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2013             break;
2014         }
2015         store_reg(s, rd, tmp);
2016         break;
2017     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2018     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2019         wrd = (insn >> 12) & 0xf;
2020         rd0 = (insn >> 16) & 0xf;
2021         rd1 = (insn >> 0) & 0xf;
2022         gen_op_iwmmxt_movq_M0_wRn(rd0);
2023         switch ((insn >> 22) & 3) {
2024         case 0:
2025             if (insn & (1 << 21))
2026                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2027             else
2028                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2029             break;
2030         case 1:
2031             if (insn & (1 << 21))
2032                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2033             else
2034                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2035             break;
2036         case 2:
2037             if (insn & (1 << 21))
2038                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2039             else
2040                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2041             break;
2042         case 3:
2043             return 1;
2044         }
2045         gen_op_iwmmxt_movq_wRn_M0(wrd);
2046         gen_op_iwmmxt_set_mup();
2047         gen_op_iwmmxt_set_cup();
2048         break;
2049     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2050     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2051         wrd = (insn >> 12) & 0xf;
2052         rd0 = (insn >> 16) & 0xf;
2053         gen_op_iwmmxt_movq_M0_wRn(rd0);
2054         switch ((insn >> 22) & 3) {
2055         case 0:
2056             if (insn & (1 << 21))
2057                 gen_op_iwmmxt_unpacklsb_M0();
2058             else
2059                 gen_op_iwmmxt_unpacklub_M0();
2060             break;
2061         case 1:
2062             if (insn & (1 << 21))
2063                 gen_op_iwmmxt_unpacklsw_M0();
2064             else
2065                 gen_op_iwmmxt_unpackluw_M0();
2066             break;
2067         case 2:
2068             if (insn & (1 << 21))
2069                 gen_op_iwmmxt_unpacklsl_M0();
2070             else
2071                 gen_op_iwmmxt_unpacklul_M0();
2072             break;
2073         case 3:
2074             return 1;
2075         }
2076         gen_op_iwmmxt_movq_wRn_M0(wrd);
2077         gen_op_iwmmxt_set_mup();
2078         gen_op_iwmmxt_set_cup();
2079         break;
2080     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2081     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2082         wrd = (insn >> 12) & 0xf;
2083         rd0 = (insn >> 16) & 0xf;
2084         gen_op_iwmmxt_movq_M0_wRn(rd0);
2085         switch ((insn >> 22) & 3) {
2086         case 0:
2087             if (insn & (1 << 21))
2088                 gen_op_iwmmxt_unpackhsb_M0();
2089             else
2090                 gen_op_iwmmxt_unpackhub_M0();
2091             break;
2092         case 1:
2093             if (insn & (1 << 21))
2094                 gen_op_iwmmxt_unpackhsw_M0();
2095             else
2096                 gen_op_iwmmxt_unpackhuw_M0();
2097             break;
2098         case 2:
2099             if (insn & (1 << 21))
2100                 gen_op_iwmmxt_unpackhsl_M0();
2101             else
2102                 gen_op_iwmmxt_unpackhul_M0();
2103             break;
2104         case 3:
2105             return 1;
2106         }
2107         gen_op_iwmmxt_movq_wRn_M0(wrd);
2108         gen_op_iwmmxt_set_mup();
2109         gen_op_iwmmxt_set_cup();
2110         break;
2111     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2112     case 0x214: case 0x614: case 0xa14: case 0xe14:
2113         if (((insn >> 22) & 3) == 0)
2114             return 1;
2115         wrd = (insn >> 12) & 0xf;
2116         rd0 = (insn >> 16) & 0xf;
2117         gen_op_iwmmxt_movq_M0_wRn(rd0);
2118         tmp = tcg_temp_new_i32();
2119         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2120             return 1;
2121         }
2122         switch ((insn >> 22) & 3) {
2123         case 1:
2124             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2125             break;
2126         case 2:
2127             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2128             break;
2129         case 3:
2130             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2131             break;
2132         }
2133         gen_op_iwmmxt_movq_wRn_M0(wrd);
2134         gen_op_iwmmxt_set_mup();
2135         gen_op_iwmmxt_set_cup();
2136         break;
2137     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2138     case 0x014: case 0x414: case 0x814: case 0xc14:
2139         if (((insn >> 22) & 3) == 0)
2140             return 1;
2141         wrd = (insn >> 12) & 0xf;
2142         rd0 = (insn >> 16) & 0xf;
2143         gen_op_iwmmxt_movq_M0_wRn(rd0);
2144         tmp = tcg_temp_new_i32();
2145         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2146             return 1;
2147         }
2148         switch ((insn >> 22) & 3) {
2149         case 1:
2150             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2151             break;
2152         case 2:
2153             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2154             break;
2155         case 3:
2156             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2157             break;
2158         }
2159         gen_op_iwmmxt_movq_wRn_M0(wrd);
2160         gen_op_iwmmxt_set_mup();
2161         gen_op_iwmmxt_set_cup();
2162         break;
2163     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2164     case 0x114: case 0x514: case 0x914: case 0xd14:
2165         if (((insn >> 22) & 3) == 0)
2166             return 1;
2167         wrd = (insn >> 12) & 0xf;
2168         rd0 = (insn >> 16) & 0xf;
2169         gen_op_iwmmxt_movq_M0_wRn(rd0);
2170         tmp = tcg_temp_new_i32();
2171         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2172             return 1;
2173         }
2174         switch ((insn >> 22) & 3) {
2175         case 1:
2176             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2177             break;
2178         case 2:
2179             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2180             break;
2181         case 3:
2182             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2183             break;
2184         }
2185         gen_op_iwmmxt_movq_wRn_M0(wrd);
2186         gen_op_iwmmxt_set_mup();
2187         gen_op_iwmmxt_set_cup();
2188         break;
2189     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2190     case 0x314: case 0x714: case 0xb14: case 0xf14:
2191         if (((insn >> 22) & 3) == 0)
2192             return 1;
2193         wrd = (insn >> 12) & 0xf;
2194         rd0 = (insn >> 16) & 0xf;
2195         gen_op_iwmmxt_movq_M0_wRn(rd0);
2196         tmp = tcg_temp_new_i32();
2197         switch ((insn >> 22) & 3) {
2198         case 1:
2199             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2200                 return 1;
2201             }
2202             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2203             break;
2204         case 2:
2205             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2206                 return 1;
2207             }
2208             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2209             break;
2210         case 3:
2211             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2212                 return 1;
2213             }
2214             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2215             break;
2216         }
2217         gen_op_iwmmxt_movq_wRn_M0(wrd);
2218         gen_op_iwmmxt_set_mup();
2219         gen_op_iwmmxt_set_cup();
2220         break;
2221     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2222     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2223         wrd = (insn >> 12) & 0xf;
2224         rd0 = (insn >> 16) & 0xf;
2225         rd1 = (insn >> 0) & 0xf;
2226         gen_op_iwmmxt_movq_M0_wRn(rd0);
2227         switch ((insn >> 22) & 3) {
2228         case 0:
2229             if (insn & (1 << 21))
2230                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2231             else
2232                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2233             break;
2234         case 1:
2235             if (insn & (1 << 21))
2236                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2237             else
2238                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2239             break;
2240         case 2:
2241             if (insn & (1 << 21))
2242                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2243             else
2244                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2245             break;
2246         case 3:
2247             return 1;
2248         }
2249         gen_op_iwmmxt_movq_wRn_M0(wrd);
2250         gen_op_iwmmxt_set_mup();
2251         break;
2252     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2253     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2254         wrd = (insn >> 12) & 0xf;
2255         rd0 = (insn >> 16) & 0xf;
2256         rd1 = (insn >> 0) & 0xf;
2257         gen_op_iwmmxt_movq_M0_wRn(rd0);
2258         switch ((insn >> 22) & 3) {
2259         case 0:
2260             if (insn & (1 << 21))
2261                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2262             else
2263                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2264             break;
2265         case 1:
2266             if (insn & (1 << 21))
2267                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2268             else
2269                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2270             break;
2271         case 2:
2272             if (insn & (1 << 21))
2273                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2274             else
2275                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2276             break;
2277         case 3:
2278             return 1;
2279         }
2280         gen_op_iwmmxt_movq_wRn_M0(wrd);
2281         gen_op_iwmmxt_set_mup();
2282         break;
2283     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2284     case 0x402: case 0x502: case 0x602: case 0x702:
2285         wrd = (insn >> 12) & 0xf;
2286         rd0 = (insn >> 16) & 0xf;
2287         rd1 = (insn >> 0) & 0xf;
2288         gen_op_iwmmxt_movq_M0_wRn(rd0);
2289         iwmmxt_load_reg(cpu_V1, rd1);
2290         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2291                                 tcg_constant_i32((insn >> 20) & 3));
2292         gen_op_iwmmxt_movq_wRn_M0(wrd);
2293         gen_op_iwmmxt_set_mup();
2294         break;
2295     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2296     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2297     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2298     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2299         wrd = (insn >> 12) & 0xf;
2300         rd0 = (insn >> 16) & 0xf;
2301         rd1 = (insn >> 0) & 0xf;
2302         gen_op_iwmmxt_movq_M0_wRn(rd0);
2303         switch ((insn >> 20) & 0xf) {
2304         case 0x0:
2305             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2306             break;
2307         case 0x1:
2308             gen_op_iwmmxt_subub_M0_wRn(rd1);
2309             break;
2310         case 0x3:
2311             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2312             break;
2313         case 0x4:
2314             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2315             break;
2316         case 0x5:
2317             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2318             break;
2319         case 0x7:
2320             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2321             break;
2322         case 0x8:
2323             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2324             break;
2325         case 0x9:
2326             gen_op_iwmmxt_subul_M0_wRn(rd1);
2327             break;
2328         case 0xb:
2329             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2330             break;
2331         default:
2332             return 1;
2333         }
2334         gen_op_iwmmxt_movq_wRn_M0(wrd);
2335         gen_op_iwmmxt_set_mup();
2336         gen_op_iwmmxt_set_cup();
2337         break;
2338     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2339     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2340     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2341     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2342         wrd = (insn >> 12) & 0xf;
2343         rd0 = (insn >> 16) & 0xf;
2344         gen_op_iwmmxt_movq_M0_wRn(rd0);
2345         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2346         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2347         gen_op_iwmmxt_movq_wRn_M0(wrd);
2348         gen_op_iwmmxt_set_mup();
2349         gen_op_iwmmxt_set_cup();
2350         break;
2351     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2352     case 0x418: case 0x518: case 0x618: case 0x718:
2353     case 0x818: case 0x918: case 0xa18: case 0xb18:
2354     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2355         wrd = (insn >> 12) & 0xf;
2356         rd0 = (insn >> 16) & 0xf;
2357         rd1 = (insn >> 0) & 0xf;
2358         gen_op_iwmmxt_movq_M0_wRn(rd0);
2359         switch ((insn >> 20) & 0xf) {
2360         case 0x0:
2361             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2362             break;
2363         case 0x1:
2364             gen_op_iwmmxt_addub_M0_wRn(rd1);
2365             break;
2366         case 0x3:
2367             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2368             break;
2369         case 0x4:
2370             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2371             break;
2372         case 0x5:
2373             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2374             break;
2375         case 0x7:
2376             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2377             break;
2378         case 0x8:
2379             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2380             break;
2381         case 0x9:
2382             gen_op_iwmmxt_addul_M0_wRn(rd1);
2383             break;
2384         case 0xb:
2385             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2386             break;
2387         default:
2388             return 1;
2389         }
2390         gen_op_iwmmxt_movq_wRn_M0(wrd);
2391         gen_op_iwmmxt_set_mup();
2392         gen_op_iwmmxt_set_cup();
2393         break;
2394     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2395     case 0x408: case 0x508: case 0x608: case 0x708:
2396     case 0x808: case 0x908: case 0xa08: case 0xb08:
2397     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2398         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2399             return 1;
2400         wrd = (insn >> 12) & 0xf;
2401         rd0 = (insn >> 16) & 0xf;
2402         rd1 = (insn >> 0) & 0xf;
2403         gen_op_iwmmxt_movq_M0_wRn(rd0);
2404         switch ((insn >> 22) & 3) {
2405         case 1:
2406             if (insn & (1 << 21))
2407                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2408             else
2409                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2410             break;
2411         case 2:
2412             if (insn & (1 << 21))
2413                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2414             else
2415                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2416             break;
2417         case 3:
2418             if (insn & (1 << 21))
2419                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2420             else
2421                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2422             break;
2423         }
2424         gen_op_iwmmxt_movq_wRn_M0(wrd);
2425         gen_op_iwmmxt_set_mup();
2426         gen_op_iwmmxt_set_cup();
2427         break;
2428     case 0x201: case 0x203: case 0x205: case 0x207:
2429     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2430     case 0x211: case 0x213: case 0x215: case 0x217:
2431     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2432         wrd = (insn >> 5) & 0xf;
2433         rd0 = (insn >> 12) & 0xf;
2434         rd1 = (insn >> 0) & 0xf;
2435         if (rd0 == 0xf || rd1 == 0xf)
2436             return 1;
2437         gen_op_iwmmxt_movq_M0_wRn(wrd);
2438         tmp = load_reg(s, rd0);
2439         tmp2 = load_reg(s, rd1);
2440         switch ((insn >> 16) & 0xf) {
2441         case 0x0:                                       /* TMIA */
2442             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2443             break;
2444         case 0x8:                                       /* TMIAPH */
2445             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2446             break;
2447         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2448             if (insn & (1 << 16))
2449                 tcg_gen_shri_i32(tmp, tmp, 16);
2450             if (insn & (1 << 17))
2451                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2452             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2453             break;
2454         default:
2455             return 1;
2456         }
2457         gen_op_iwmmxt_movq_wRn_M0(wrd);
2458         gen_op_iwmmxt_set_mup();
2459         break;
2460     default:
2461         return 1;
2462     }
2463 
2464     return 0;
2465 }
2466 
2467 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2468    (ie. an undefined instruction).  */
2469 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2470 {
2471     int acc, rd0, rd1, rdhi, rdlo;
2472     TCGv_i32 tmp, tmp2;
2473 
2474     if ((insn & 0x0ff00f10) == 0x0e200010) {
2475         /* Multiply with Internal Accumulate Format */
2476         rd0 = (insn >> 12) & 0xf;
2477         rd1 = insn & 0xf;
2478         acc = (insn >> 5) & 7;
2479 
2480         if (acc != 0)
2481             return 1;
2482 
2483         tmp = load_reg(s, rd0);
2484         tmp2 = load_reg(s, rd1);
2485         switch ((insn >> 16) & 0xf) {
2486         case 0x0:                                       /* MIA */
2487             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2488             break;
2489         case 0x8:                                       /* MIAPH */
2490             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2491             break;
2492         case 0xc:                                       /* MIABB */
2493         case 0xd:                                       /* MIABT */
2494         case 0xe:                                       /* MIATB */
2495         case 0xf:                                       /* MIATT */
2496             if (insn & (1 << 16))
2497                 tcg_gen_shri_i32(tmp, tmp, 16);
2498             if (insn & (1 << 17))
2499                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2500             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2501             break;
2502         default:
2503             return 1;
2504         }
2505 
2506         gen_op_iwmmxt_movq_wRn_M0(acc);
2507         return 0;
2508     }
2509 
2510     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2511         /* Internal Accumulator Access Format */
2512         rdhi = (insn >> 16) & 0xf;
2513         rdlo = (insn >> 12) & 0xf;
2514         acc = insn & 7;
2515 
2516         if (acc != 0)
2517             return 1;
2518 
2519         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2520             iwmmxt_load_reg(cpu_V0, acc);
2521             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2522             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2523             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2524         } else {                                        /* MAR */
2525             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2526             iwmmxt_store_reg(cpu_V0, acc);
2527         }
2528         return 0;
2529     }
2530 
2531     return 1;
2532 }
2533 
/* Emit the TCG "lookup and goto ptr" op used to leave the current TB. */
static void gen_goto_ptr(void)
{
    tcg_gen_lookup_and_goto_ptr();
}
2538 
2539 /* This will end the TB but doesn't guarantee we'll return to
2540  * cpu_loop_exec. Any live exit_requests will be processed as we
2541  * enter the next TB.
2542  */
2543 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2544 {
2545     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2546         /*
2547          * For pcrel, the pc must always be up-to-date on entry to
2548          * the linked TB, so that it can use simple additions for all
2549          * further adjustments.  For !pcrel, the linked TB is compiled
2550          * to know its full virtual address, so we can delay the
2551          * update to pc to the unlinked path.  A long chain of links
2552          * can thus avoid many updates to the PC.
2553          */
2554         if (tb_cflags(s->base.tb) & CF_PCREL) {
2555             gen_update_pc(s, diff);
2556             tcg_gen_goto_tb(n);
2557         } else {
2558             tcg_gen_goto_tb(n);
2559             gen_update_pc(s, diff);
2560         }
2561         tcg_gen_exit_tb(s->base.tb, n);
2562     } else {
2563         gen_update_pc(s, diff);
2564         gen_goto_ptr();
2565     }
2566     s->base.is_jmp = DISAS_NORETURN;
2567 }
2568 
2569 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2570 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2571 {
2572     if (unlikely(s->ss_active)) {
2573         /* An indirect jump so that we still trigger the debug exception.  */
2574         gen_update_pc(s, diff);
2575         s->base.is_jmp = DISAS_JUMP;
2576         return;
2577     }
2578     switch (s->base.is_jmp) {
2579     case DISAS_NEXT:
2580     case DISAS_TOO_MANY:
2581     case DISAS_NORETURN:
2582         /*
2583          * The normal case: just go to the destination TB.
2584          * NB: NORETURN happens if we generate code like
2585          *    gen_brcondi(l);
2586          *    gen_jmp();
2587          *    gen_set_label(l);
2588          *    gen_jmp();
2589          * on the second call to gen_jmp().
2590          */
2591         gen_goto_tb(s, tbno, diff);
2592         break;
2593     case DISAS_UPDATE_NOCHAIN:
2594     case DISAS_UPDATE_EXIT:
2595         /*
2596          * We already decided we're leaving the TB for some other reason.
2597          * Avoid using goto_tb so we really do exit back to the main loop
2598          * and don't chain to another TB.
2599          */
2600         gen_update_pc(s, diff);
2601         gen_goto_ptr();
2602         s->base.is_jmp = DISAS_NORETURN;
2603         break;
2604     default:
2605         /*
2606          * We shouldn't be emitting code for a jump and also have
2607          * is_jmp set to one of the special cases like DISAS_SWI.
2608          */
2609         g_assert_not_reached();
2610     }
2611 }
2612 
2613 static inline void gen_jmp(DisasContext *s, target_long diff)
2614 {
2615     gen_jmp_tb(s, diff, 0);
2616 }
2617 
2618 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2619 {
2620     if (x)
2621         tcg_gen_sari_i32(t0, t0, 16);
2622     else
2623         gen_sxth(t0);
2624     if (y)
2625         tcg_gen_sari_i32(t1, t1, 16);
2626     else
2627         gen_sxth(t1);
2628     tcg_gen_mul_i32(t0, t0, t1);
2629 }
2630 
2631 /* Return the mask of PSR bits set by a MSR instruction.  */
2632 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2633 {
2634     uint32_t mask = 0;
2635 
2636     if (flags & (1 << 0)) {
2637         mask |= 0xff;
2638     }
2639     if (flags & (1 << 1)) {
2640         mask |= 0xff00;
2641     }
2642     if (flags & (1 << 2)) {
2643         mask |= 0xff0000;
2644     }
2645     if (flags & (1 << 3)) {
2646         mask |= 0xff000000;
2647     }
2648 
2649     /* Mask out undefined and reserved bits.  */
2650     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2651 
2652     /* Mask out execution state.  */
2653     if (!spsr) {
2654         mask &= ~CPSR_EXEC;
2655     }
2656 
2657     /* Mask out privileged bits.  */
2658     if (IS_USER(s)) {
2659         mask &= CPSR_USER;
2660     }
2661     return mask;
2662 }
2663 
2664 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2665 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2666 {
2667     TCGv_i32 tmp;
2668     if (spsr) {
2669         /* ??? This is also undefined in system mode.  */
2670         if (IS_USER(s))
2671             return 1;
2672 
2673         tmp = load_cpu_field(spsr);
2674         tcg_gen_andi_i32(tmp, tmp, ~mask);
2675         tcg_gen_andi_i32(t0, t0, mask);
2676         tcg_gen_or_i32(tmp, tmp, t0);
2677         store_cpu_field(tmp, spsr);
2678     } else {
2679         gen_set_cpsr(t0, mask);
2680     }
2681     gen_lookup_tb(s);
2682     return 0;
2683 }
2684 
2685 /* Returns nonzero if access to the PSR is not permitted.  */
2686 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2687 {
2688     TCGv_i32 tmp;
2689     tmp = tcg_temp_new_i32();
2690     tcg_gen_movi_i32(tmp, val);
2691     return gen_set_psr(s, mask, spsr, tmp);
2692 }
2693 
2694 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2695                                      int *tgtmode, int *regno)
2696 {
2697     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2698      * the target mode and register number, and identify the various
2699      * unpredictable cases.
2700      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2701      *  + executed in user mode
2702      *  + using R15 as the src/dest register
2703      *  + accessing an unimplemented register
2704      *  + accessing a register that's inaccessible at current PL/security state*
2705      *  + accessing a register that you could access with a different insn
2706      * We choose to UNDEF in all these cases.
2707      * Since we don't know which of the various AArch32 modes we are in
2708      * we have to defer some checks to runtime.
2709      * Accesses to Monitor mode registers from Secure EL1 (which implies
2710      * that EL3 is AArch64) must trap to EL3.
2711      *
2712      * If the access checks fail this function will emit code to take
2713      * an exception and return false. Otherwise it will return true,
2714      * and set *tgtmode and *regno appropriately.
2715      */
2716     /* These instructions are present only in ARMv8, or in ARMv7 with the
2717      * Virtualization Extensions.
2718      */
2719     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2720         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2721         goto undef;
2722     }
2723 
2724     if (IS_USER(s) || rn == 15) {
2725         goto undef;
2726     }
2727 
2728     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2729      * of registers into (r, sysm).
2730      */
2731     if (r) {
2732         /* SPSRs for other modes */
2733         switch (sysm) {
2734         case 0xe: /* SPSR_fiq */
2735             *tgtmode = ARM_CPU_MODE_FIQ;
2736             break;
2737         case 0x10: /* SPSR_irq */
2738             *tgtmode = ARM_CPU_MODE_IRQ;
2739             break;
2740         case 0x12: /* SPSR_svc */
2741             *tgtmode = ARM_CPU_MODE_SVC;
2742             break;
2743         case 0x14: /* SPSR_abt */
2744             *tgtmode = ARM_CPU_MODE_ABT;
2745             break;
2746         case 0x16: /* SPSR_und */
2747             *tgtmode = ARM_CPU_MODE_UND;
2748             break;
2749         case 0x1c: /* SPSR_mon */
2750             *tgtmode = ARM_CPU_MODE_MON;
2751             break;
2752         case 0x1e: /* SPSR_hyp */
2753             *tgtmode = ARM_CPU_MODE_HYP;
2754             break;
2755         default: /* unallocated */
2756             goto undef;
2757         }
2758         /* We arbitrarily assign SPSR a register number of 16. */
2759         *regno = 16;
2760     } else {
2761         /* general purpose registers for other modes */
2762         switch (sysm) {
2763         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2764             *tgtmode = ARM_CPU_MODE_USR;
2765             *regno = sysm + 8;
2766             break;
2767         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2768             *tgtmode = ARM_CPU_MODE_FIQ;
2769             *regno = sysm;
2770             break;
2771         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2772             *tgtmode = ARM_CPU_MODE_IRQ;
2773             *regno = sysm & 1 ? 13 : 14;
2774             break;
2775         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2776             *tgtmode = ARM_CPU_MODE_SVC;
2777             *regno = sysm & 1 ? 13 : 14;
2778             break;
2779         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2780             *tgtmode = ARM_CPU_MODE_ABT;
2781             *regno = sysm & 1 ? 13 : 14;
2782             break;
2783         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2784             *tgtmode = ARM_CPU_MODE_UND;
2785             *regno = sysm & 1 ? 13 : 14;
2786             break;
2787         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2788             *tgtmode = ARM_CPU_MODE_MON;
2789             *regno = sysm & 1 ? 13 : 14;
2790             break;
2791         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2792             *tgtmode = ARM_CPU_MODE_HYP;
2793             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2794             *regno = sysm & 1 ? 13 : 17;
2795             break;
2796         default: /* unallocated */
2797             goto undef;
2798         }
2799     }
2800 
2801     /* Catch the 'accessing inaccessible register' cases we can detect
2802      * at translate time.
2803      */
2804     switch (*tgtmode) {
2805     case ARM_CPU_MODE_MON:
2806         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2807             goto undef;
2808         }
2809         if (s->current_el == 1) {
2810             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2811              * then accesses to Mon registers trap to Secure EL2, if it exists,
2812              * otherwise EL3.
2813              */
2814             TCGv_i32 tcg_el;
2815 
2816             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2817                 dc_isar_feature(aa64_sel2, s)) {
2818                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2819                 tcg_el = load_cpu_field(cp15.scr_el3);
2820                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2821                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2822             } else {
2823                 tcg_el = tcg_constant_i32(3);
2824             }
2825 
2826             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2827                                     syn_uncategorized(), tcg_el);
2828             return false;
2829         }
2830         break;
2831     case ARM_CPU_MODE_HYP:
2832         /*
2833          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2834          * (and so we can forbid accesses from EL2 or below). elr_hyp
2835          * can be accessed also from Hyp mode, so forbid accesses from
2836          * EL0 or EL1.
2837          */
2838         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2839             (s->current_el < 3 && *regno != 17)) {
2840             goto undef;
2841         }
2842         break;
2843     default:
2844         break;
2845     }
2846 
2847     return true;
2848 
2849 undef:
2850     /* If we get here then some access check did not pass */
2851     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2852     return false;
2853 }
2854 
2855 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2856 {
2857     TCGv_i32 tcg_reg;
2858     int tgtmode = 0, regno = 0;
2859 
2860     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2861         return;
2862     }
2863 
2864     /* Sync state because msr_banked() can raise exceptions */
2865     gen_set_condexec(s);
2866     gen_update_pc(s, 0);
2867     tcg_reg = load_reg(s, rn);
2868     gen_helper_msr_banked(cpu_env, tcg_reg,
2869                           tcg_constant_i32(tgtmode),
2870                           tcg_constant_i32(regno));
2871     s->base.is_jmp = DISAS_UPDATE_EXIT;
2872 }
2873 
2874 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2875 {
2876     TCGv_i32 tcg_reg;
2877     int tgtmode = 0, regno = 0;
2878 
2879     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2880         return;
2881     }
2882 
2883     /* Sync state because mrs_banked() can raise exceptions */
2884     gen_set_condexec(s);
2885     gen_update_pc(s, 0);
2886     tcg_reg = tcg_temp_new_i32();
2887     gen_helper_mrs_banked(tcg_reg, cpu_env,
2888                           tcg_constant_i32(tgtmode),
2889                           tcg_constant_i32(regno));
2890     store_reg(s, rn, tcg_reg);
2891     s->base.is_jmp = DISAS_UPDATE_EXIT;
2892 }
2893 
2894 /* Store value to PC as for an exception return (ie don't
2895  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2896  * will do the masking based on the new value of the Thumb bit.
2897  */
2898 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2899 {
2900     tcg_gen_mov_i32(cpu_R[15], pc);
2901 }
2902 
2903 /* Generate a v6 exception return.  Marks both values as dead.  */
2904 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2905 {
2906     store_pc_exc_ret(s, pc);
2907     /* The cpsr_write_eret helper will mask the low bits of PC
2908      * appropriately depending on the new Thumb bit, so it must
2909      * be called after storing the new PC.
2910      */
2911     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2912         gen_io_start();
2913     }
2914     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2915     /* Must exit loop to check un-masked IRQs */
2916     s->base.is_jmp = DISAS_EXIT;
2917 }
2918 
2919 /* Generate an old-style exception return. Marks pc as dead. */
2920 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2921 {
2922     gen_rfe(s, pc, load_cpu_field(spsr));
2923 }
2924 
2925 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2926                             uint32_t opr_sz, uint32_t max_sz,
2927                             gen_helper_gvec_3_ptr *fn)
2928 {
2929     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2930 
2931     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2932     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2933                        opr_sz, max_sz, 0, fn);
2934 }
2935 
2936 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
2946 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2947                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2948 {
2949     static gen_helper_gvec_3_ptr * const fns[2] = {
2950         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2951     };
2952     tcg_debug_assert(vece >= 1 && vece <= 2);
2953     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2954 }
2955 
/*
 * GEN_CMP0(NAME, COND) defines the expanders for a vector compare
 * against zero using TCG condition COND:
 *   gen_<NAME>0_i32, gen_<NAME>0_i64: setcond against 0 followed by a
 *       negate, so a true comparison yields all-ones;
 *   gen_<NAME>0_vec: vector compare against a zero constant;
 *   gen_gvec_<NAME>0: public entry point, which picks the table entry
 *       for the element size.  8-bit and 16-bit elements fall back to
 *       out-of-line helpers when the vector form is not used.
 */
#define GEN_CMP0(NAME, COND)                                            \
    static void gen_##NAME##0_i32(TCGv_i32 res, TCGv_i32 in)            \
    {                                                                   \
        tcg_gen_setcondi_i32(COND, res, in, 0);                         \
        tcg_gen_neg_i32(res, res);                                      \
    }                                                                   \
    static void gen_##NAME##0_i64(TCGv_i64 res, TCGv_i64 in)            \
    {                                                                   \
        tcg_gen_setcondi_i64(COND, res, in, 0);                         \
        tcg_gen_neg_i64(res, res);                                      \
    }                                                                   \
    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec res,          \
                                  TCGv_vec in)                          \
    {                                                                   \
        TCGv_vec zero_vec = tcg_constant_vec_matching(res, vece, 0);    \
        tcg_gen_cmp_vec(COND, vece, res, in, zero_vec);                 \
    }                                                                   \
    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
                            uint32_t opr_sz, uint32_t max_sz)           \
    {                                                                   \
        const GVecGen2 table[4] = {                                     \
            { .vece = MO_8,                                             \
              .fniv = gen_##NAME##0_vec,                                \
              .fno = gen_helper_gvec_##NAME##0_b,                       \
              .opt_opc = vecop_list_cmp },                              \
            { .vece = MO_16,                                            \
              .fniv = gen_##NAME##0_vec,                                \
              .fno = gen_helper_gvec_##NAME##0_h,                       \
              .opt_opc = vecop_list_cmp },                              \
            { .vece = MO_32,                                            \
              .fni4 = gen_##NAME##0_i32,                                \
              .fniv = gen_##NAME##0_vec,                                \
              .opt_opc = vecop_list_cmp },                              \
            { .vece = MO_64,                                            \
              .fni8 = gen_##NAME##0_i64,                                \
              .fniv = gen_##NAME##0_vec,                                \
              .opt_opc = vecop_list_cmp,                                \
              .prefer_i64 = TCG_TARGET_REG_BITS == 64 },                \
        };                                                              \
        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &table[vece]);             \
    }
2996 
2997 static const TCGOpcode vecop_list_cmp[] = {
2998     INDEX_op_cmp_vec, 0
2999 };
3000 
3001 GEN_CMP0(ceq, TCG_COND_EQ)
3002 GEN_CMP0(cle, TCG_COND_LE)
3003 GEN_CMP0(cge, TCG_COND_GE)
3004 GEN_CMP0(clt, TCG_COND_LT)
3005 GEN_CMP0(cgt, TCG_COND_GT)
3006 
3007 #undef GEN_CMP0
3008 
3009 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3010 {
3011     tcg_gen_vec_sar8i_i64(a, a, shift);
3012     tcg_gen_vec_add8_i64(d, d, a);
3013 }
3014 
3015 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3016 {
3017     tcg_gen_vec_sar16i_i64(a, a, shift);
3018     tcg_gen_vec_add16_i64(d, d, a);
3019 }
3020 
3021 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3022 {
3023     tcg_gen_sari_i32(a, a, shift);
3024     tcg_gen_add_i32(d, d, a);
3025 }
3026 
3027 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3028 {
3029     tcg_gen_sari_i64(a, a, shift);
3030     tcg_gen_add_i64(d, d, a);
3031 }
3032 
3033 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3034 {
3035     tcg_gen_sari_vec(vece, a, a, sh);
3036     tcg_gen_add_vec(vece, d, d, a);
3037 }
3038 
3039 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3040                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3041 {
3042     static const TCGOpcode vecop_list[] = {
3043         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3044     };
3045     static const GVecGen2i ops[4] = {
3046         { .fni8 = gen_ssra8_i64,
3047           .fniv = gen_ssra_vec,
3048           .fno = gen_helper_gvec_ssra_b,
3049           .load_dest = true,
3050           .opt_opc = vecop_list,
3051           .vece = MO_8 },
3052         { .fni8 = gen_ssra16_i64,
3053           .fniv = gen_ssra_vec,
3054           .fno = gen_helper_gvec_ssra_h,
3055           .load_dest = true,
3056           .opt_opc = vecop_list,
3057           .vece = MO_16 },
3058         { .fni4 = gen_ssra32_i32,
3059           .fniv = gen_ssra_vec,
3060           .fno = gen_helper_gvec_ssra_s,
3061           .load_dest = true,
3062           .opt_opc = vecop_list,
3063           .vece = MO_32 },
3064         { .fni8 = gen_ssra64_i64,
3065           .fniv = gen_ssra_vec,
3066           .fno = gen_helper_gvec_ssra_b,
3067           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3068           .opt_opc = vecop_list,
3069           .load_dest = true,
3070           .vece = MO_64 },
3071     };
3072 
3073     /* tszimm encoding produces immediates in the range [1..esize]. */
3074     tcg_debug_assert(shift > 0);
3075     tcg_debug_assert(shift <= (8 << vece));
3076 
3077     /*
3078      * Shifts larger than the element size are architecturally valid.
3079      * Signed results in all sign bits.
3080      */
3081     shift = MIN(shift, (8 << vece) - 1);
3082     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3083 }
3084 
3085 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3086 {
3087     tcg_gen_vec_shr8i_i64(a, a, shift);
3088     tcg_gen_vec_add8_i64(d, d, a);
3089 }
3090 
3091 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3092 {
3093     tcg_gen_vec_shr16i_i64(a, a, shift);
3094     tcg_gen_vec_add16_i64(d, d, a);
3095 }
3096 
3097 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3098 {
3099     tcg_gen_shri_i32(a, a, shift);
3100     tcg_gen_add_i32(d, d, a);
3101 }
3102 
3103 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3104 {
3105     tcg_gen_shri_i64(a, a, shift);
3106     tcg_gen_add_i64(d, d, a);
3107 }
3108 
3109 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3110 {
3111     tcg_gen_shri_vec(vece, a, a, sh);
3112     tcg_gen_add_vec(vece, d, d, a);
3113 }
3114 
3115 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3116                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3117 {
3118     static const TCGOpcode vecop_list[] = {
3119         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3120     };
3121     static const GVecGen2i ops[4] = {
3122         { .fni8 = gen_usra8_i64,
3123           .fniv = gen_usra_vec,
3124           .fno = gen_helper_gvec_usra_b,
3125           .load_dest = true,
3126           .opt_opc = vecop_list,
3127           .vece = MO_8, },
3128         { .fni8 = gen_usra16_i64,
3129           .fniv = gen_usra_vec,
3130           .fno = gen_helper_gvec_usra_h,
3131           .load_dest = true,
3132           .opt_opc = vecop_list,
3133           .vece = MO_16, },
3134         { .fni4 = gen_usra32_i32,
3135           .fniv = gen_usra_vec,
3136           .fno = gen_helper_gvec_usra_s,
3137           .load_dest = true,
3138           .opt_opc = vecop_list,
3139           .vece = MO_32, },
3140         { .fni8 = gen_usra64_i64,
3141           .fniv = gen_usra_vec,
3142           .fno = gen_helper_gvec_usra_d,
3143           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3144           .load_dest = true,
3145           .opt_opc = vecop_list,
3146           .vece = MO_64, },
3147     };
3148 
3149     /* tszimm encoding produces immediates in the range [1..esize]. */
3150     tcg_debug_assert(shift > 0);
3151     tcg_debug_assert(shift <= (8 << vece));
3152 
3153     /*
3154      * Shifts larger than the element size are architecturally valid.
3155      * Unsigned results in all zeros as input to accumulate: nop.
3156      */
3157     if (shift < (8 << vece)) {
3158         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3159     } else {
3160         /* Nop, but we do need to clear the tail. */
3161         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3162     }
3163 }
3164 
3165 /*
3166  * Shift one less than the requested amount, and the low bit is
3167  * the rounding bit.  For the 8 and 16-bit operations, because we
3168  * mask the low bit, we can perform a normal integer shift instead
3169  * of a vector shift.
3170  */
3171 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3172 {
3173     TCGv_i64 t = tcg_temp_new_i64();
3174 
3175     tcg_gen_shri_i64(t, a, sh - 1);
3176     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3177     tcg_gen_vec_sar8i_i64(d, a, sh);
3178     tcg_gen_vec_add8_i64(d, d, t);
3179 }
3180 
3181 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3182 {
3183     TCGv_i64 t = tcg_temp_new_i64();
3184 
3185     tcg_gen_shri_i64(t, a, sh - 1);
3186     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3187     tcg_gen_vec_sar16i_i64(d, a, sh);
3188     tcg_gen_vec_add16_i64(d, d, t);
3189 }
3190 
3191 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3192 {
3193     TCGv_i32 t;
3194 
3195     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3196     if (sh == 32) {
3197         tcg_gen_movi_i32(d, 0);
3198         return;
3199     }
3200     t = tcg_temp_new_i32();
3201     tcg_gen_extract_i32(t, a, sh - 1, 1);
3202     tcg_gen_sari_i32(d, a, sh);
3203     tcg_gen_add_i32(d, d, t);
3204 }
3205 
3206 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3207 {
3208     TCGv_i64 t = tcg_temp_new_i64();
3209 
3210     tcg_gen_extract_i64(t, a, sh - 1, 1);
3211     tcg_gen_sari_i64(d, a, sh);
3212     tcg_gen_add_i64(d, d, t);
3213 }
3214 
3215 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3216 {
3217     TCGv_vec t = tcg_temp_new_vec_matching(d);
3218     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3219 
3220     tcg_gen_shri_vec(vece, t, a, sh - 1);
3221     tcg_gen_dupi_vec(vece, ones, 1);
3222     tcg_gen_and_vec(vece, t, t, ones);
3223     tcg_gen_sari_vec(vece, d, a, sh);
3224     tcg_gen_add_vec(vece, d, d, t);
3225 }
3226 
3227 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3228                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3229 {
3230     static const TCGOpcode vecop_list[] = {
3231         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3232     };
3233     static const GVecGen2i ops[4] = {
3234         { .fni8 = gen_srshr8_i64,
3235           .fniv = gen_srshr_vec,
3236           .fno = gen_helper_gvec_srshr_b,
3237           .opt_opc = vecop_list,
3238           .vece = MO_8 },
3239         { .fni8 = gen_srshr16_i64,
3240           .fniv = gen_srshr_vec,
3241           .fno = gen_helper_gvec_srshr_h,
3242           .opt_opc = vecop_list,
3243           .vece = MO_16 },
3244         { .fni4 = gen_srshr32_i32,
3245           .fniv = gen_srshr_vec,
3246           .fno = gen_helper_gvec_srshr_s,
3247           .opt_opc = vecop_list,
3248           .vece = MO_32 },
3249         { .fni8 = gen_srshr64_i64,
3250           .fniv = gen_srshr_vec,
3251           .fno = gen_helper_gvec_srshr_d,
3252           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3253           .opt_opc = vecop_list,
3254           .vece = MO_64 },
3255     };
3256 
3257     /* tszimm encoding produces immediates in the range [1..esize] */
3258     tcg_debug_assert(shift > 0);
3259     tcg_debug_assert(shift <= (8 << vece));
3260 
3261     if (shift == (8 << vece)) {
3262         /*
3263          * Shifts larger than the element size are architecturally valid.
3264          * Signed results in all sign bits.  With rounding, this produces
3265          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3266          * I.e. always zero.
3267          */
3268         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3269     } else {
3270         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3271     }
3272 }
3273 
3274 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3275 {
3276     TCGv_i64 t = tcg_temp_new_i64();
3277 
3278     gen_srshr8_i64(t, a, sh);
3279     tcg_gen_vec_add8_i64(d, d, t);
3280 }
3281 
3282 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3283 {
3284     TCGv_i64 t = tcg_temp_new_i64();
3285 
3286     gen_srshr16_i64(t, a, sh);
3287     tcg_gen_vec_add16_i64(d, d, t);
3288 }
3289 
3290 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3291 {
3292     TCGv_i32 t = tcg_temp_new_i32();
3293 
3294     gen_srshr32_i32(t, a, sh);
3295     tcg_gen_add_i32(d, d, t);
3296 }
3297 
3298 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3299 {
3300     TCGv_i64 t = tcg_temp_new_i64();
3301 
3302     gen_srshr64_i64(t, a, sh);
3303     tcg_gen_add_i64(d, d, t);
3304 }
3305 
3306 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3307 {
3308     TCGv_vec t = tcg_temp_new_vec_matching(d);
3309 
3310     gen_srshr_vec(vece, t, a, sh);
3311     tcg_gen_add_vec(vece, d, d, t);
3312 }
3313 
3314 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3315                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3316 {
3317     static const TCGOpcode vecop_list[] = {
3318         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3319     };
3320     static const GVecGen2i ops[4] = {
3321         { .fni8 = gen_srsra8_i64,
3322           .fniv = gen_srsra_vec,
3323           .fno = gen_helper_gvec_srsra_b,
3324           .opt_opc = vecop_list,
3325           .load_dest = true,
3326           .vece = MO_8 },
3327         { .fni8 = gen_srsra16_i64,
3328           .fniv = gen_srsra_vec,
3329           .fno = gen_helper_gvec_srsra_h,
3330           .opt_opc = vecop_list,
3331           .load_dest = true,
3332           .vece = MO_16 },
3333         { .fni4 = gen_srsra32_i32,
3334           .fniv = gen_srsra_vec,
3335           .fno = gen_helper_gvec_srsra_s,
3336           .opt_opc = vecop_list,
3337           .load_dest = true,
3338           .vece = MO_32 },
3339         { .fni8 = gen_srsra64_i64,
3340           .fniv = gen_srsra_vec,
3341           .fno = gen_helper_gvec_srsra_d,
3342           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3343           .opt_opc = vecop_list,
3344           .load_dest = true,
3345           .vece = MO_64 },
3346     };
3347 
3348     /* tszimm encoding produces immediates in the range [1..esize] */
3349     tcg_debug_assert(shift > 0);
3350     tcg_debug_assert(shift <= (8 << vece));
3351 
3352     /*
3353      * Shifts larger than the element size are architecturally valid.
3354      * Signed results in all sign bits.  With rounding, this produces
3355      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3356      * I.e. always zero.  With accumulation, this leaves D unchanged.
3357      */
3358     if (shift == (8 << vece)) {
3359         /* Nop, but we do need to clear the tail. */
3360         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3361     } else {
3362         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3363     }
3364 }
3365 
3366 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3367 {
3368     TCGv_i64 t = tcg_temp_new_i64();
3369 
3370     tcg_gen_shri_i64(t, a, sh - 1);
3371     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3372     tcg_gen_vec_shr8i_i64(d, a, sh);
3373     tcg_gen_vec_add8_i64(d, d, t);
3374 }
3375 
3376 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3377 {
3378     TCGv_i64 t = tcg_temp_new_i64();
3379 
3380     tcg_gen_shri_i64(t, a, sh - 1);
3381     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3382     tcg_gen_vec_shr16i_i64(d, a, sh);
3383     tcg_gen_vec_add16_i64(d, d, t);
3384 }
3385 
3386 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3387 {
3388     TCGv_i32 t;
3389 
3390     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3391     if (sh == 32) {
3392         tcg_gen_extract_i32(d, a, sh - 1, 1);
3393         return;
3394     }
3395     t = tcg_temp_new_i32();
3396     tcg_gen_extract_i32(t, a, sh - 1, 1);
3397     tcg_gen_shri_i32(d, a, sh);
3398     tcg_gen_add_i32(d, d, t);
3399 }
3400 
3401 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3402 {
3403     TCGv_i64 t = tcg_temp_new_i64();
3404 
3405     tcg_gen_extract_i64(t, a, sh - 1, 1);
3406     tcg_gen_shri_i64(d, a, sh);
3407     tcg_gen_add_i64(d, d, t);
3408 }
3409 
3410 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3411 {
3412     TCGv_vec t = tcg_temp_new_vec_matching(d);
3413     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3414 
3415     tcg_gen_shri_vec(vece, t, a, shift - 1);
3416     tcg_gen_dupi_vec(vece, ones, 1);
3417     tcg_gen_and_vec(vece, t, t, ones);
3418     tcg_gen_shri_vec(vece, d, a, shift);
3419     tcg_gen_add_vec(vece, d, d, t);
3420 }
3421 
3422 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3423                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3424 {
3425     static const TCGOpcode vecop_list[] = {
3426         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3427     };
3428     static const GVecGen2i ops[4] = {
3429         { .fni8 = gen_urshr8_i64,
3430           .fniv = gen_urshr_vec,
3431           .fno = gen_helper_gvec_urshr_b,
3432           .opt_opc = vecop_list,
3433           .vece = MO_8 },
3434         { .fni8 = gen_urshr16_i64,
3435           .fniv = gen_urshr_vec,
3436           .fno = gen_helper_gvec_urshr_h,
3437           .opt_opc = vecop_list,
3438           .vece = MO_16 },
3439         { .fni4 = gen_urshr32_i32,
3440           .fniv = gen_urshr_vec,
3441           .fno = gen_helper_gvec_urshr_s,
3442           .opt_opc = vecop_list,
3443           .vece = MO_32 },
3444         { .fni8 = gen_urshr64_i64,
3445           .fniv = gen_urshr_vec,
3446           .fno = gen_helper_gvec_urshr_d,
3447           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3448           .opt_opc = vecop_list,
3449           .vece = MO_64 },
3450     };
3451 
3452     /* tszimm encoding produces immediates in the range [1..esize] */
3453     tcg_debug_assert(shift > 0);
3454     tcg_debug_assert(shift <= (8 << vece));
3455 
3456     if (shift == (8 << vece)) {
3457         /*
3458          * Shifts larger than the element size are architecturally valid.
3459          * Unsigned results in zero.  With rounding, this produces a
3460          * copy of the most significant bit.
3461          */
3462         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3463     } else {
3464         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3465     }
3466 }
3467 
3468 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3469 {
3470     TCGv_i64 t = tcg_temp_new_i64();
3471 
3472     if (sh == 8) {
3473         tcg_gen_vec_shr8i_i64(t, a, 7);
3474     } else {
3475         gen_urshr8_i64(t, a, sh);
3476     }
3477     tcg_gen_vec_add8_i64(d, d, t);
3478 }
3479 
3480 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3481 {
3482     TCGv_i64 t = tcg_temp_new_i64();
3483 
3484     if (sh == 16) {
3485         tcg_gen_vec_shr16i_i64(t, a, 15);
3486     } else {
3487         gen_urshr16_i64(t, a, sh);
3488     }
3489     tcg_gen_vec_add16_i64(d, d, t);
3490 }
3491 
3492 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3493 {
3494     TCGv_i32 t = tcg_temp_new_i32();
3495 
3496     if (sh == 32) {
3497         tcg_gen_shri_i32(t, a, 31);
3498     } else {
3499         gen_urshr32_i32(t, a, sh);
3500     }
3501     tcg_gen_add_i32(d, d, t);
3502 }
3503 
3504 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3505 {
3506     TCGv_i64 t = tcg_temp_new_i64();
3507 
3508     if (sh == 64) {
3509         tcg_gen_shri_i64(t, a, 63);
3510     } else {
3511         gen_urshr64_i64(t, a, sh);
3512     }
3513     tcg_gen_add_i64(d, d, t);
3514 }
3515 
3516 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3517 {
3518     TCGv_vec t = tcg_temp_new_vec_matching(d);
3519 
3520     if (sh == (8 << vece)) {
3521         tcg_gen_shri_vec(vece, t, a, sh - 1);
3522     } else {
3523         gen_urshr_vec(vece, t, a, sh);
3524     }
3525     tcg_gen_add_vec(vece, d, d, t);
3526 }
3527 
3528 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3529                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3530 {
3531     static const TCGOpcode vecop_list[] = {
3532         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3533     };
3534     static const GVecGen2i ops[4] = {
3535         { .fni8 = gen_ursra8_i64,
3536           .fniv = gen_ursra_vec,
3537           .fno = gen_helper_gvec_ursra_b,
3538           .opt_opc = vecop_list,
3539           .load_dest = true,
3540           .vece = MO_8 },
3541         { .fni8 = gen_ursra16_i64,
3542           .fniv = gen_ursra_vec,
3543           .fno = gen_helper_gvec_ursra_h,
3544           .opt_opc = vecop_list,
3545           .load_dest = true,
3546           .vece = MO_16 },
3547         { .fni4 = gen_ursra32_i32,
3548           .fniv = gen_ursra_vec,
3549           .fno = gen_helper_gvec_ursra_s,
3550           .opt_opc = vecop_list,
3551           .load_dest = true,
3552           .vece = MO_32 },
3553         { .fni8 = gen_ursra64_i64,
3554           .fniv = gen_ursra_vec,
3555           .fno = gen_helper_gvec_ursra_d,
3556           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3557           .opt_opc = vecop_list,
3558           .load_dest = true,
3559           .vece = MO_64 },
3560     };
3561 
3562     /* tszimm encoding produces immediates in the range [1..esize] */
3563     tcg_debug_assert(shift > 0);
3564     tcg_debug_assert(shift <= (8 << vece));
3565 
3566     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3567 }
3568 
3569 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3570 {
3571     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3572     TCGv_i64 t = tcg_temp_new_i64();
3573 
3574     tcg_gen_shri_i64(t, a, shift);
3575     tcg_gen_andi_i64(t, t, mask);
3576     tcg_gen_andi_i64(d, d, ~mask);
3577     tcg_gen_or_i64(d, d, t);
3578 }
3579 
3580 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3581 {
3582     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3583     TCGv_i64 t = tcg_temp_new_i64();
3584 
3585     tcg_gen_shri_i64(t, a, shift);
3586     tcg_gen_andi_i64(t, t, mask);
3587     tcg_gen_andi_i64(d, d, ~mask);
3588     tcg_gen_or_i64(d, d, t);
3589 }
3590 
3591 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3592 {
3593     tcg_gen_shri_i32(a, a, shift);
3594     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3595 }
3596 
3597 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3598 {
3599     tcg_gen_shri_i64(a, a, shift);
3600     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3601 }
3602 
3603 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3604 {
3605     TCGv_vec t = tcg_temp_new_vec_matching(d);
3606     TCGv_vec m = tcg_temp_new_vec_matching(d);
3607 
3608     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3609     tcg_gen_shri_vec(vece, t, a, sh);
3610     tcg_gen_and_vec(vece, d, d, m);
3611     tcg_gen_or_vec(vece, d, d, t);
3612 }
3613 
3614 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3615                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3616 {
3617     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3618     const GVecGen2i ops[4] = {
3619         { .fni8 = gen_shr8_ins_i64,
3620           .fniv = gen_shr_ins_vec,
3621           .fno = gen_helper_gvec_sri_b,
3622           .load_dest = true,
3623           .opt_opc = vecop_list,
3624           .vece = MO_8 },
3625         { .fni8 = gen_shr16_ins_i64,
3626           .fniv = gen_shr_ins_vec,
3627           .fno = gen_helper_gvec_sri_h,
3628           .load_dest = true,
3629           .opt_opc = vecop_list,
3630           .vece = MO_16 },
3631         { .fni4 = gen_shr32_ins_i32,
3632           .fniv = gen_shr_ins_vec,
3633           .fno = gen_helper_gvec_sri_s,
3634           .load_dest = true,
3635           .opt_opc = vecop_list,
3636           .vece = MO_32 },
3637         { .fni8 = gen_shr64_ins_i64,
3638           .fniv = gen_shr_ins_vec,
3639           .fno = gen_helper_gvec_sri_d,
3640           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3641           .load_dest = true,
3642           .opt_opc = vecop_list,
3643           .vece = MO_64 },
3644     };
3645 
3646     /* tszimm encoding produces immediates in the range [1..esize]. */
3647     tcg_debug_assert(shift > 0);
3648     tcg_debug_assert(shift <= (8 << vece));
3649 
3650     /* Shift of esize leaves destination unchanged. */
3651     if (shift < (8 << vece)) {
3652         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3653     } else {
3654         /* Nop, but we do need to clear the tail. */
3655         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3656     }
3657 }
3658 
3659 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3660 {
3661     uint64_t mask = dup_const(MO_8, 0xff << shift);
3662     TCGv_i64 t = tcg_temp_new_i64();
3663 
3664     tcg_gen_shli_i64(t, a, shift);
3665     tcg_gen_andi_i64(t, t, mask);
3666     tcg_gen_andi_i64(d, d, ~mask);
3667     tcg_gen_or_i64(d, d, t);
3668 }
3669 
3670 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3671 {
3672     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3673     TCGv_i64 t = tcg_temp_new_i64();
3674 
3675     tcg_gen_shli_i64(t, a, shift);
3676     tcg_gen_andi_i64(t, t, mask);
3677     tcg_gen_andi_i64(d, d, ~mask);
3678     tcg_gen_or_i64(d, d, t);
3679 }
3680 
3681 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3682 {
3683     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3684 }
3685 
3686 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3687 {
3688     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3689 }
3690 
3691 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3692 {
3693     TCGv_vec t = tcg_temp_new_vec_matching(d);
3694     TCGv_vec m = tcg_temp_new_vec_matching(d);
3695 
3696     tcg_gen_shli_vec(vece, t, a, sh);
3697     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3698     tcg_gen_and_vec(vece, d, d, m);
3699     tcg_gen_or_vec(vece, d, d, t);
3700 }
3701 
3702 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3703                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3704 {
3705     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3706     const GVecGen2i ops[4] = {
3707         { .fni8 = gen_shl8_ins_i64,
3708           .fniv = gen_shl_ins_vec,
3709           .fno = gen_helper_gvec_sli_b,
3710           .load_dest = true,
3711           .opt_opc = vecop_list,
3712           .vece = MO_8 },
3713         { .fni8 = gen_shl16_ins_i64,
3714           .fniv = gen_shl_ins_vec,
3715           .fno = gen_helper_gvec_sli_h,
3716           .load_dest = true,
3717           .opt_opc = vecop_list,
3718           .vece = MO_16 },
3719         { .fni4 = gen_shl32_ins_i32,
3720           .fniv = gen_shl_ins_vec,
3721           .fno = gen_helper_gvec_sli_s,
3722           .load_dest = true,
3723           .opt_opc = vecop_list,
3724           .vece = MO_32 },
3725         { .fni8 = gen_shl64_ins_i64,
3726           .fniv = gen_shl_ins_vec,
3727           .fno = gen_helper_gvec_sli_d,
3728           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3729           .load_dest = true,
3730           .opt_opc = vecop_list,
3731           .vece = MO_64 },
3732     };
3733 
3734     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3735     tcg_debug_assert(shift >= 0);
3736     tcg_debug_assert(shift < (8 << vece));
3737 
3738     if (shift == 0) {
3739         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3740     } else {
3741         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3742     }
3743 }
3744 
3745 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3746 {
3747     gen_helper_neon_mul_u8(a, a, b);
3748     gen_helper_neon_add_u8(d, d, a);
3749 }
3750 
3751 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3752 {
3753     gen_helper_neon_mul_u8(a, a, b);
3754     gen_helper_neon_sub_u8(d, d, a);
3755 }
3756 
3757 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3758 {
3759     gen_helper_neon_mul_u16(a, a, b);
3760     gen_helper_neon_add_u16(d, d, a);
3761 }
3762 
3763 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3764 {
3765     gen_helper_neon_mul_u16(a, a, b);
3766     gen_helper_neon_sub_u16(d, d, a);
3767 }
3768 
3769 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3770 {
3771     tcg_gen_mul_i32(a, a, b);
3772     tcg_gen_add_i32(d, d, a);
3773 }
3774 
3775 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3776 {
3777     tcg_gen_mul_i32(a, a, b);
3778     tcg_gen_sub_i32(d, d, a);
3779 }
3780 
3781 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3782 {
3783     tcg_gen_mul_i64(a, a, b);
3784     tcg_gen_add_i64(d, d, a);
3785 }
3786 
3787 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3788 {
3789     tcg_gen_mul_i64(a, a, b);
3790     tcg_gen_sub_i64(d, d, a);
3791 }
3792 
3793 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3794 {
3795     tcg_gen_mul_vec(vece, a, a, b);
3796     tcg_gen_add_vec(vece, d, d, a);
3797 }
3798 
3799 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3800 {
3801     tcg_gen_mul_vec(vece, a, a, b);
3802     tcg_gen_sub_vec(vece, d, d, a);
3803 }
3804 
3805 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3806  * these tables are shared with AArch64 which does support them.
3807  */
3808 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3809                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3810 {
3811     static const TCGOpcode vecop_list[] = {
3812         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3813     };
3814     static const GVecGen3 ops[4] = {
3815         { .fni4 = gen_mla8_i32,
3816           .fniv = gen_mla_vec,
3817           .load_dest = true,
3818           .opt_opc = vecop_list,
3819           .vece = MO_8 },
3820         { .fni4 = gen_mla16_i32,
3821           .fniv = gen_mla_vec,
3822           .load_dest = true,
3823           .opt_opc = vecop_list,
3824           .vece = MO_16 },
3825         { .fni4 = gen_mla32_i32,
3826           .fniv = gen_mla_vec,
3827           .load_dest = true,
3828           .opt_opc = vecop_list,
3829           .vece = MO_32 },
3830         { .fni8 = gen_mla64_i64,
3831           .fniv = gen_mla_vec,
3832           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3833           .load_dest = true,
3834           .opt_opc = vecop_list,
3835           .vece = MO_64 },
3836     };
3837     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3838 }
3839 
3840 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3841                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3842 {
3843     static const TCGOpcode vecop_list[] = {
3844         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3845     };
3846     static const GVecGen3 ops[4] = {
3847         { .fni4 = gen_mls8_i32,
3848           .fniv = gen_mls_vec,
3849           .load_dest = true,
3850           .opt_opc = vecop_list,
3851           .vece = MO_8 },
3852         { .fni4 = gen_mls16_i32,
3853           .fniv = gen_mls_vec,
3854           .load_dest = true,
3855           .opt_opc = vecop_list,
3856           .vece = MO_16 },
3857         { .fni4 = gen_mls32_i32,
3858           .fniv = gen_mls_vec,
3859           .load_dest = true,
3860           .opt_opc = vecop_list,
3861           .vece = MO_32 },
3862         { .fni8 = gen_mls64_i64,
3863           .fniv = gen_mls_vec,
3864           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3865           .load_dest = true,
3866           .opt_opc = vecop_list,
3867           .vece = MO_64 },
3868     };
3869     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3870 }
3871 
3872 /* CMTST : test is "if (X & Y != 0)". */
3873 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3874 {
3875     tcg_gen_and_i32(d, a, b);
3876     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3877     tcg_gen_neg_i32(d, d);
3878 }
3879 
3880 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3881 {
3882     tcg_gen_and_i64(d, a, b);
3883     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3884     tcg_gen_neg_i64(d, d);
3885 }
3886 
3887 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3888 {
3889     tcg_gen_and_vec(vece, d, a, b);
3890     tcg_gen_dupi_vec(vece, a, 0);
3891     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3892 }
3893 
3894 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3895                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3896 {
3897     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3898     static const GVecGen3 ops[4] = {
3899         { .fni4 = gen_helper_neon_tst_u8,
3900           .fniv = gen_cmtst_vec,
3901           .opt_opc = vecop_list,
3902           .vece = MO_8 },
3903         { .fni4 = gen_helper_neon_tst_u16,
3904           .fniv = gen_cmtst_vec,
3905           .opt_opc = vecop_list,
3906           .vece = MO_16 },
3907         { .fni4 = gen_cmtst_i32,
3908           .fniv = gen_cmtst_vec,
3909           .opt_opc = vecop_list,
3910           .vece = MO_32 },
3911         { .fni8 = gen_cmtst_i64,
3912           .fniv = gen_cmtst_vec,
3913           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3914           .opt_opc = vecop_list,
3915           .vece = MO_64 },
3916     };
3917     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3918 }
3919 
3920 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3921 {
3922     TCGv_i32 lval = tcg_temp_new_i32();
3923     TCGv_i32 rval = tcg_temp_new_i32();
3924     TCGv_i32 lsh = tcg_temp_new_i32();
3925     TCGv_i32 rsh = tcg_temp_new_i32();
3926     TCGv_i32 zero = tcg_constant_i32(0);
3927     TCGv_i32 max = tcg_constant_i32(32);
3928 
3929     /*
3930      * Rely on the TCG guarantee that out of range shifts produce
3931      * unspecified results, not undefined behaviour (i.e. no trap).
3932      * Discard out-of-range results after the fact.
3933      */
3934     tcg_gen_ext8s_i32(lsh, shift);
3935     tcg_gen_neg_i32(rsh, lsh);
3936     tcg_gen_shl_i32(lval, src, lsh);
3937     tcg_gen_shr_i32(rval, src, rsh);
3938     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3939     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3940 }
3941 
3942 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3943 {
3944     TCGv_i64 lval = tcg_temp_new_i64();
3945     TCGv_i64 rval = tcg_temp_new_i64();
3946     TCGv_i64 lsh = tcg_temp_new_i64();
3947     TCGv_i64 rsh = tcg_temp_new_i64();
3948     TCGv_i64 zero = tcg_constant_i64(0);
3949     TCGv_i64 max = tcg_constant_i64(64);
3950 
3951     /*
3952      * Rely on the TCG guarantee that out of range shifts produce
3953      * unspecified results, not undefined behaviour (i.e. no trap).
3954      * Discard out-of-range results after the fact.
3955      */
3956     tcg_gen_ext8s_i64(lsh, shift);
3957     tcg_gen_neg_i64(rsh, lsh);
3958     tcg_gen_shl_i64(lval, src, lsh);
3959     tcg_gen_shr_i64(rval, src, rsh);
3960     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3961     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3962 }
3963 
3964 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3965                          TCGv_vec src, TCGv_vec shift)
3966 {
3967     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3968     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3969     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3970     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3971     TCGv_vec msk, max;
3972 
3973     tcg_gen_neg_vec(vece, rsh, shift);
3974     if (vece == MO_8) {
3975         tcg_gen_mov_vec(lsh, shift);
3976     } else {
3977         msk = tcg_temp_new_vec_matching(dst);
3978         tcg_gen_dupi_vec(vece, msk, 0xff);
3979         tcg_gen_and_vec(vece, lsh, shift, msk);
3980         tcg_gen_and_vec(vece, rsh, rsh, msk);
3981     }
3982 
3983     /*
3984      * Rely on the TCG guarantee that out of range shifts produce
3985      * unspecified results, not undefined behaviour (i.e. no trap).
3986      * Discard out-of-range results after the fact.
3987      */
3988     tcg_gen_shlv_vec(vece, lval, src, lsh);
3989     tcg_gen_shrv_vec(vece, rval, src, rsh);
3990 
3991     max = tcg_temp_new_vec_matching(dst);
3992     tcg_gen_dupi_vec(vece, max, 8 << vece);
3993 
3994     /*
3995      * The choice of LT (signed) and GEU (unsigned) are biased toward
3996      * the instructions of the x86_64 host.  For MO_8, the whole byte
3997      * is significant so we must use an unsigned compare; otherwise we
3998      * have already masked to a byte and so a signed compare works.
3999      * Other tcg hosts have a full set of comparisons and do not care.
4000      */
4001     if (vece == MO_8) {
4002         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4003         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4004         tcg_gen_andc_vec(vece, lval, lval, lsh);
4005         tcg_gen_andc_vec(vece, rval, rval, rsh);
4006     } else {
4007         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4008         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4009         tcg_gen_and_vec(vece, lval, lval, lsh);
4010         tcg_gen_and_vec(vece, rval, rval, rsh);
4011     }
4012     tcg_gen_or_vec(vece, dst, lval, rval);
4013 }
4014 
4015 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4016                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4017 {
4018     static const TCGOpcode vecop_list[] = {
4019         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4020         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4021     };
4022     static const GVecGen3 ops[4] = {
4023         { .fniv = gen_ushl_vec,
4024           .fno = gen_helper_gvec_ushl_b,
4025           .opt_opc = vecop_list,
4026           .vece = MO_8 },
4027         { .fniv = gen_ushl_vec,
4028           .fno = gen_helper_gvec_ushl_h,
4029           .opt_opc = vecop_list,
4030           .vece = MO_16 },
4031         { .fni4 = gen_ushl_i32,
4032           .fniv = gen_ushl_vec,
4033           .opt_opc = vecop_list,
4034           .vece = MO_32 },
4035         { .fni8 = gen_ushl_i64,
4036           .fniv = gen_ushl_vec,
4037           .opt_opc = vecop_list,
4038           .vece = MO_64 },
4039     };
4040     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4041 }
4042 
4043 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4044 {
4045     TCGv_i32 lval = tcg_temp_new_i32();
4046     TCGv_i32 rval = tcg_temp_new_i32();
4047     TCGv_i32 lsh = tcg_temp_new_i32();
4048     TCGv_i32 rsh = tcg_temp_new_i32();
4049     TCGv_i32 zero = tcg_constant_i32(0);
4050     TCGv_i32 max = tcg_constant_i32(31);
4051 
4052     /*
4053      * Rely on the TCG guarantee that out of range shifts produce
4054      * unspecified results, not undefined behaviour (i.e. no trap).
4055      * Discard out-of-range results after the fact.
4056      */
4057     tcg_gen_ext8s_i32(lsh, shift);
4058     tcg_gen_neg_i32(rsh, lsh);
4059     tcg_gen_shl_i32(lval, src, lsh);
4060     tcg_gen_umin_i32(rsh, rsh, max);
4061     tcg_gen_sar_i32(rval, src, rsh);
4062     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4063     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4064 }
4065 
4066 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4067 {
4068     TCGv_i64 lval = tcg_temp_new_i64();
4069     TCGv_i64 rval = tcg_temp_new_i64();
4070     TCGv_i64 lsh = tcg_temp_new_i64();
4071     TCGv_i64 rsh = tcg_temp_new_i64();
4072     TCGv_i64 zero = tcg_constant_i64(0);
4073     TCGv_i64 max = tcg_constant_i64(63);
4074 
4075     /*
4076      * Rely on the TCG guarantee that out of range shifts produce
4077      * unspecified results, not undefined behaviour (i.e. no trap).
4078      * Discard out-of-range results after the fact.
4079      */
4080     tcg_gen_ext8s_i64(lsh, shift);
4081     tcg_gen_neg_i64(rsh, lsh);
4082     tcg_gen_shl_i64(lval, src, lsh);
4083     tcg_gen_umin_i64(rsh, rsh, max);
4084     tcg_gen_sar_i64(rval, src, rsh);
4085     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4086     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4087 }
4088 
4089 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4090                          TCGv_vec src, TCGv_vec shift)
4091 {
4092     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4093     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4094     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4095     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4096     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4097 
4098     /*
4099      * Rely on the TCG guarantee that out of range shifts produce
4100      * unspecified results, not undefined behaviour (i.e. no trap).
4101      * Discard out-of-range results after the fact.
4102      */
4103     tcg_gen_neg_vec(vece, rsh, shift);
4104     if (vece == MO_8) {
4105         tcg_gen_mov_vec(lsh, shift);
4106     } else {
4107         tcg_gen_dupi_vec(vece, tmp, 0xff);
4108         tcg_gen_and_vec(vece, lsh, shift, tmp);
4109         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4110     }
4111 
4112     /* Bound rsh so out of bound right shift gets -1.  */
4113     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4114     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4115     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4116 
4117     tcg_gen_shlv_vec(vece, lval, src, lsh);
4118     tcg_gen_sarv_vec(vece, rval, src, rsh);
4119 
4120     /* Select in-bound left shift.  */
4121     tcg_gen_andc_vec(vece, lval, lval, tmp);
4122 
4123     /* Select between left and right shift.  */
4124     if (vece == MO_8) {
4125         tcg_gen_dupi_vec(vece, tmp, 0);
4126         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4127     } else {
4128         tcg_gen_dupi_vec(vece, tmp, 0x80);
4129         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4130     }
4131 }
4132 
4133 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4134                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4135 {
4136     static const TCGOpcode vecop_list[] = {
4137         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4138         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4139     };
4140     static const GVecGen3 ops[4] = {
4141         { .fniv = gen_sshl_vec,
4142           .fno = gen_helper_gvec_sshl_b,
4143           .opt_opc = vecop_list,
4144           .vece = MO_8 },
4145         { .fniv = gen_sshl_vec,
4146           .fno = gen_helper_gvec_sshl_h,
4147           .opt_opc = vecop_list,
4148           .vece = MO_16 },
4149         { .fni4 = gen_sshl_i32,
4150           .fniv = gen_sshl_vec,
4151           .opt_opc = vecop_list,
4152           .vece = MO_32 },
4153         { .fni8 = gen_sshl_i64,
4154           .fniv = gen_sshl_vec,
4155           .opt_opc = vecop_list,
4156           .vece = MO_64 },
4157     };
4158     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4159 }
4160 
4161 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4162                           TCGv_vec a, TCGv_vec b)
4163 {
4164     TCGv_vec x = tcg_temp_new_vec_matching(t);
4165     tcg_gen_add_vec(vece, x, a, b);
4166     tcg_gen_usadd_vec(vece, t, a, b);
4167     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4168     tcg_gen_or_vec(vece, sat, sat, x);
4169 }
4170 
4171 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4172                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4173 {
4174     static const TCGOpcode vecop_list[] = {
4175         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4176     };
4177     static const GVecGen4 ops[4] = {
4178         { .fniv = gen_uqadd_vec,
4179           .fno = gen_helper_gvec_uqadd_b,
4180           .write_aofs = true,
4181           .opt_opc = vecop_list,
4182           .vece = MO_8 },
4183         { .fniv = gen_uqadd_vec,
4184           .fno = gen_helper_gvec_uqadd_h,
4185           .write_aofs = true,
4186           .opt_opc = vecop_list,
4187           .vece = MO_16 },
4188         { .fniv = gen_uqadd_vec,
4189           .fno = gen_helper_gvec_uqadd_s,
4190           .write_aofs = true,
4191           .opt_opc = vecop_list,
4192           .vece = MO_32 },
4193         { .fniv = gen_uqadd_vec,
4194           .fno = gen_helper_gvec_uqadd_d,
4195           .write_aofs = true,
4196           .opt_opc = vecop_list,
4197           .vece = MO_64 },
4198     };
4199     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4200                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4201 }
4202 
4203 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4204                           TCGv_vec a, TCGv_vec b)
4205 {
4206     TCGv_vec x = tcg_temp_new_vec_matching(t);
4207     tcg_gen_add_vec(vece, x, a, b);
4208     tcg_gen_ssadd_vec(vece, t, a, b);
4209     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4210     tcg_gen_or_vec(vece, sat, sat, x);
4211 }
4212 
4213 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4214                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4215 {
4216     static const TCGOpcode vecop_list[] = {
4217         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4218     };
4219     static const GVecGen4 ops[4] = {
4220         { .fniv = gen_sqadd_vec,
4221           .fno = gen_helper_gvec_sqadd_b,
4222           .opt_opc = vecop_list,
4223           .write_aofs = true,
4224           .vece = MO_8 },
4225         { .fniv = gen_sqadd_vec,
4226           .fno = gen_helper_gvec_sqadd_h,
4227           .opt_opc = vecop_list,
4228           .write_aofs = true,
4229           .vece = MO_16 },
4230         { .fniv = gen_sqadd_vec,
4231           .fno = gen_helper_gvec_sqadd_s,
4232           .opt_opc = vecop_list,
4233           .write_aofs = true,
4234           .vece = MO_32 },
4235         { .fniv = gen_sqadd_vec,
4236           .fno = gen_helper_gvec_sqadd_d,
4237           .opt_opc = vecop_list,
4238           .write_aofs = true,
4239           .vece = MO_64 },
4240     };
4241     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4242                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4243 }
4244 
4245 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4246                           TCGv_vec a, TCGv_vec b)
4247 {
4248     TCGv_vec x = tcg_temp_new_vec_matching(t);
4249     tcg_gen_sub_vec(vece, x, a, b);
4250     tcg_gen_ussub_vec(vece, t, a, b);
4251     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4252     tcg_gen_or_vec(vece, sat, sat, x);
4253 }
4254 
4255 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4256                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4257 {
4258     static const TCGOpcode vecop_list[] = {
4259         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4260     };
4261     static const GVecGen4 ops[4] = {
4262         { .fniv = gen_uqsub_vec,
4263           .fno = gen_helper_gvec_uqsub_b,
4264           .opt_opc = vecop_list,
4265           .write_aofs = true,
4266           .vece = MO_8 },
4267         { .fniv = gen_uqsub_vec,
4268           .fno = gen_helper_gvec_uqsub_h,
4269           .opt_opc = vecop_list,
4270           .write_aofs = true,
4271           .vece = MO_16 },
4272         { .fniv = gen_uqsub_vec,
4273           .fno = gen_helper_gvec_uqsub_s,
4274           .opt_opc = vecop_list,
4275           .write_aofs = true,
4276           .vece = MO_32 },
4277         { .fniv = gen_uqsub_vec,
4278           .fno = gen_helper_gvec_uqsub_d,
4279           .opt_opc = vecop_list,
4280           .write_aofs = true,
4281           .vece = MO_64 },
4282     };
4283     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4284                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4285 }
4286 
4287 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4288                           TCGv_vec a, TCGv_vec b)
4289 {
4290     TCGv_vec x = tcg_temp_new_vec_matching(t);
4291     tcg_gen_sub_vec(vece, x, a, b);
4292     tcg_gen_sssub_vec(vece, t, a, b);
4293     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4294     tcg_gen_or_vec(vece, sat, sat, x);
4295 }
4296 
4297 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4298                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4299 {
4300     static const TCGOpcode vecop_list[] = {
4301         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4302     };
4303     static const GVecGen4 ops[4] = {
4304         { .fniv = gen_sqsub_vec,
4305           .fno = gen_helper_gvec_sqsub_b,
4306           .opt_opc = vecop_list,
4307           .write_aofs = true,
4308           .vece = MO_8 },
4309         { .fniv = gen_sqsub_vec,
4310           .fno = gen_helper_gvec_sqsub_h,
4311           .opt_opc = vecop_list,
4312           .write_aofs = true,
4313           .vece = MO_16 },
4314         { .fniv = gen_sqsub_vec,
4315           .fno = gen_helper_gvec_sqsub_s,
4316           .opt_opc = vecop_list,
4317           .write_aofs = true,
4318           .vece = MO_32 },
4319         { .fniv = gen_sqsub_vec,
4320           .fno = gen_helper_gvec_sqsub_d,
4321           .opt_opc = vecop_list,
4322           .write_aofs = true,
4323           .vece = MO_64 },
4324     };
4325     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4326                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4327 }
4328 
4329 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4330 {
4331     TCGv_i32 t = tcg_temp_new_i32();
4332 
4333     tcg_gen_sub_i32(t, a, b);
4334     tcg_gen_sub_i32(d, b, a);
4335     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4336 }
4337 
4338 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4339 {
4340     TCGv_i64 t = tcg_temp_new_i64();
4341 
4342     tcg_gen_sub_i64(t, a, b);
4343     tcg_gen_sub_i64(d, b, a);
4344     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4345 }
4346 
4347 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4348 {
4349     TCGv_vec t = tcg_temp_new_vec_matching(d);
4350 
4351     tcg_gen_smin_vec(vece, t, a, b);
4352     tcg_gen_smax_vec(vece, d, a, b);
4353     tcg_gen_sub_vec(vece, d, d, t);
4354 }
4355 
4356 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4357                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4358 {
4359     static const TCGOpcode vecop_list[] = {
4360         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4361     };
4362     static const GVecGen3 ops[4] = {
4363         { .fniv = gen_sabd_vec,
4364           .fno = gen_helper_gvec_sabd_b,
4365           .opt_opc = vecop_list,
4366           .vece = MO_8 },
4367         { .fniv = gen_sabd_vec,
4368           .fno = gen_helper_gvec_sabd_h,
4369           .opt_opc = vecop_list,
4370           .vece = MO_16 },
4371         { .fni4 = gen_sabd_i32,
4372           .fniv = gen_sabd_vec,
4373           .fno = gen_helper_gvec_sabd_s,
4374           .opt_opc = vecop_list,
4375           .vece = MO_32 },
4376         { .fni8 = gen_sabd_i64,
4377           .fniv = gen_sabd_vec,
4378           .fno = gen_helper_gvec_sabd_d,
4379           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4380           .opt_opc = vecop_list,
4381           .vece = MO_64 },
4382     };
4383     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4384 }
4385 
4386 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4387 {
4388     TCGv_i32 t = tcg_temp_new_i32();
4389 
4390     tcg_gen_sub_i32(t, a, b);
4391     tcg_gen_sub_i32(d, b, a);
4392     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4393 }
4394 
4395 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4396 {
4397     TCGv_i64 t = tcg_temp_new_i64();
4398 
4399     tcg_gen_sub_i64(t, a, b);
4400     tcg_gen_sub_i64(d, b, a);
4401     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4402 }
4403 
4404 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4405 {
4406     TCGv_vec t = tcg_temp_new_vec_matching(d);
4407 
4408     tcg_gen_umin_vec(vece, t, a, b);
4409     tcg_gen_umax_vec(vece, d, a, b);
4410     tcg_gen_sub_vec(vece, d, d, t);
4411 }
4412 
4413 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4414                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4415 {
4416     static const TCGOpcode vecop_list[] = {
4417         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4418     };
4419     static const GVecGen3 ops[4] = {
4420         { .fniv = gen_uabd_vec,
4421           .fno = gen_helper_gvec_uabd_b,
4422           .opt_opc = vecop_list,
4423           .vece = MO_8 },
4424         { .fniv = gen_uabd_vec,
4425           .fno = gen_helper_gvec_uabd_h,
4426           .opt_opc = vecop_list,
4427           .vece = MO_16 },
4428         { .fni4 = gen_uabd_i32,
4429           .fniv = gen_uabd_vec,
4430           .fno = gen_helper_gvec_uabd_s,
4431           .opt_opc = vecop_list,
4432           .vece = MO_32 },
4433         { .fni8 = gen_uabd_i64,
4434           .fniv = gen_uabd_vec,
4435           .fno = gen_helper_gvec_uabd_d,
4436           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4437           .opt_opc = vecop_list,
4438           .vece = MO_64 },
4439     };
4440     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4441 }
4442 
4443 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4444 {
4445     TCGv_i32 t = tcg_temp_new_i32();
4446     gen_sabd_i32(t, a, b);
4447     tcg_gen_add_i32(d, d, t);
4448 }
4449 
4450 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4451 {
4452     TCGv_i64 t = tcg_temp_new_i64();
4453     gen_sabd_i64(t, a, b);
4454     tcg_gen_add_i64(d, d, t);
4455 }
4456 
4457 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4458 {
4459     TCGv_vec t = tcg_temp_new_vec_matching(d);
4460     gen_sabd_vec(vece, t, a, b);
4461     tcg_gen_add_vec(vece, d, d, t);
4462 }
4463 
4464 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4465                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4466 {
4467     static const TCGOpcode vecop_list[] = {
4468         INDEX_op_sub_vec, INDEX_op_add_vec,
4469         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4470     };
4471     static const GVecGen3 ops[4] = {
4472         { .fniv = gen_saba_vec,
4473           .fno = gen_helper_gvec_saba_b,
4474           .opt_opc = vecop_list,
4475           .load_dest = true,
4476           .vece = MO_8 },
4477         { .fniv = gen_saba_vec,
4478           .fno = gen_helper_gvec_saba_h,
4479           .opt_opc = vecop_list,
4480           .load_dest = true,
4481           .vece = MO_16 },
4482         { .fni4 = gen_saba_i32,
4483           .fniv = gen_saba_vec,
4484           .fno = gen_helper_gvec_saba_s,
4485           .opt_opc = vecop_list,
4486           .load_dest = true,
4487           .vece = MO_32 },
4488         { .fni8 = gen_saba_i64,
4489           .fniv = gen_saba_vec,
4490           .fno = gen_helper_gvec_saba_d,
4491           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4492           .opt_opc = vecop_list,
4493           .load_dest = true,
4494           .vece = MO_64 },
4495     };
4496     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4497 }
4498 
4499 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4500 {
4501     TCGv_i32 t = tcg_temp_new_i32();
4502     gen_uabd_i32(t, a, b);
4503     tcg_gen_add_i32(d, d, t);
4504 }
4505 
4506 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4507 {
4508     TCGv_i64 t = tcg_temp_new_i64();
4509     gen_uabd_i64(t, a, b);
4510     tcg_gen_add_i64(d, d, t);
4511 }
4512 
4513 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4514 {
4515     TCGv_vec t = tcg_temp_new_vec_matching(d);
4516     gen_uabd_vec(vece, t, a, b);
4517     tcg_gen_add_vec(vece, d, d, t);
4518 }
4519 
4520 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4521                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4522 {
4523     static const TCGOpcode vecop_list[] = {
4524         INDEX_op_sub_vec, INDEX_op_add_vec,
4525         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4526     };
4527     static const GVecGen3 ops[4] = {
4528         { .fniv = gen_uaba_vec,
4529           .fno = gen_helper_gvec_uaba_b,
4530           .opt_opc = vecop_list,
4531           .load_dest = true,
4532           .vece = MO_8 },
4533         { .fniv = gen_uaba_vec,
4534           .fno = gen_helper_gvec_uaba_h,
4535           .opt_opc = vecop_list,
4536           .load_dest = true,
4537           .vece = MO_16 },
4538         { .fni4 = gen_uaba_i32,
4539           .fniv = gen_uaba_vec,
4540           .fno = gen_helper_gvec_uaba_s,
4541           .opt_opc = vecop_list,
4542           .load_dest = true,
4543           .vece = MO_32 },
4544         { .fni8 = gen_uaba_i64,
4545           .fniv = gen_uaba_vec,
4546           .fno = gen_helper_gvec_uaba_d,
4547           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4548           .opt_opc = vecop_list,
4549           .load_dest = true,
4550           .vece = MO_64 },
4551     };
4552     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4553 }
4554 
4555 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4556                            int opc1, int crn, int crm, int opc2,
4557                            bool isread, int rt, int rt2)
4558 {
4559     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4560     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4561     TCGv_ptr tcg_ri = NULL;
4562     bool need_exit_tb;
4563     uint32_t syndrome;
4564 
4565     /*
4566      * Note that since we are an implementation which takes an
4567      * exception on a trapped conditional instruction only if the
4568      * instruction passes its condition code check, we can take
4569      * advantage of the clause in the ARM ARM that allows us to set
4570      * the COND field in the instruction to 0xE in all cases.
4571      * We could fish the actual condition out of the insn (ARM)
4572      * or the condexec bits (Thumb) but it isn't necessary.
4573      */
4574     switch (cpnum) {
4575     case 14:
4576         if (is64) {
4577             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4578                                          isread, false);
4579         } else {
4580             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4581                                         rt, isread, false);
4582         }
4583         break;
4584     case 15:
4585         if (is64) {
4586             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4587                                          isread, false);
4588         } else {
4589             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4590                                         rt, isread, false);
4591         }
4592         break;
4593     default:
4594         /*
4595          * ARMv8 defines that only coprocessors 14 and 15 exist,
4596          * so this can only happen if this is an ARMv7 or earlier CPU,
4597          * in which case the syndrome information won't actually be
4598          * guest visible.
4599          */
4600         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4601         syndrome = syn_uncategorized();
4602         break;
4603     }
4604 
4605     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4606         /*
4607          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4608          * over the UNDEF for "no such register" or the UNDEF for "access
4609          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4610          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4611          * access_check_cp_reg(), after the checks for whether the access
4612          * configurably trapped to EL1.
4613          */
4614         uint32_t maskbit = is64 ? crm : crn;
4615 
4616         if (maskbit != 4 && maskbit != 14) {
4617             /* T4 and T14 are RES0 so never cause traps */
4618             TCGv_i32 t;
4619             DisasLabel over = gen_disas_label(s);
4620 
4621             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4622             tcg_gen_andi_i32(t, t, 1u << maskbit);
4623             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4624 
4625             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4626             set_disas_label(s, over);
4627         }
4628     }
4629 
4630     if (!ri) {
4631         /*
4632          * Unknown register; this might be a guest error or a QEMU
4633          * unimplemented feature.
4634          */
4635         if (is64) {
4636             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4637                           "64 bit system register cp:%d opc1: %d crm:%d "
4638                           "(%s)\n",
4639                           isread ? "read" : "write", cpnum, opc1, crm,
4640                           s->ns ? "non-secure" : "secure");
4641         } else {
4642             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4643                           "system register cp:%d opc1:%d crn:%d crm:%d "
4644                           "opc2:%d (%s)\n",
4645                           isread ? "read" : "write", cpnum, opc1, crn,
4646                           crm, opc2, s->ns ? "non-secure" : "secure");
4647         }
4648         unallocated_encoding(s);
4649         return;
4650     }
4651 
4652     /* Check access permissions */
4653     if (!cp_access_ok(s->current_el, ri, isread)) {
4654         unallocated_encoding(s);
4655         return;
4656     }
4657 
4658     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4659         (ri->fgt && s->fgt_active) ||
4660         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4661         /*
4662          * Emit code to perform further access permissions checks at
4663          * runtime; this may result in an exception.
4664          * Note that on XScale all cp0..c13 registers do an access check
4665          * call in order to handle c15_cpar.
4666          */
4667         gen_set_condexec(s);
4668         gen_update_pc(s, 0);
4669         tcg_ri = tcg_temp_new_ptr();
4670         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4671                                        tcg_constant_i32(key),
4672                                        tcg_constant_i32(syndrome),
4673                                        tcg_constant_i32(isread));
4674     } else if (ri->type & ARM_CP_RAISES_EXC) {
4675         /*
4676          * The readfn or writefn might raise an exception;
4677          * synchronize the CPU state in case it does.
4678          */
4679         gen_set_condexec(s);
4680         gen_update_pc(s, 0);
4681     }
4682 
4683     /* Handle special cases first */
4684     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4685     case 0:
4686         break;
4687     case ARM_CP_NOP:
4688         return;
4689     case ARM_CP_WFI:
4690         if (isread) {
4691             unallocated_encoding(s);
4692         } else {
4693             gen_update_pc(s, curr_insn_len(s));
4694             s->base.is_jmp = DISAS_WFI;
4695         }
4696         return;
4697     default:
4698         g_assert_not_reached();
4699     }
4700 
4701     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4702         gen_io_start();
4703     }
4704 
4705     if (isread) {
4706         /* Read */
4707         if (is64) {
4708             TCGv_i64 tmp64;
4709             TCGv_i32 tmp;
4710             if (ri->type & ARM_CP_CONST) {
4711                 tmp64 = tcg_constant_i64(ri->resetvalue);
4712             } else if (ri->readfn) {
4713                 if (!tcg_ri) {
4714                     tcg_ri = gen_lookup_cp_reg(key);
4715                 }
4716                 tmp64 = tcg_temp_new_i64();
4717                 gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4718             } else {
4719                 tmp64 = tcg_temp_new_i64();
4720                 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4721             }
4722             tmp = tcg_temp_new_i32();
4723             tcg_gen_extrl_i64_i32(tmp, tmp64);
4724             store_reg(s, rt, tmp);
4725             tmp = tcg_temp_new_i32();
4726             tcg_gen_extrh_i64_i32(tmp, tmp64);
4727             store_reg(s, rt2, tmp);
4728         } else {
4729             TCGv_i32 tmp;
4730             if (ri->type & ARM_CP_CONST) {
4731                 tmp = tcg_constant_i32(ri->resetvalue);
4732             } else if (ri->readfn) {
4733                 if (!tcg_ri) {
4734                     tcg_ri = gen_lookup_cp_reg(key);
4735                 }
4736                 tmp = tcg_temp_new_i32();
4737                 gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4738             } else {
4739                 tmp = load_cpu_offset(ri->fieldoffset);
4740             }
4741             if (rt == 15) {
4742                 /* Destination register of r15 for 32 bit loads sets
4743                  * the condition codes from the high 4 bits of the value
4744                  */
4745                 gen_set_nzcv(tmp);
4746             } else {
4747                 store_reg(s, rt, tmp);
4748             }
4749         }
4750     } else {
4751         /* Write */
4752         if (ri->type & ARM_CP_CONST) {
4753             /* If not forbidden by access permissions, treat as WI */
4754             return;
4755         }
4756 
4757         if (is64) {
4758             TCGv_i32 tmplo, tmphi;
4759             TCGv_i64 tmp64 = tcg_temp_new_i64();
4760             tmplo = load_reg(s, rt);
4761             tmphi = load_reg(s, rt2);
4762             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4763             if (ri->writefn) {
4764                 if (!tcg_ri) {
4765                     tcg_ri = gen_lookup_cp_reg(key);
4766                 }
4767                 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4768             } else {
4769                 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4770             }
4771         } else {
4772             TCGv_i32 tmp = load_reg(s, rt);
4773             if (ri->writefn) {
4774                 if (!tcg_ri) {
4775                     tcg_ri = gen_lookup_cp_reg(key);
4776                 }
4777                 gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4778             } else {
4779                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4780             }
4781         }
4782     }
4783 
4784     /* I/O operations must end the TB here (whether read or write) */
4785     need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4786                     (ri->type & ARM_CP_IO));
4787 
4788     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4789         /*
4790          * A write to any coprocessor register that ends a TB
4791          * must rebuild the hflags for the next TB.
4792          */
4793         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4794         /*
4795          * We default to ending the TB on a coprocessor register write,
4796          * but allow this to be suppressed by the register definition
4797          * (usually only necessary to work around guest bugs).
4798          */
4799         need_exit_tb = true;
4800     }
4801     if (need_exit_tb) {
4802         gen_lookup_tb(s);
4803     }
4804 }
4805 
4806 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4807 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4808 {
4809     int cpnum = (insn >> 8) & 0xf;
4810 
4811     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4812         unallocated_encoding(s);
4813     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4814         if (disas_iwmmxt_insn(s, insn)) {
4815             unallocated_encoding(s);
4816         }
4817     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4818         if (disas_dsp_insn(s, insn)) {
4819             unallocated_encoding(s);
4820         }
4821     }
4822 }
4823 
4824 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4825 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4826 {
4827     TCGv_i32 tmp;
4828     tmp = tcg_temp_new_i32();
4829     tcg_gen_extrl_i64_i32(tmp, val);
4830     store_reg(s, rlow, tmp);
4831     tmp = tcg_temp_new_i32();
4832     tcg_gen_extrh_i64_i32(tmp, val);
4833     store_reg(s, rhigh, tmp);
4834 }
4835 
4836 /* load and add a 64-bit value from a register pair.  */
4837 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4838 {
4839     TCGv_i64 tmp;
4840     TCGv_i32 tmpl;
4841     TCGv_i32 tmph;
4842 
4843     /* Load 64-bit value rd:rn.  */
4844     tmpl = load_reg(s, rlow);
4845     tmph = load_reg(s, rhigh);
4846     tmp = tcg_temp_new_i64();
4847     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4848     tcg_gen_add_i64(val, val, tmp);
4849 }
4850 
4851 /* Set N and Z flags from hi|lo.  */
4852 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4853 {
4854     tcg_gen_mov_i32(cpu_NF, hi);
4855     tcg_gen_or_i32(cpu_ZF, lo, hi);
4856 }
4857 
4858 /* Load/Store exclusive instructions are implemented by remembering
4859    the value/address loaded, and seeing if these are the same
4860    when the store is performed.  This should be sufficient to implement
4861    the architecturally mandated semantics, and avoids having to monitor
4862    regular stores.  The compare vs the remembered value is done during
4863    the cmpxchg operation, but we must compare the addresses manually.  */
4864 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4865                                TCGv_i32 addr, int size)
4866 {
4867     TCGv_i32 tmp = tcg_temp_new_i32();
4868     MemOp opc = size | MO_ALIGN | s->be_data;
4869 
4870     s->is_ldex = true;
4871 
4872     if (size == 3) {
4873         TCGv_i32 tmp2 = tcg_temp_new_i32();
4874         TCGv_i64 t64 = tcg_temp_new_i64();
4875 
4876         /*
4877          * For AArch32, architecturally the 32-bit word at the lowest
4878          * address is always Rt and the one at addr+4 is Rt2, even if
4879          * the CPU is big-endian. That means we don't want to do a
4880          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4881          * architecturally 64-bit access, but instead do a 64-bit access
4882          * using MO_BE if appropriate and then split the two halves.
4883          */
4884         TCGv taddr = gen_aa32_addr(s, addr, opc);
4885 
4886         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4887         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4888         if (s->be_data == MO_BE) {
4889             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4890         } else {
4891             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4892         }
4893         store_reg(s, rt2, tmp2);
4894     } else {
4895         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4896         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4897     }
4898 
4899     store_reg(s, rt, tmp);
4900     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4901 }
4902 
4903 static void gen_clrex(DisasContext *s)
4904 {
4905     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4906 }
4907 
4908 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4909                                 TCGv_i32 addr, int size)
4910 {
4911     TCGv_i32 t0, t1, t2;
4912     TCGv_i64 extaddr;
4913     TCGv taddr;
4914     TCGLabel *done_label;
4915     TCGLabel *fail_label;
4916     MemOp opc = size | MO_ALIGN | s->be_data;
4917 
4918     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4919          [addr] = {Rt};
4920          {Rd} = 0;
4921        } else {
4922          {Rd} = 1;
4923        } */
4924     fail_label = gen_new_label();
4925     done_label = gen_new_label();
4926     extaddr = tcg_temp_new_i64();
4927     tcg_gen_extu_i32_i64(extaddr, addr);
4928     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4929 
4930     taddr = gen_aa32_addr(s, addr, opc);
4931     t0 = tcg_temp_new_i32();
4932     t1 = load_reg(s, rt);
4933     if (size == 3) {
4934         TCGv_i64 o64 = tcg_temp_new_i64();
4935         TCGv_i64 n64 = tcg_temp_new_i64();
4936 
4937         t2 = load_reg(s, rt2);
4938 
4939         /*
4940          * For AArch32, architecturally the 32-bit word at the lowest
4941          * address is always Rt and the one at addr+4 is Rt2, even if
4942          * the CPU is big-endian. Since we're going to treat this as a
4943          * single 64-bit BE store, we need to put the two halves in the
4944          * opposite order for BE to LE, so that they end up in the right
4945          * places.  We don't want gen_aa32_st_i64, because that checks
4946          * SCTLR_B as if for an architectural 64-bit access.
4947          */
4948         if (s->be_data == MO_BE) {
4949             tcg_gen_concat_i32_i64(n64, t2, t1);
4950         } else {
4951             tcg_gen_concat_i32_i64(n64, t1, t2);
4952         }
4953 
4954         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4955                                    get_mem_index(s), opc);
4956 
4957         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4958         tcg_gen_extrl_i64_i32(t0, o64);
4959     } else {
4960         t2 = tcg_temp_new_i32();
4961         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4962         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4963         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4964     }
4965     tcg_gen_mov_i32(cpu_R[rd], t0);
4966     tcg_gen_br(done_label);
4967 
4968     gen_set_label(fail_label);
4969     tcg_gen_movi_i32(cpu_R[rd], 1);
4970     gen_set_label(done_label);
4971     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4972 }
4973 
4974 /* gen_srs:
4975  * @env: CPUARMState
4976  * @s: DisasContext
4977  * @mode: mode field from insn (which stack to store to)
4978  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4979  * @writeback: true if writeback bit set
4980  *
4981  * Generate code for the SRS (Store Return State) insn.
4982  */
4983 static void gen_srs(DisasContext *s,
4984                     uint32_t mode, uint32_t amode, bool writeback)
4985 {
4986     int32_t offset;
4987     TCGv_i32 addr, tmp;
4988     bool undef = false;
4989 
4990     /* SRS is:
4991      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4992      *   and specified mode is monitor mode
4993      * - UNDEFINED in Hyp mode
4994      * - UNPREDICTABLE in User or System mode
4995      * - UNPREDICTABLE if the specified mode is:
4996      * -- not implemented
4997      * -- not a valid mode number
4998      * -- a mode that's at a higher exception level
4999      * -- Monitor, if we are Non-secure
5000      * For the UNPREDICTABLE cases we choose to UNDEF.
5001      */
5002     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5003         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
5004         return;
5005     }
5006 
5007     if (s->current_el == 0 || s->current_el == 2) {
5008         undef = true;
5009     }
5010 
5011     switch (mode) {
5012     case ARM_CPU_MODE_USR:
5013     case ARM_CPU_MODE_FIQ:
5014     case ARM_CPU_MODE_IRQ:
5015     case ARM_CPU_MODE_SVC:
5016     case ARM_CPU_MODE_ABT:
5017     case ARM_CPU_MODE_UND:
5018     case ARM_CPU_MODE_SYS:
5019         break;
5020     case ARM_CPU_MODE_HYP:
5021         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5022             undef = true;
5023         }
5024         break;
5025     case ARM_CPU_MODE_MON:
5026         /* No need to check specifically for "are we non-secure" because
5027          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5028          * so if this isn't EL3 then we must be non-secure.
5029          */
5030         if (s->current_el != 3) {
5031             undef = true;
5032         }
5033         break;
5034     default:
5035         undef = true;
5036     }
5037 
5038     if (undef) {
5039         unallocated_encoding(s);
5040         return;
5041     }
5042 
5043     addr = tcg_temp_new_i32();
5044     /* get_r13_banked() will raise an exception if called from System mode */
5045     gen_set_condexec(s);
5046     gen_update_pc(s, 0);
5047     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5048     switch (amode) {
5049     case 0: /* DA */
5050         offset = -4;
5051         break;
5052     case 1: /* IA */
5053         offset = 0;
5054         break;
5055     case 2: /* DB */
5056         offset = -8;
5057         break;
5058     case 3: /* IB */
5059         offset = 4;
5060         break;
5061     default:
5062         g_assert_not_reached();
5063     }
5064     tcg_gen_addi_i32(addr, addr, offset);
5065     tmp = load_reg(s, 14);
5066     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5067     tmp = load_cpu_field(spsr);
5068     tcg_gen_addi_i32(addr, addr, 4);
5069     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5070     if (writeback) {
5071         switch (amode) {
5072         case 0:
5073             offset = -8;
5074             break;
5075         case 1:
5076             offset = 4;
5077             break;
5078         case 2:
5079             offset = -4;
5080             break;
5081         case 3:
5082             offset = 0;
5083             break;
5084         default:
5085             g_assert_not_reached();
5086         }
5087         tcg_gen_addi_i32(addr, addr, offset);
5088         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5089     }
5090     s->base.is_jmp = DISAS_UPDATE_EXIT;
5091 }
5092 
5093 /* Skip this instruction if the ARM condition is false */
5094 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5095 {
5096     arm_gen_condlabel(s);
5097     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5098 }
5099 
5100 
5101 /*
5102  * Constant expanders used by T16/T32 decode
5103  */
5104 
5105 /* Return only the rotation part of T32ExpandImm.  */
5106 static int t32_expandimm_rot(DisasContext *s, int x)
5107 {
5108     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5109 }
5110 
5111 /* Return the unrotated immediate from T32ExpandImm.  */
5112 static int t32_expandimm_imm(DisasContext *s, int x)
5113 {
5114     int imm = extract32(x, 0, 8);
5115 
5116     switch (extract32(x, 8, 4)) {
5117     case 0: /* XY */
5118         /* Nothing to do.  */
5119         break;
5120     case 1: /* 00XY00XY */
5121         imm *= 0x00010001;
5122         break;
5123     case 2: /* XY00XY00 */
5124         imm *= 0x01000100;
5125         break;
5126     case 3: /* XYXYXYXY */
5127         imm *= 0x01010101;
5128         break;
5129     default:
5130         /* Rotated constant.  */
5131         imm |= 0x80;
5132         break;
5133     }
5134     return imm;
5135 }
5136 
5137 static int t32_branch24(DisasContext *s, int x)
5138 {
5139     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5140     x ^= !(x < 0) * (3 << 21);
5141     /* Append the final zero.  */
5142     return x << 1;
5143 }
5144 
5145 static int t16_setflags(DisasContext *s)
5146 {
5147     return s->condexec_mask == 0;
5148 }
5149 
5150 static int t16_push_list(DisasContext *s, int x)
5151 {
5152     return (x & 0xff) | (x & 0x100) << (14 - 8);
5153 }
5154 
5155 static int t16_pop_list(DisasContext *s, int x)
5156 {
5157     return (x & 0xff) | (x & 0x100) << (15 - 8);
5158 }
5159 
5160 /*
5161  * Include the generated decoders.
5162  */
5163 
5164 #include "decode-a32.c.inc"
5165 #include "decode-a32-uncond.c.inc"
5166 #include "decode-t32.c.inc"
5167 #include "decode-t16.c.inc"
5168 
5169 static bool valid_cp(DisasContext *s, int cp)
5170 {
5171     /*
5172      * Return true if this coprocessor field indicates something
5173      * that's really a possible coprocessor.
5174      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5175      * and of those only cp14 and cp15 were used for registers.
5176      * cp10 and cp11 were used for VFP and Neon, whose decode is
5177      * dealt with elsewhere. With the advent of fp16, cp9 is also
5178      * now part of VFP.
5179      * For v8A and later, the encoding has been tightened so that
5180      * only cp14 and cp15 are valid, and other values aren't considered
5181      * to be in the coprocessor-instruction space at all. v8M still
5182      * permits coprocessors 0..7.
5183      * For XScale, we must not decode the XScale cp0, cp1 space as
5184      * a standard coprocessor insn, because we want to fall through to
5185      * the legacy disas_xscale_insn() decoder after decodetree is done.
5186      */
5187     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5188         return false;
5189     }
5190 
5191     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5192         !arm_dc_feature(s, ARM_FEATURE_M)) {
5193         return cp >= 14;
5194     }
5195     return cp < 8 || cp >= 14;
5196 }
5197 
5198 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5199 {
5200     if (!valid_cp(s, a->cp)) {
5201         return false;
5202     }
5203     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5204                    false, a->rt, 0);
5205     return true;
5206 }
5207 
5208 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5209 {
5210     if (!valid_cp(s, a->cp)) {
5211         return false;
5212     }
5213     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5214                    true, a->rt, 0);
5215     return true;
5216 }
5217 
5218 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5219 {
5220     if (!valid_cp(s, a->cp)) {
5221         return false;
5222     }
5223     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5224                    false, a->rt, a->rt2);
5225     return true;
5226 }
5227 
5228 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5229 {
5230     if (!valid_cp(s, a->cp)) {
5231         return false;
5232     }
5233     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5234                    true, a->rt, a->rt2);
5235     return true;
5236 }
5237 
5238 /* Helpers to swap operands for reverse-subtract.  */
5239 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5240 {
5241     tcg_gen_sub_i32(dst, b, a);
5242 }
5243 
5244 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5245 {
5246     gen_sub_CC(dst, b, a);
5247 }
5248 
5249 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5250 {
5251     gen_sub_carry(dest, b, a);
5252 }
5253 
/* Reverse subtract with carry via gen_sbc_CC(), sources swapped. */
static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
{
    gen_sbc_CC(dest, b, a);
}
5258 
/*
 * Helpers for the data processing routines.
 *
 * After the computation, store the result back.
 * This may be suppressed altogether (STREG_NONE), require a runtime
 * check against the stack limits (STREG_SP_CHECK), or generate an
 * exception return (STREG_EXC_RET).  Otherwise the result is simply
 * stored into a register (STREG_NORMAL).
 *
 * store_reg_kind() always returns true, indicating success for a
 * trans_* function.
 */
typedef enum {
   STREG_NONE,      /* result is discarded */
   STREG_NORMAL,    /* ordinary register write */
   STREG_SP_CHECK,  /* written with store_sp_checked() */
   STREG_EXC_RET,   /* passed to gen_exception_return() */
} StoreRegKind;
5275 
5276 static bool store_reg_kind(DisasContext *s, int rd,
5277                             TCGv_i32 val, StoreRegKind kind)
5278 {
5279     switch (kind) {
5280     case STREG_NONE:
5281         return true;
5282     case STREG_NORMAL:
5283         /* See ALUWritePC: Interworking only from a32 mode. */
5284         if (s->thumb) {
5285             store_reg(s, rd, val);
5286         } else {
5287             store_reg_bx(s, rd, val);
5288         }
5289         return true;
5290     case STREG_SP_CHECK:
5291         store_sp_checked(s, val);
5292         return true;
5293     case STREG_EXC_RET:
5294         gen_exception_return(s, val);
5295         return true;
5296     }
5297     g_assert_not_reached();
5298 }
5299 
5300 /*
5301  * Data Processing (register)
5302  *
5303  * Operate, with set flags, one register source,
5304  * one immediate shifted register source, and a destination.
5305  */
5306 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5307                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5308                          int logic_cc, StoreRegKind kind)
5309 {
5310     TCGv_i32 tmp1, tmp2;
5311 
5312     tmp2 = load_reg(s, a->rm);
5313     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5314     tmp1 = load_reg(s, a->rn);
5315 
5316     gen(tmp1, tmp1, tmp2);
5317 
5318     if (logic_cc) {
5319         gen_logic_CC(tmp1);
5320     }
5321     return store_reg_kind(s, a->rd, tmp1, kind);
5322 }
5323 
5324 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5325                          void (*gen)(TCGv_i32, TCGv_i32),
5326                          int logic_cc, StoreRegKind kind)
5327 {
5328     TCGv_i32 tmp;
5329 
5330     tmp = load_reg(s, a->rm);
5331     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5332 
5333     gen(tmp, tmp);
5334     if (logic_cc) {
5335         gen_logic_CC(tmp);
5336     }
5337     return store_reg_kind(s, a->rd, tmp, kind);
5338 }
5339 
5340 /*
5341  * Data-processing (register-shifted register)
5342  *
5343  * Operate, with set flags, one register source,
5344  * one register shifted register source, and a destination.
5345  */
5346 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5347                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5348                          int logic_cc, StoreRegKind kind)
5349 {
5350     TCGv_i32 tmp1, tmp2;
5351 
5352     tmp1 = load_reg(s, a->rs);
5353     tmp2 = load_reg(s, a->rm);
5354     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5355     tmp1 = load_reg(s, a->rn);
5356 
5357     gen(tmp1, tmp1, tmp2);
5358 
5359     if (logic_cc) {
5360         gen_logic_CC(tmp1);
5361     }
5362     return store_reg_kind(s, a->rd, tmp1, kind);
5363 }
5364 
5365 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5366                          void (*gen)(TCGv_i32, TCGv_i32),
5367                          int logic_cc, StoreRegKind kind)
5368 {
5369     TCGv_i32 tmp1, tmp2;
5370 
5371     tmp1 = load_reg(s, a->rs);
5372     tmp2 = load_reg(s, a->rm);
5373     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5374 
5375     gen(tmp2, tmp2);
5376     if (logic_cc) {
5377         gen_logic_CC(tmp2);
5378     }
5379     return store_reg_kind(s, a->rd, tmp2, kind);
5380 }
5381 
5382 /*
5383  * Data-processing (immediate)
5384  *
5385  * Operate, with set flags, one register source,
5386  * one rotated immediate, and a destination.
5387  *
5388  * Note that logic_cc && a->rot setting CF based on the msb of the
5389  * immediate is the reason why we must pass in the unrotated form
5390  * of the immediate.
5391  */
5392 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5393                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5394                          int logic_cc, StoreRegKind kind)
5395 {
5396     TCGv_i32 tmp1;
5397     uint32_t imm;
5398 
5399     imm = ror32(a->imm, a->rot);
5400     if (logic_cc && a->rot) {
5401         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5402     }
5403     tmp1 = load_reg(s, a->rn);
5404 
5405     gen(tmp1, tmp1, tcg_constant_i32(imm));
5406 
5407     if (logic_cc) {
5408         gen_logic_CC(tmp1);
5409     }
5410     return store_reg_kind(s, a->rd, tmp1, kind);
5411 }
5412 
5413 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5414                          void (*gen)(TCGv_i32, TCGv_i32),
5415                          int logic_cc, StoreRegKind kind)
5416 {
5417     TCGv_i32 tmp;
5418     uint32_t imm;
5419 
5420     imm = ror32(a->imm, a->rot);
5421     if (logic_cc && a->rot) {
5422         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5423     }
5424 
5425     tmp = tcg_temp_new_i32();
5426     gen(tmp, tcg_constant_i32(imm));
5427 
5428     if (logic_cc) {
5429         gen_logic_CC(tmp);
5430     }
5431     return store_reg_kind(s, a->rd, tmp, kind);
5432 }
5433 
/*
 * Generators for the trans_* entry points of the data-processing insns.
 *
 *   NAME  instruction mnemonic, pasted into the function names
 *   OP    generator callback handed to the op_s_* worker
 *   L     value for the worker's logic_cc argument
 *   K     expression yielding the StoreRegKind (DO_ANY3/DO_ANY2 only)
 *
 * K is evaluated into a local before OP and L are, so K may both return
 * out of the generated function and modify *a ahead of OP/L using it.
 *
 * DO_ANY3: three-operand forms (_rrri, _rrrr, _rri).
 */
#define DO_ANY3(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind store_kind = (K);                                  \
        return op_s_rrr_shi(s, a, OP, L, store_kind);                   \
    }                                                                   \
    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind store_kind = (K);                                  \
        return op_s_rrr_shr(s, a, OP, L, store_kind);                   \
    }                                                                   \
    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind store_kind = (K);                                  \
        return op_s_rri_rot(s, a, OP, L, store_kind);                   \
    }

/* DO_ANY2: two-operand forms (_rxri, _rxrr, _rxi); Rn is not read. */
#define DO_ANY2(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind store_kind = (K);                                  \
        return op_s_rxr_shi(s, a, OP, L, store_kind);                   \
    }                                                                   \
    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind store_kind = (K);                                  \
        return op_s_rxr_shr(s, a, OP, L, store_kind);                   \
    }                                                                   \
    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind store_kind = (K);                                  \
        return op_s_rxi_rot(s, a, OP, L, store_kind);                   \
    }

/* DO_CMP2: compare forms (_xrri, _xrrr, _xri); the result is discarded. */
#define DO_CMP2(NAME, OP, L)                                            \
    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        return op_s_rrr_shi(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        return op_s_rrr_shr(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        return op_s_rri_rot(s, a, OP, L, STREG_NONE);                   \
    }
5457 
/* Logical ops: a->s is passed as logic_cc, so it selects the flag update. */
DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)

/*
 * Arithmetic ops: logic_cc is false; a->s instead selects the _CC
 * generator in place of the plain one.
 */
DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)

/* Compares: result discarded (STREG_NONE is supplied by DO_CMP2). */
DO_CMP2(TST, tcg_gen_and_i32, true)
DO_CMP2(TEQ, tcg_gen_xor_i32, true)
DO_CMP2(CMN, gen_add_CC, false)
DO_CMP2(CMP, gen_sub_CC, false)

/* ADD with both Rd and Rn equal to 13 goes through the SP-checked store. */
DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5475 
5476 /*
5477  * Note for the computation of StoreRegKind we return out of the
5478  * middle of the functions that are expanded by DO_ANY3, and that
5479  * we modify a->s via that parameter before it is used by OP.
5480  */
5481 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5482         ({
5483             StoreRegKind ret = STREG_NORMAL;
5484             if (a->rd == 15 && a->s) {
5485                 /*
5486                  * See ALUExceptionReturn:
5487                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5488                  * In Hyp mode, UNDEFINED.
5489                  */
5490                 if (IS_USER(s) || s->current_el == 2) {
5491                     unallocated_encoding(s);
5492                     return true;
5493                 }
5494                 /* There is no writeback of nzcv to PSTATE.  */
5495                 a->s = 0;
5496                 ret = STREG_EXC_RET;
5497             } else if (a->rd == 13 && a->rn == 13) {
5498                 ret = STREG_SP_CHECK;
5499             }
5500             ret;
5501         }))
5502 
5503 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5504         ({
5505             StoreRegKind ret = STREG_NORMAL;
5506             if (a->rd == 15 && a->s) {
5507                 /*
5508                  * See ALUExceptionReturn:
5509                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5510                  * In Hyp mode, UNDEFINED.
5511                  */
5512                 if (IS_USER(s) || s->current_el == 2) {
5513                     unallocated_encoding(s);
5514                     return true;
5515                 }
5516                 /* There is no writeback of nzcv to PSTATE.  */
5517                 a->s = 0;
5518                 ret = STREG_EXC_RET;
5519             } else if (a->rd == 13) {
5520                 ret = STREG_SP_CHECK;
5521             }
5522             ret;
5523         }))
5524 
/* MVN: bitwise NOT of the single source; a->s is passed as logic_cc. */
DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5526 
5527 /*
5528  * ORN is only available with T32, so there is no register-shifted-register
5529  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5530  */
5531 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5532 {
5533     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5534 }
5535 
5536 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5537 {
5538     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5539 }
5540 
/* The data-processing generator macros are not needed past this point. */
#undef DO_ANY3
#undef DO_ANY2
#undef DO_CMP2
5544 
5545 static bool trans_ADR(DisasContext *s, arg_ri *a)
5546 {
5547     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5548     return true;
5549 }
5550 
5551 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5552 {
5553     if (!ENABLE_ARCH_6T2) {
5554         return false;
5555     }
5556 
5557     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5558     return true;
5559 }
5560 
5561 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5562 {
5563     TCGv_i32 tmp;
5564 
5565     if (!ENABLE_ARCH_6T2) {
5566         return false;
5567     }
5568 
5569     tmp = load_reg(s, a->rd);
5570     tcg_gen_ext16u_i32(tmp, tmp);
5571     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5572     store_reg(s, a->rd, tmp);
5573     return true;
5574 }
5575 
5576 /*
5577  * v8.1M MVE wide-shifts
5578  */
5579 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5580                           WideShiftImmFn *fn)
5581 {
5582     TCGv_i64 rda;
5583     TCGv_i32 rdalo, rdahi;
5584 
5585     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5586         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5587         return false;
5588     }
5589     if (a->rdahi == 15) {
5590         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5591         return false;
5592     }
5593     if (!dc_isar_feature(aa32_mve, s) ||
5594         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5595         a->rdahi == 13) {
5596         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5597         unallocated_encoding(s);
5598         return true;
5599     }
5600 
5601     if (a->shim == 0) {
5602         a->shim = 32;
5603     }
5604 
5605     rda = tcg_temp_new_i64();
5606     rdalo = load_reg(s, a->rdalo);
5607     rdahi = load_reg(s, a->rdahi);
5608     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5609 
5610     fn(rda, rda, a->shim);
5611 
5612     tcg_gen_extrl_i64_i32(rdalo, rda);
5613     tcg_gen_extrh_i64_i32(rdahi, rda);
5614     store_reg(s, a->rdalo, rdalo);
5615     store_reg(s, a->rdahi, rdahi);
5616 
5617     return true;
5618 }
5619 
5620 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5621 {
5622     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5623 }
5624 
5625 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5626 {
5627     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5628 }
5629 
5630 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5631 {
5632     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5633 }
5634 
5635 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5636 {
5637     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5638 }
5639 
5640 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5641 {
5642     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5643 }
5644 
5645 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5646 {
5647     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5648 }
5649 
5650 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5651 {
5652     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5653 }
5654 
5655 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5656 {
5657     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5658 }
5659 
5660 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5661 {
5662     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5663 }
5664 
5665 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5666 {
5667     TCGv_i64 rda;
5668     TCGv_i32 rdalo, rdahi;
5669 
5670     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5671         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5672         return false;
5673     }
5674     if (a->rdahi == 15) {
5675         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5676         return false;
5677     }
5678     if (!dc_isar_feature(aa32_mve, s) ||
5679         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5680         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5681         a->rm == a->rdahi || a->rm == a->rdalo) {
5682         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5683         unallocated_encoding(s);
5684         return true;
5685     }
5686 
5687     rda = tcg_temp_new_i64();
5688     rdalo = load_reg(s, a->rdalo);
5689     rdahi = load_reg(s, a->rdahi);
5690     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5691 
5692     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5693     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5694 
5695     tcg_gen_extrl_i64_i32(rdalo, rda);
5696     tcg_gen_extrh_i64_i32(rdahi, rda);
5697     store_reg(s, a->rdalo, rdalo);
5698     store_reg(s, a->rdahi, rdahi);
5699 
5700     return true;
5701 }
5702 
5703 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5704 {
5705     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5706 }
5707 
5708 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5709 {
5710     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5711 }
5712 
5713 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5714 {
5715     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5716 }
5717 
5718 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5719 {
5720     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5721 }
5722 
5723 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5724 {
5725     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5726 }
5727 
5728 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5729 {
5730     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5731 }
5732 
5733 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5734 {
5735     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5736         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5737         return false;
5738     }
5739     if (!dc_isar_feature(aa32_mve, s) ||
5740         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5741         a->rda == 13 || a->rda == 15) {
5742         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5743         unallocated_encoding(s);
5744         return true;
5745     }
5746 
5747     if (a->shim == 0) {
5748         a->shim = 32;
5749     }
5750     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5751 
5752     return true;
5753 }
5754 
5755 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5756 {
5757     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5758 }
5759 
5760 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5761 {
5762     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5763 }
5764 
5765 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5766 {
5767     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5768 }
5769 
5770 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5771 {
5772     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5773 }
5774 
5775 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5776 {
5777     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5778 }
5779 
5780 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5781 {
5782     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5783 }
5784 
5785 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5786 {
5787     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5788         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5789         return false;
5790     }
5791     if (!dc_isar_feature(aa32_mve, s) ||
5792         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5793         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5794         a->rm == a->rda) {
5795         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5796         unallocated_encoding(s);
5797         return true;
5798     }
5799 
5800     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5801     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5802     return true;
5803 }
5804 
5805 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5806 {
5807     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5808 }
5809 
5810 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5811 {
5812     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5813 }
5814 
5815 /*
5816  * Multiply and multiply accumulate
5817  */
5818 
5819 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5820 {
5821     TCGv_i32 t1, t2;
5822 
5823     t1 = load_reg(s, a->rn);
5824     t2 = load_reg(s, a->rm);
5825     tcg_gen_mul_i32(t1, t1, t2);
5826     if (add) {
5827         t2 = load_reg(s, a->ra);
5828         tcg_gen_add_i32(t1, t1, t2);
5829     }
5830     if (a->s) {
5831         gen_logic_CC(t1);
5832     }
5833     store_reg(s, a->rd, t1);
5834     return true;
5835 }
5836 
5837 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5838 {
5839     return op_mla(s, a, false);
5840 }
5841 
5842 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5843 {
5844     return op_mla(s, a, true);
5845 }
5846 
5847 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5848 {
5849     TCGv_i32 t1, t2;
5850 
5851     if (!ENABLE_ARCH_6T2) {
5852         return false;
5853     }
5854     t1 = load_reg(s, a->rn);
5855     t2 = load_reg(s, a->rm);
5856     tcg_gen_mul_i32(t1, t1, t2);
5857     t2 = load_reg(s, a->ra);
5858     tcg_gen_sub_i32(t1, t2, t1);
5859     store_reg(s, a->rd, t1);
5860     return true;
5861 }
5862 
5863 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5864 {
5865     TCGv_i32 t0, t1, t2, t3;
5866 
5867     t0 = load_reg(s, a->rm);
5868     t1 = load_reg(s, a->rn);
5869     if (uns) {
5870         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5871     } else {
5872         tcg_gen_muls2_i32(t0, t1, t0, t1);
5873     }
5874     if (add) {
5875         t2 = load_reg(s, a->ra);
5876         t3 = load_reg(s, a->rd);
5877         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5878     }
5879     if (a->s) {
5880         gen_logicq_cc(t0, t1);
5881     }
5882     store_reg(s, a->ra, t0);
5883     store_reg(s, a->rd, t1);
5884     return true;
5885 }
5886 
5887 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5888 {
5889     return op_mlal(s, a, true, false);
5890 }
5891 
5892 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5893 {
5894     return op_mlal(s, a, false, false);
5895 }
5896 
5897 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5898 {
5899     return op_mlal(s, a, true, true);
5900 }
5901 
5902 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5903 {
5904     return op_mlal(s, a, false, true);
5905 }
5906 
5907 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5908 {
5909     TCGv_i32 t0, t1, t2, zero;
5910 
5911     if (s->thumb
5912         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5913         : !ENABLE_ARCH_6) {
5914         return false;
5915     }
5916 
5917     t0 = load_reg(s, a->rm);
5918     t1 = load_reg(s, a->rn);
5919     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5920     zero = tcg_constant_i32(0);
5921     t2 = load_reg(s, a->ra);
5922     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5923     t2 = load_reg(s, a->rd);
5924     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5925     store_reg(s, a->ra, t0);
5926     store_reg(s, a->rd, t1);
5927     return true;
5928 }
5929 
5930 /*
5931  * Saturating addition and subtraction
5932  */
5933 
5934 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5935 {
5936     TCGv_i32 t0, t1;
5937 
5938     if (s->thumb
5939         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5940         : !ENABLE_ARCH_5TE) {
5941         return false;
5942     }
5943 
5944     t0 = load_reg(s, a->rm);
5945     t1 = load_reg(s, a->rn);
5946     if (doub) {
5947         gen_helper_add_saturate(t1, cpu_env, t1, t1);
5948     }
5949     if (add) {
5950         gen_helper_add_saturate(t0, cpu_env, t0, t1);
5951     } else {
5952         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5953     }
5954     store_reg(s, a->rd, t0);
5955     return true;
5956 }
5957 
/*
 * Instantiate a trans_<NAME>() wrapper around op_qaddsub().
 * ADD and DOUB are forwarded as its 'add' and 'doub' arguments.
 */
#define DO_QADDSUB(NAME, ADD, DOUB) \
static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
{                                                        \
    return op_qaddsub(s, a, ADD, DOUB);                  \
}

DO_QADDSUB(QADD, true, false)
DO_QADDSUB(QSUB, false, false)
DO_QADDSUB(QDADD, true, true)
DO_QADDSUB(QDSUB, false, true)

#undef DO_QADDSUB
5970 
5971 /*
5972  * Halfword multiply and multiply accumulate
5973  */
5974 
5975 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5976                        int add_long, bool nt, bool mt)
5977 {
5978     TCGv_i32 t0, t1, tl, th;
5979 
5980     if (s->thumb
5981         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5982         : !ENABLE_ARCH_5TE) {
5983         return false;
5984     }
5985 
5986     t0 = load_reg(s, a->rn);
5987     t1 = load_reg(s, a->rm);
5988     gen_mulxy(t0, t1, nt, mt);
5989 
5990     switch (add_long) {
5991     case 0:
5992         store_reg(s, a->rd, t0);
5993         break;
5994     case 1:
5995         t1 = load_reg(s, a->ra);
5996         gen_helper_add_setq(t0, cpu_env, t0, t1);
5997         store_reg(s, a->rd, t0);
5998         break;
5999     case 2:
6000         tl = load_reg(s, a->ra);
6001         th = load_reg(s, a->rd);
6002         /* Sign-extend the 32-bit product to 64 bits.  */
6003         t1 = tcg_temp_new_i32();
6004         tcg_gen_sari_i32(t1, t0, 31);
6005         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6006         store_reg(s, a->ra, tl);
6007         store_reg(s, a->rd, th);
6008         break;
6009     default:
6010         g_assert_not_reached();
6011     }
6012     return true;
6013 }
6014 
/*
 * Instantiate a trans_<NAME>() wrapper around op_smlaxxx().
 * 'add' becomes its add_long argument (0, 1 or 2); 'nt' and 'mt' are
 * forwarded unchanged.
 */
#define DO_SMLAX(NAME, add, nt, mt) \
static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
{                                                          \
    return op_smlaxxx(s, a, add, nt, mt);                  \
}

/* add_long == 0: plain product. */
DO_SMLAX(SMULBB, 0, 0, 0)
DO_SMLAX(SMULBT, 0, 0, 1)
DO_SMLAX(SMULTB, 0, 1, 0)
DO_SMLAX(SMULTT, 0, 1, 1)

/* add_long == 1: 32-bit accumulate. */
DO_SMLAX(SMLABB, 1, 0, 0)
DO_SMLAX(SMLABT, 1, 0, 1)
DO_SMLAX(SMLATB, 1, 1, 0)
DO_SMLAX(SMLATT, 1, 1, 1)

/* add_long == 2: 64-bit accumulate. */
DO_SMLAX(SMLALBB, 2, 0, 0)
DO_SMLAX(SMLALBT, 2, 0, 1)
DO_SMLAX(SMLALTB, 2, 1, 0)
DO_SMLAX(SMLALTT, 2, 1, 1)

#undef DO_SMLAX
6037 
6038 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6039 {
6040     TCGv_i32 t0, t1;
6041 
6042     if (!ENABLE_ARCH_5TE) {
6043         return false;
6044     }
6045 
6046     t0 = load_reg(s, a->rn);
6047     t1 = load_reg(s, a->rm);
6048     /*
6049      * Since the nominal result is product<47:16>, shift the 16-bit
6050      * input up by 16 bits, so that the result is at product<63:32>.
6051      */
6052     if (mt) {
6053         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6054     } else {
6055         tcg_gen_shli_i32(t1, t1, 16);
6056     }
6057     tcg_gen_muls2_i32(t0, t1, t0, t1);
6058     if (add) {
6059         t0 = load_reg(s, a->ra);
6060         gen_helper_add_setq(t1, cpu_env, t1, t0);
6061     }
6062     store_reg(s, a->rd, t1);
6063     return true;
6064 }
6065 
/*
 * Instantiate a trans_<NAME>() wrapper around op_smlawx().
 * 'add' and 'mt' are forwarded unchanged.
 */
#define DO_SMLAWX(NAME, add, mt) \
static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
{                                                          \
    return op_smlawx(s, a, add, mt);                       \
}

DO_SMLAWX(SMULWB, 0, 0)
DO_SMLAWX(SMULWT, 0, 1)
DO_SMLAWX(SMLAWB, 1, 0)
DO_SMLAWX(SMLAWT, 1, 1)

#undef DO_SMLAWX
6078 
6079 /*
6080  * MSR (immediate) and hints
6081  */
6082 
6083 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6084 {
6085     /*
6086      * When running single-threaded TCG code, use the helper to ensure that
6087      * the next round-robin scheduled vCPU gets a crack.  When running in
6088      * MTTCG we don't generate jumps to the helper as it won't affect the
6089      * scheduling of other vCPUs.
6090      */
6091     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6092         gen_update_pc(s, curr_insn_len(s));
6093         s->base.is_jmp = DISAS_YIELD;
6094     }
6095     return true;
6096 }
6097 
6098 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6099 {
6100     /*
6101      * When running single-threaded TCG code, use the helper to ensure that
6102      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6103      * just skip this instruction.  Currently the SEV/SEVL instructions,
6104      * which are *one* of many ways to wake the CPU from WFE, are not
6105      * implemented so we can't sleep like WFI does.
6106      */
6107     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6108         gen_update_pc(s, curr_insn_len(s));
6109         s->base.is_jmp = DISAS_WFE;
6110     }
6111     return true;
6112 }
6113 
6114 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6115 {
6116     /* For WFI, halt the vCPU until an IRQ. */
6117     gen_update_pc(s, curr_insn_len(s));
6118     s->base.is_jmp = DISAS_WFI;
6119     return true;
6120 }
6121 
6122 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6123 {
6124     /*
6125      * For M-profile, minimal-RAS ESB can be a NOP.
6126      * Without RAS, we must implement this as NOP.
6127      */
6128     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6129         /*
6130          * QEMU does not have a source of physical SErrors,
6131          * so we are only concerned with virtual SErrors.
6132          * The pseudocode in the ARM for this case is
6133          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6134          *      AArch32.vESBOperation();
6135          * Most of the condition can be evaluated at translation time.
6136          * Test for EL2 present, and defer test for SEL2 to runtime.
6137          */
6138         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6139             gen_helper_vesb(cpu_env);
6140         }
6141     }
6142     return true;
6143 }
6144 
/* NOP: accepted as a valid instruction; no code is generated. */
static bool trans_NOP(DisasContext *s, arg_NOP *a)
{
    return true;
}
6149 
6150 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6151 {
6152     uint32_t val = ror32(a->imm, a->rot * 2);
6153     uint32_t mask = msr_mask(s, a->mask, a->r);
6154 
6155     if (gen_set_psr_im(s, mask, a->r, val)) {
6156         unallocated_encoding(s);
6157     }
6158     return true;
6159 }
6160 
6161 /*
6162  * Cyclic Redundancy Check
6163  */
6164 
6165 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6166 {
6167     TCGv_i32 t1, t2, t3;
6168 
6169     if (!dc_isar_feature(aa32_crc32, s)) {
6170         return false;
6171     }
6172 
6173     t1 = load_reg(s, a->rn);
6174     t2 = load_reg(s, a->rm);
6175     switch (sz) {
6176     case MO_8:
6177         gen_uxtb(t2);
6178         break;
6179     case MO_16:
6180         gen_uxth(t2);
6181         break;
6182     case MO_32:
6183         break;
6184     default:
6185         g_assert_not_reached();
6186     }
6187     t3 = tcg_constant_i32(1 << sz);
6188     if (c) {
6189         gen_helper_crc32c(t1, t1, t2, t3);
6190     } else {
6191         gen_helper_crc32(t1, t1, t2, t3);
6192     }
6193     store_reg(s, a->rd, t1);
6194     return true;
6195 }
6196 
/*
 * Instantiate a trans_<NAME>() wrapper around op_crc32().
 * 'c' selects the crc32c helper; 'sz' is the MemOp operand size.
 */
#define DO_CRC32(NAME, c, sz) \
static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
    { return op_crc32(s, a, c, sz); }

DO_CRC32(CRC32B, false, MO_8)
DO_CRC32(CRC32H, false, MO_16)
DO_CRC32(CRC32W, false, MO_32)
DO_CRC32(CRC32CB, true, MO_8)
DO_CRC32(CRC32CH, true, MO_16)
DO_CRC32(CRC32CW, true, MO_32)

#undef DO_CRC32
6209 
6210 /*
6211  * Miscellaneous instructions
6212  */
6213 
6214 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6215 {
6216     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6217         return false;
6218     }
6219     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6220     return true;
6221 }
6222 
6223 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6224 {
6225     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6226         return false;
6227     }
6228     gen_msr_banked(s, a->r, a->sysm, a->rn);
6229     return true;
6230 }
6231 
6232 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6233 {
6234     TCGv_i32 tmp;
6235 
6236     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6237         return false;
6238     }
6239     if (a->r) {
6240         if (IS_USER(s)) {
6241             unallocated_encoding(s);
6242             return true;
6243         }
6244         tmp = load_cpu_field(spsr);
6245     } else {
6246         tmp = tcg_temp_new_i32();
6247         gen_helper_cpsr_read(tmp, cpu_env);
6248     }
6249     store_reg(s, a->rd, tmp);
6250     return true;
6251 }
6252 
6253 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6254 {
6255     TCGv_i32 tmp;
6256     uint32_t mask = msr_mask(s, a->mask, a->r);
6257 
6258     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6259         return false;
6260     }
6261     tmp = load_reg(s, a->rn);
6262     if (gen_set_psr(s, mask, a->r, tmp)) {
6263         unallocated_encoding(s);
6264     }
6265     return true;
6266 }
6267 
6268 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6269 {
6270     TCGv_i32 tmp;
6271 
6272     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6273         return false;
6274     }
6275     tmp = tcg_temp_new_i32();
6276     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6277     store_reg(s, a->rd, tmp);
6278     return true;
6279 }
6280 
6281 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6282 {
6283     TCGv_i32 addr, reg;
6284 
6285     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6286         return false;
6287     }
6288     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6289     reg = load_reg(s, a->rn);
6290     gen_helper_v7m_msr(cpu_env, addr, reg);
6291     /* If we wrote to CONTROL, the EL might have changed */
6292     gen_rebuild_hflags(s, true);
6293     gen_lookup_tb(s);
6294     return true;
6295 }
6296 
6297 static bool trans_BX(DisasContext *s, arg_BX *a)
6298 {
6299     if (!ENABLE_ARCH_4T) {
6300         return false;
6301     }
6302     gen_bx_excret(s, load_reg(s, a->rm));
6303     return true;
6304 }
6305 
6306 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6307 {
6308     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6309         return false;
6310     }
6311     /*
6312      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6313      * TBFLAGS bit on a basically-never-happens case, so call a helper
6314      * function to check for the trap and raise the exception if needed
6315      * (passing it the register number for the syndrome value).
6316      * v8A doesn't have this HSTR bit.
6317      */
6318     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6319         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6320         s->current_el < 2 && s->ns) {
6321         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6322     }
6323     /* Trivial implementation equivalent to bx.  */
6324     gen_bx(s, load_reg(s, a->rm));
6325     return true;
6326 }
6327 
6328 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6329 {
6330     TCGv_i32 tmp;
6331 
6332     if (!ENABLE_ARCH_5) {
6333         return false;
6334     }
6335     tmp = load_reg(s, a->rm);
6336     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6337     gen_bx(s, tmp);
6338     return true;
6339 }
6340 
6341 /*
6342  * BXNS/BLXNS: only exist for v8M with the security extensions,
6343  * and always UNDEF if NonSecure.  We don't implement these in
6344  * the user-only mode either (in theory you can use them from
6345  * Secure User mode but they are too tied in to system emulation).
6346  */
6347 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6348 {
6349     if (!s->v8m_secure || IS_USER_ONLY) {
6350         unallocated_encoding(s);
6351     } else {
6352         gen_bxns(s, a->rm);
6353     }
6354     return true;
6355 }
6356 
6357 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6358 {
6359     if (!s->v8m_secure || IS_USER_ONLY) {
6360         unallocated_encoding(s);
6361     } else {
6362         gen_blxns(s, a->rm);
6363     }
6364     return true;
6365 }
6366 
6367 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6368 {
6369     TCGv_i32 tmp;
6370 
6371     if (!ENABLE_ARCH_5) {
6372         return false;
6373     }
6374     tmp = load_reg(s, a->rm);
6375     tcg_gen_clzi_i32(tmp, tmp, 32);
6376     store_reg(s, a->rd, tmp);
6377     return true;
6378 }
6379 
6380 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6381 {
6382     TCGv_i32 tmp;
6383 
6384     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6385         return false;
6386     }
6387     if (IS_USER(s)) {
6388         unallocated_encoding(s);
6389         return true;
6390     }
6391     if (s->current_el == 2) {
6392         /* ERET from Hyp uses ELR_Hyp, not LR */
6393         tmp = load_cpu_field(elr_el[2]);
6394     } else {
6395         tmp = load_reg(s, 14);
6396     }
6397     gen_exception_return(s, tmp);
6398     return true;
6399 }
6400 
6401 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6402 {
6403     gen_hlt(s, a->imm);
6404     return true;
6405 }
6406 
6407 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6408 {
6409     if (!ENABLE_ARCH_5) {
6410         return false;
6411     }
6412     /* BKPT is OK with ECI set and leaves it untouched */
6413     s->eci_handled = true;
6414     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6415         semihosting_enabled(s->current_el == 0) &&
6416         (a->imm == 0xab)) {
6417         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6418     } else {
6419         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6420     }
6421     return true;
6422 }
6423 
6424 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6425 {
6426     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6427         return false;
6428     }
6429     if (IS_USER(s)) {
6430         unallocated_encoding(s);
6431     } else {
6432         gen_hvc(s, a->imm);
6433     }
6434     return true;
6435 }
6436 
6437 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6438 {
6439     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6440         return false;
6441     }
6442     if (IS_USER(s)) {
6443         unallocated_encoding(s);
6444     } else {
6445         gen_smc(s);
6446     }
6447     return true;
6448 }
6449 
6450 static bool trans_SG(DisasContext *s, arg_SG *a)
6451 {
6452     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6453         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6454         return false;
6455     }
6456     /*
6457      * SG (v8M only)
6458      * The bulk of the behaviour for this instruction is implemented
6459      * in v7m_handle_execute_nsc(), which deals with the insn when
6460      * it is executed by a CPU in non-secure state from memory
6461      * which is Secure & NonSecure-Callable.
6462      * Here we only need to handle the remaining cases:
6463      *  * in NS memory (including the "security extension not
6464      *    implemented" case) : NOP
6465      *  * in S memory but CPU already secure (clear IT bits)
6466      * We know that the attribute for the memory this insn is
6467      * in must match the current CPU state, because otherwise
6468      * get_phys_addr_pmsav8 would have generated an exception.
6469      */
6470     if (s->v8m_secure) {
6471         /* Like the IT insn, we don't need to generate any code */
6472         s->condexec_cond = 0;
6473         s->condexec_mask = 0;
6474     }
6475     return true;
6476 }
6477 
6478 static bool trans_TT(DisasContext *s, arg_TT *a)
6479 {
6480     TCGv_i32 addr, tmp;
6481 
6482     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6483         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6484         return false;
6485     }
6486     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6487         /* We UNDEF for these UNPREDICTABLE cases */
6488         unallocated_encoding(s);
6489         return true;
6490     }
6491     if (a->A && !s->v8m_secure) {
6492         /* This case is UNDEFINED.  */
6493         unallocated_encoding(s);
6494         return true;
6495     }
6496 
6497     addr = load_reg(s, a->rn);
6498     tmp = tcg_temp_new_i32();
6499     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6500     store_reg(s, a->rd, tmp);
6501     return true;
6502 }
6503 
6504 /*
6505  * Load/store register index
6506  */
6507 
6508 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6509 {
6510     ISSInfo ret;
6511 
6512     /* ISS not valid if writeback */
6513     if (p && !w) {
6514         ret = rd;
6515         if (curr_insn_len(s) == 2) {
6516             ret |= ISSIs16Bit;
6517         }
6518     } else {
6519         ret = ISSInvalid;
6520     }
6521     return ret;
6522 }
6523 
6524 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6525 {
6526     TCGv_i32 addr = load_reg(s, a->rn);
6527 
6528     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6529         gen_helper_v8m_stackcheck(cpu_env, addr);
6530     }
6531 
6532     if (a->p) {
6533         TCGv_i32 ofs = load_reg(s, a->rm);
6534         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6535         if (a->u) {
6536             tcg_gen_add_i32(addr, addr, ofs);
6537         } else {
6538             tcg_gen_sub_i32(addr, addr, ofs);
6539         }
6540     }
6541     return addr;
6542 }
6543 
6544 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6545                             TCGv_i32 addr, int address_offset)
6546 {
6547     if (!a->p) {
6548         TCGv_i32 ofs = load_reg(s, a->rm);
6549         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6550         if (a->u) {
6551             tcg_gen_add_i32(addr, addr, ofs);
6552         } else {
6553             tcg_gen_sub_i32(addr, addr, ofs);
6554         }
6555     } else if (!a->w) {
6556         return;
6557     }
6558     tcg_gen_addi_i32(addr, addr, address_offset);
6559     store_reg(s, a->rn, addr);
6560 }
6561 
6562 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6563                        MemOp mop, int mem_idx)
6564 {
6565     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6566     TCGv_i32 addr, tmp;
6567 
6568     addr = op_addr_rr_pre(s, a);
6569 
6570     tmp = tcg_temp_new_i32();
6571     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6572     disas_set_da_iss(s, mop, issinfo);
6573 
6574     /*
6575      * Perform base writeback before the loaded value to
6576      * ensure correct behavior with overlapping index registers.
6577      */
6578     op_addr_rr_post(s, a, addr, 0);
6579     store_reg_from_load(s, a->rt, tmp);
6580     return true;
6581 }
6582 
6583 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6584                         MemOp mop, int mem_idx)
6585 {
6586     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6587     TCGv_i32 addr, tmp;
6588 
6589     /*
6590      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6591      * is either UNPREDICTABLE or has defined behaviour
6592      */
6593     if (s->thumb && a->rn == 15) {
6594         return false;
6595     }
6596 
6597     addr = op_addr_rr_pre(s, a);
6598 
6599     tmp = load_reg(s, a->rt);
6600     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6601     disas_set_da_iss(s, mop, issinfo);
6602 
6603     op_addr_rr_post(s, a, addr, 0);
6604     return true;
6605 }
6606 
6607 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6608 {
6609     int mem_idx = get_mem_index(s);
6610     TCGv_i32 addr, tmp;
6611 
6612     if (!ENABLE_ARCH_5TE) {
6613         return false;
6614     }
6615     if (a->rt & 1) {
6616         unallocated_encoding(s);
6617         return true;
6618     }
6619     addr = op_addr_rr_pre(s, a);
6620 
6621     tmp = tcg_temp_new_i32();
6622     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6623     store_reg(s, a->rt, tmp);
6624 
6625     tcg_gen_addi_i32(addr, addr, 4);
6626 
6627     tmp = tcg_temp_new_i32();
6628     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6629     store_reg(s, a->rt + 1, tmp);
6630 
6631     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6632     op_addr_rr_post(s, a, addr, -4);
6633     return true;
6634 }
6635 
6636 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6637 {
6638     int mem_idx = get_mem_index(s);
6639     TCGv_i32 addr, tmp;
6640 
6641     if (!ENABLE_ARCH_5TE) {
6642         return false;
6643     }
6644     if (a->rt & 1) {
6645         unallocated_encoding(s);
6646         return true;
6647     }
6648     addr = op_addr_rr_pre(s, a);
6649 
6650     tmp = load_reg(s, a->rt);
6651     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6652 
6653     tcg_gen_addi_i32(addr, addr, 4);
6654 
6655     tmp = load_reg(s, a->rt + 1);
6656     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6657 
6658     op_addr_rr_post(s, a, addr, -4);
6659     return true;
6660 }
6661 
6662 /*
6663  * Load/store immediate index
6664  */
6665 
6666 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6667 {
6668     int ofs = a->imm;
6669 
6670     if (!a->u) {
6671         ofs = -ofs;
6672     }
6673 
6674     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6675         /*
6676          * Stackcheck. Here we know 'addr' is the current SP;
6677          * U is set if we're moving SP up, else down. It is
6678          * UNKNOWN whether the limit check triggers when SP starts
6679          * below the limit and ends up above it; we chose to do so.
6680          */
6681         if (!a->u) {
6682             TCGv_i32 newsp = tcg_temp_new_i32();
6683             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6684             gen_helper_v8m_stackcheck(cpu_env, newsp);
6685         } else {
6686             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6687         }
6688     }
6689 
6690     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6691 }
6692 
6693 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6694                             TCGv_i32 addr, int address_offset)
6695 {
6696     if (!a->p) {
6697         if (a->u) {
6698             address_offset += a->imm;
6699         } else {
6700             address_offset -= a->imm;
6701         }
6702     } else if (!a->w) {
6703         return;
6704     }
6705     tcg_gen_addi_i32(addr, addr, address_offset);
6706     store_reg(s, a->rn, addr);
6707 }
6708 
6709 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6710                        MemOp mop, int mem_idx)
6711 {
6712     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6713     TCGv_i32 addr, tmp;
6714 
6715     addr = op_addr_ri_pre(s, a);
6716 
6717     tmp = tcg_temp_new_i32();
6718     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6719     disas_set_da_iss(s, mop, issinfo);
6720 
6721     /*
6722      * Perform base writeback before the loaded value to
6723      * ensure correct behavior with overlapping index registers.
6724      */
6725     op_addr_ri_post(s, a, addr, 0);
6726     store_reg_from_load(s, a->rt, tmp);
6727     return true;
6728 }
6729 
6730 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6731                         MemOp mop, int mem_idx)
6732 {
6733     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6734     TCGv_i32 addr, tmp;
6735 
6736     /*
6737      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6738      * is either UNPREDICTABLE or has defined behaviour
6739      */
6740     if (s->thumb && a->rn == 15) {
6741         return false;
6742     }
6743 
6744     addr = op_addr_ri_pre(s, a);
6745 
6746     tmp = load_reg(s, a->rt);
6747     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6748     disas_set_da_iss(s, mop, issinfo);
6749 
6750     op_addr_ri_post(s, a, addr, 0);
6751     return true;
6752 }
6753 
6754 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6755 {
6756     int mem_idx = get_mem_index(s);
6757     TCGv_i32 addr, tmp;
6758 
6759     addr = op_addr_ri_pre(s, a);
6760 
6761     tmp = tcg_temp_new_i32();
6762     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6763     store_reg(s, a->rt, tmp);
6764 
6765     tcg_gen_addi_i32(addr, addr, 4);
6766 
6767     tmp = tcg_temp_new_i32();
6768     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6769     store_reg(s, rt2, tmp);
6770 
6771     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6772     op_addr_ri_post(s, a, addr, -4);
6773     return true;
6774 }
6775 
6776 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6777 {
6778     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6779         return false;
6780     }
6781     return op_ldrd_ri(s, a, a->rt + 1);
6782 }
6783 
6784 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6785 {
6786     arg_ldst_ri b = {
6787         .u = a->u, .w = a->w, .p = a->p,
6788         .rn = a->rn, .rt = a->rt, .imm = a->imm
6789     };
6790     return op_ldrd_ri(s, &b, a->rt2);
6791 }
6792 
6793 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6794 {
6795     int mem_idx = get_mem_index(s);
6796     TCGv_i32 addr, tmp;
6797 
6798     addr = op_addr_ri_pre(s, a);
6799 
6800     tmp = load_reg(s, a->rt);
6801     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6802 
6803     tcg_gen_addi_i32(addr, addr, 4);
6804 
6805     tmp = load_reg(s, rt2);
6806     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6807 
6808     op_addr_ri_post(s, a, addr, -4);
6809     return true;
6810 }
6811 
6812 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6813 {
6814     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6815         return false;
6816     }
6817     return op_strd_ri(s, a, a->rt + 1);
6818 }
6819 
6820 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6821 {
6822     arg_ldst_ri b = {
6823         .u = a->u, .w = a->w, .p = a->p,
6824         .rn = a->rn, .rt = a->rt, .imm = a->imm
6825     };
6826     return op_strd_ri(s, &b, a->rt2);
6827 }
6828 
6829 #define DO_LDST(NAME, WHICH, MEMOP) \
6830 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6831 {                                                                     \
6832     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6833 }                                                                     \
6834 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6835 {                                                                     \
6836     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6837 }                                                                     \
6838 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6839 {                                                                     \
6840     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6841 }                                                                     \
6842 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6843 {                                                                     \
6844     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6845 }
6846 
6847 DO_LDST(LDR, load, MO_UL)
6848 DO_LDST(LDRB, load, MO_UB)
6849 DO_LDST(LDRH, load, MO_UW)
6850 DO_LDST(LDRSB, load, MO_SB)
6851 DO_LDST(LDRSH, load, MO_SW)
6852 
6853 DO_LDST(STR, store, MO_UL)
6854 DO_LDST(STRB, store, MO_UB)
6855 DO_LDST(STRH, store, MO_UW)
6856 
6857 #undef DO_LDST
6858 
6859 /*
6860  * Synchronization primitives
6861  */
6862 
6863 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6864 {
6865     TCGv_i32 addr, tmp;
6866     TCGv taddr;
6867 
6868     opc |= s->be_data;
6869     addr = load_reg(s, a->rn);
6870     taddr = gen_aa32_addr(s, addr, opc);
6871 
6872     tmp = load_reg(s, a->rt2);
6873     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6874 
6875     store_reg(s, a->rt, tmp);
6876     return true;
6877 }
6878 
6879 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6880 {
6881     return op_swp(s, a, MO_UL | MO_ALIGN);
6882 }
6883 
6884 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6885 {
6886     return op_swp(s, a, MO_UB);
6887 }
6888 
6889 /*
6890  * Load/Store Exclusive and Load-Acquire/Store-Release
6891  */
6892 
6893 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6894 {
6895     TCGv_i32 addr;
6896     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6897     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6898 
6899     /* We UNDEF for these UNPREDICTABLE cases.  */
6900     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6901         || a->rd == a->rn || a->rd == a->rt
6902         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6903         || (mop == MO_64
6904             && (a->rt2 == 15
6905                 || a->rd == a->rt2
6906                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6907         unallocated_encoding(s);
6908         return true;
6909     }
6910 
6911     if (rel) {
6912         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6913     }
6914 
6915     addr = tcg_temp_new_i32();
6916     load_reg_var(s, addr, a->rn);
6917     tcg_gen_addi_i32(addr, addr, a->imm);
6918 
6919     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6920     return true;
6921 }
6922 
6923 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6924 {
6925     if (!ENABLE_ARCH_6) {
6926         return false;
6927     }
6928     return op_strex(s, a, MO_32, false);
6929 }
6930 
6931 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6932 {
6933     if (!ENABLE_ARCH_6K) {
6934         return false;
6935     }
6936     /* We UNDEF for these UNPREDICTABLE cases.  */
6937     if (a->rt & 1) {
6938         unallocated_encoding(s);
6939         return true;
6940     }
6941     a->rt2 = a->rt + 1;
6942     return op_strex(s, a, MO_64, false);
6943 }
6944 
6945 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6946 {
6947     return op_strex(s, a, MO_64, false);
6948 }
6949 
6950 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6951 {
6952     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6953         return false;
6954     }
6955     return op_strex(s, a, MO_8, false);
6956 }
6957 
6958 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6959 {
6960     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6961         return false;
6962     }
6963     return op_strex(s, a, MO_16, false);
6964 }
6965 
6966 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6967 {
6968     if (!ENABLE_ARCH_8) {
6969         return false;
6970     }
6971     return op_strex(s, a, MO_32, true);
6972 }
6973 
6974 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6975 {
6976     if (!ENABLE_ARCH_8) {
6977         return false;
6978     }
6979     /* We UNDEF for these UNPREDICTABLE cases.  */
6980     if (a->rt & 1) {
6981         unallocated_encoding(s);
6982         return true;
6983     }
6984     a->rt2 = a->rt + 1;
6985     return op_strex(s, a, MO_64, true);
6986 }
6987 
6988 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6989 {
6990     if (!ENABLE_ARCH_8) {
6991         return false;
6992     }
6993     return op_strex(s, a, MO_64, true);
6994 }
6995 
6996 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6997 {
6998     if (!ENABLE_ARCH_8) {
6999         return false;
7000     }
7001     return op_strex(s, a, MO_8, true);
7002 }
7003 
7004 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7005 {
7006     if (!ENABLE_ARCH_8) {
7007         return false;
7008     }
7009     return op_strex(s, a, MO_16, true);
7010 }
7011 
7012 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7013 {
7014     TCGv_i32 addr, tmp;
7015 
7016     if (!ENABLE_ARCH_8) {
7017         return false;
7018     }
7019     /* We UNDEF for these UNPREDICTABLE cases.  */
7020     if (a->rn == 15 || a->rt == 15) {
7021         unallocated_encoding(s);
7022         return true;
7023     }
7024 
7025     addr = load_reg(s, a->rn);
7026     tmp = load_reg(s, a->rt);
7027     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7028     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7029     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7030 
7031     return true;
7032 }
7033 
7034 static bool trans_STL(DisasContext *s, arg_STL *a)
7035 {
7036     return op_stl(s, a, MO_UL);
7037 }
7038 
7039 static bool trans_STLB(DisasContext *s, arg_STL *a)
7040 {
7041     return op_stl(s, a, MO_UB);
7042 }
7043 
7044 static bool trans_STLH(DisasContext *s, arg_STL *a)
7045 {
7046     return op_stl(s, a, MO_UW);
7047 }
7048 
7049 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7050 {
7051     TCGv_i32 addr;
7052     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7053     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7054 
7055     /* We UNDEF for these UNPREDICTABLE cases.  */
7056     if (a->rn == 15 || a->rt == 15
7057         || (!v8a && s->thumb && a->rt == 13)
7058         || (mop == MO_64
7059             && (a->rt2 == 15 || a->rt == a->rt2
7060                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7061         unallocated_encoding(s);
7062         return true;
7063     }
7064 
7065     addr = tcg_temp_new_i32();
7066     load_reg_var(s, addr, a->rn);
7067     tcg_gen_addi_i32(addr, addr, a->imm);
7068 
7069     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7070 
7071     if (acq) {
7072         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7073     }
7074     return true;
7075 }
7076 
7077 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7078 {
7079     if (!ENABLE_ARCH_6) {
7080         return false;
7081     }
7082     return op_ldrex(s, a, MO_32, false);
7083 }
7084 
7085 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7086 {
7087     if (!ENABLE_ARCH_6K) {
7088         return false;
7089     }
7090     /* We UNDEF for these UNPREDICTABLE cases.  */
7091     if (a->rt & 1) {
7092         unallocated_encoding(s);
7093         return true;
7094     }
7095     a->rt2 = a->rt + 1;
7096     return op_ldrex(s, a, MO_64, false);
7097 }
7098 
7099 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7100 {
7101     return op_ldrex(s, a, MO_64, false);
7102 }
7103 
7104 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7105 {
7106     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7107         return false;
7108     }
7109     return op_ldrex(s, a, MO_8, false);
7110 }
7111 
7112 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7113 {
7114     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7115         return false;
7116     }
7117     return op_ldrex(s, a, MO_16, false);
7118 }
7119 
7120 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7121 {
7122     if (!ENABLE_ARCH_8) {
7123         return false;
7124     }
7125     return op_ldrex(s, a, MO_32, true);
7126 }
7127 
7128 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7129 {
7130     if (!ENABLE_ARCH_8) {
7131         return false;
7132     }
7133     /* We UNDEF for these UNPREDICTABLE cases.  */
7134     if (a->rt & 1) {
7135         unallocated_encoding(s);
7136         return true;
7137     }
7138     a->rt2 = a->rt + 1;
7139     return op_ldrex(s, a, MO_64, true);
7140 }
7141 
7142 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7143 {
7144     if (!ENABLE_ARCH_8) {
7145         return false;
7146     }
7147     return op_ldrex(s, a, MO_64, true);
7148 }
7149 
7150 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7151 {
7152     if (!ENABLE_ARCH_8) {
7153         return false;
7154     }
7155     return op_ldrex(s, a, MO_8, true);
7156 }
7157 
7158 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7159 {
7160     if (!ENABLE_ARCH_8) {
7161         return false;
7162     }
7163     return op_ldrex(s, a, MO_16, true);
7164 }
7165 
7166 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7167 {
7168     TCGv_i32 addr, tmp;
7169 
7170     if (!ENABLE_ARCH_8) {
7171         return false;
7172     }
7173     /* We UNDEF for these UNPREDICTABLE cases.  */
7174     if (a->rn == 15 || a->rt == 15) {
7175         unallocated_encoding(s);
7176         return true;
7177     }
7178 
7179     addr = load_reg(s, a->rn);
7180     tmp = tcg_temp_new_i32();
7181     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7182     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7183 
7184     store_reg(s, a->rt, tmp);
7185     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7186     return true;
7187 }
7188 
7189 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7190 {
7191     return op_lda(s, a, MO_UL);
7192 }
7193 
7194 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7195 {
7196     return op_lda(s, a, MO_UB);
7197 }
7198 
7199 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7200 {
7201     return op_lda(s, a, MO_UW);
7202 }
7203 
7204 /*
7205  * Media instructions
7206  */
7207 
7208 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7209 {
7210     TCGv_i32 t1, t2;
7211 
7212     if (!ENABLE_ARCH_6) {
7213         return false;
7214     }
7215 
7216     t1 = load_reg(s, a->rn);
7217     t2 = load_reg(s, a->rm);
7218     gen_helper_usad8(t1, t1, t2);
7219     if (a->ra != 15) {
7220         t2 = load_reg(s, a->ra);
7221         tcg_gen_add_i32(t1, t1, t2);
7222     }
7223     store_reg(s, a->rd, t1);
7224     return true;
7225 }
7226 
7227 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7228 {
7229     TCGv_i32 tmp;
7230     int width = a->widthm1 + 1;
7231     int shift = a->lsb;
7232 
7233     if (!ENABLE_ARCH_6T2) {
7234         return false;
7235     }
7236     if (shift + width > 32) {
7237         /* UNPREDICTABLE; we choose to UNDEF */
7238         unallocated_encoding(s);
7239         return true;
7240     }
7241 
7242     tmp = load_reg(s, a->rn);
7243     if (u) {
7244         tcg_gen_extract_i32(tmp, tmp, shift, width);
7245     } else {
7246         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7247     }
7248     store_reg(s, a->rd, tmp);
7249     return true;
7250 }
7251 
7252 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7253 {
7254     return op_bfx(s, a, false);
7255 }
7256 
7257 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7258 {
7259     return op_bfx(s, a, true);
7260 }
7261 
7262 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7263 {
7264     TCGv_i32 tmp;
7265     int msb = a->msb, lsb = a->lsb;
7266     int width;
7267 
7268     if (!ENABLE_ARCH_6T2) {
7269         return false;
7270     }
7271     if (msb < lsb) {
7272         /* UNPREDICTABLE; we choose to UNDEF */
7273         unallocated_encoding(s);
7274         return true;
7275     }
7276 
7277     width = msb + 1 - lsb;
7278     if (a->rn == 15) {
7279         /* BFC */
7280         tmp = tcg_const_i32(0);
7281     } else {
7282         /* BFI */
7283         tmp = load_reg(s, a->rn);
7284     }
7285     if (width != 32) {
7286         TCGv_i32 tmp2 = load_reg(s, a->rd);
7287         tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7288     }
7289     store_reg(s, a->rd, tmp);
7290     return true;
7291 }
7292 
7293 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7294 {
7295     unallocated_encoding(s);
7296     return true;
7297 }
7298 
7299 /*
7300  * Parallel addition and subtraction
7301  */
7302 
7303 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7304                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7305 {
7306     TCGv_i32 t0, t1;
7307 
7308     if (s->thumb
7309         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7310         : !ENABLE_ARCH_6) {
7311         return false;
7312     }
7313 
7314     t0 = load_reg(s, a->rn);
7315     t1 = load_reg(s, a->rm);
7316 
7317     gen(t0, t0, t1);
7318 
7319     store_reg(s, a->rd, t0);
7320     return true;
7321 }
7322 
7323 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7324                              void (*gen)(TCGv_i32, TCGv_i32,
7325                                          TCGv_i32, TCGv_ptr))
7326 {
7327     TCGv_i32 t0, t1;
7328     TCGv_ptr ge;
7329 
7330     if (s->thumb
7331         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7332         : !ENABLE_ARCH_6) {
7333         return false;
7334     }
7335 
7336     t0 = load_reg(s, a->rn);
7337     t1 = load_reg(s, a->rm);
7338 
7339     ge = tcg_temp_new_ptr();
7340     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7341     gen(t0, t0, t1, ge);
7342 
7343     store_reg(s, a->rd, t0);
7344     return true;
7345 }
7346 
7347 #define DO_PAR_ADDSUB(NAME, helper) \
7348 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7349 {                                                       \
7350     return op_par_addsub(s, a, helper);                 \
7351 }
7352 
7353 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7354 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7355 {                                                       \
7356     return op_par_addsub_ge(s, a, helper);              \
7357 }
7358 
7359 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7360 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7361 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7362 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7363 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7364 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7365 
7366 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7367 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7368 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7369 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7370 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7371 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7372 
7373 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7374 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7375 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7376 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7377 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7378 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7379 
7380 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7381 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7382 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7383 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7384 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7385 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7386 
7387 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7388 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7389 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7390 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7391 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7392 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7393 
7394 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7395 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7396 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7397 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7398 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7399 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7400 
7401 #undef DO_PAR_ADDSUB
7402 #undef DO_PAR_ADDSUB_GE
7403 
7404 /*
7405  * Packing, unpacking, saturation, and reversal
7406  */
7407 
7408 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7409 {
7410     TCGv_i32 tn, tm;
7411     int shift = a->imm;
7412 
7413     if (s->thumb
7414         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7415         : !ENABLE_ARCH_6) {
7416         return false;
7417     }
7418 
7419     tn = load_reg(s, a->rn);
7420     tm = load_reg(s, a->rm);
7421     if (a->tb) {
7422         /* PKHTB */
7423         if (shift == 0) {
7424             shift = 31;
7425         }
7426         tcg_gen_sari_i32(tm, tm, shift);
7427         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7428     } else {
7429         /* PKHBT */
7430         tcg_gen_shli_i32(tm, tm, shift);
7431         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7432     }
7433     store_reg(s, a->rd, tn);
7434     return true;
7435 }
7436 
7437 static bool op_sat(DisasContext *s, arg_sat *a,
7438                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7439 {
7440     TCGv_i32 tmp;
7441     int shift = a->imm;
7442 
7443     if (!ENABLE_ARCH_6) {
7444         return false;
7445     }
7446 
7447     tmp = load_reg(s, a->rn);
7448     if (a->sh) {
7449         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7450     } else {
7451         tcg_gen_shli_i32(tmp, tmp, shift);
7452     }
7453 
7454     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7455 
7456     store_reg(s, a->rd, tmp);
7457     return true;
7458 }
7459 
7460 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7461 {
7462     return op_sat(s, a, gen_helper_ssat);
7463 }
7464 
7465 static bool trans_USAT(DisasContext *s, arg_sat *a)
7466 {
7467     return op_sat(s, a, gen_helper_usat);
7468 }
7469 
7470 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7471 {
7472     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7473         return false;
7474     }
7475     return op_sat(s, a, gen_helper_ssat16);
7476 }
7477 
7478 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7479 {
7480     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7481         return false;
7482     }
7483     return op_sat(s, a, gen_helper_usat16);
7484 }
7485 
7486 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7487                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7488                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7489 {
7490     TCGv_i32 tmp;
7491 
7492     if (!ENABLE_ARCH_6) {
7493         return false;
7494     }
7495 
7496     tmp = load_reg(s, a->rm);
7497     /*
7498      * TODO: In many cases we could do a shift instead of a rotate.
7499      * Combined with a simple extend, that becomes an extract.
7500      */
7501     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7502     gen_extract(tmp, tmp);
7503 
7504     if (a->rn != 15) {
7505         TCGv_i32 tmp2 = load_reg(s, a->rn);
7506         gen_add(tmp, tmp, tmp2);
7507     }
7508     store_reg(s, a->rd, tmp);
7509     return true;
7510 }
7511 
7512 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7513 {
7514     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7515 }
7516 
7517 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7518 {
7519     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7520 }
7521 
7522 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7523 {
7524     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7525         return false;
7526     }
7527     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7528 }
7529 
7530 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7531 {
7532     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7533 }
7534 
7535 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7536 {
7537     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7538 }
7539 
7540 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7541 {
7542     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7543         return false;
7544     }
7545     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7546 }
7547 
7548 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7549 {
7550     TCGv_i32 t1, t2, t3;
7551 
7552     if (s->thumb
7553         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7554         : !ENABLE_ARCH_6) {
7555         return false;
7556     }
7557 
7558     t1 = load_reg(s, a->rn);
7559     t2 = load_reg(s, a->rm);
7560     t3 = tcg_temp_new_i32();
7561     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7562     gen_helper_sel_flags(t1, t3, t1, t2);
7563     store_reg(s, a->rd, t1);
7564     return true;
7565 }
7566 
7567 static bool op_rr(DisasContext *s, arg_rr *a,
7568                   void (*gen)(TCGv_i32, TCGv_i32))
7569 {
7570     TCGv_i32 tmp;
7571 
7572     tmp = load_reg(s, a->rm);
7573     gen(tmp, tmp);
7574     store_reg(s, a->rd, tmp);
7575     return true;
7576 }
7577 
7578 static bool trans_REV(DisasContext *s, arg_rr *a)
7579 {
7580     if (!ENABLE_ARCH_6) {
7581         return false;
7582     }
7583     return op_rr(s, a, tcg_gen_bswap32_i32);
7584 }
7585 
7586 static bool trans_REV16(DisasContext *s, arg_rr *a)
7587 {
7588     if (!ENABLE_ARCH_6) {
7589         return false;
7590     }
7591     return op_rr(s, a, gen_rev16);
7592 }
7593 
7594 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7595 {
7596     if (!ENABLE_ARCH_6) {
7597         return false;
7598     }
7599     return op_rr(s, a, gen_revsh);
7600 }
7601 
7602 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7603 {
7604     if (!ENABLE_ARCH_6T2) {
7605         return false;
7606     }
7607     return op_rr(s, a, gen_helper_rbit);
7608 }
7609 
7610 /*
7611  * Signed multiply, signed and unsigned divide
7612  */
7613 
7614 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7615 {
7616     TCGv_i32 t1, t2;
7617 
7618     if (!ENABLE_ARCH_6) {
7619         return false;
7620     }
7621 
7622     t1 = load_reg(s, a->rn);
7623     t2 = load_reg(s, a->rm);
7624     if (m_swap) {
7625         gen_swap_half(t2, t2);
7626     }
7627     gen_smul_dual(t1, t2);
7628 
7629     if (sub) {
7630         /*
7631          * This subtraction cannot overflow, so we can do a simple
7632          * 32-bit subtraction and then a possible 32-bit saturating
7633          * addition of Ra.
7634          */
7635         tcg_gen_sub_i32(t1, t1, t2);
7636 
7637         if (a->ra != 15) {
7638             t2 = load_reg(s, a->ra);
7639             gen_helper_add_setq(t1, cpu_env, t1, t2);
7640         }
7641     } else if (a->ra == 15) {
7642         /* Single saturation-checking addition */
7643         gen_helper_add_setq(t1, cpu_env, t1, t2);
7644     } else {
7645         /*
7646          * We need to add the products and Ra together and then
7647          * determine whether the final result overflowed. Doing
7648          * this as two separate add-and-check-overflow steps incorrectly
7649          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7650          * Do all the arithmetic at 64-bits and then check for overflow.
7651          */
7652         TCGv_i64 p64, q64;
7653         TCGv_i32 t3, qf, one;
7654 
7655         p64 = tcg_temp_new_i64();
7656         q64 = tcg_temp_new_i64();
7657         tcg_gen_ext_i32_i64(p64, t1);
7658         tcg_gen_ext_i32_i64(q64, t2);
7659         tcg_gen_add_i64(p64, p64, q64);
7660         load_reg_var(s, t2, a->ra);
7661         tcg_gen_ext_i32_i64(q64, t2);
7662         tcg_gen_add_i64(p64, p64, q64);
7663 
7664         tcg_gen_extr_i64_i32(t1, t2, p64);
7665         /*
7666          * t1 is the low half of the result which goes into Rd.
7667          * We have overflow and must set Q if the high half (t2)
7668          * is different from the sign-extension of t1.
7669          */
7670         t3 = tcg_temp_new_i32();
7671         tcg_gen_sari_i32(t3, t1, 31);
7672         qf = load_cpu_field(QF);
7673         one = tcg_constant_i32(1);
7674         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7675         store_cpu_field(qf, QF);
7676     }
7677     store_reg(s, a->rd, t1);
7678     return true;
7679 }
7680 
7681 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7682 {
7683     return op_smlad(s, a, false, false);
7684 }
7685 
7686 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7687 {
7688     return op_smlad(s, a, true, false);
7689 }
7690 
7691 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7692 {
7693     return op_smlad(s, a, false, true);
7694 }
7695 
7696 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7697 {
7698     return op_smlad(s, a, true, true);
7699 }
7700 
7701 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7702 {
7703     TCGv_i32 t1, t2;
7704     TCGv_i64 l1, l2;
7705 
7706     if (!ENABLE_ARCH_6) {
7707         return false;
7708     }
7709 
7710     t1 = load_reg(s, a->rn);
7711     t2 = load_reg(s, a->rm);
7712     if (m_swap) {
7713         gen_swap_half(t2, t2);
7714     }
7715     gen_smul_dual(t1, t2);
7716 
7717     l1 = tcg_temp_new_i64();
7718     l2 = tcg_temp_new_i64();
7719     tcg_gen_ext_i32_i64(l1, t1);
7720     tcg_gen_ext_i32_i64(l2, t2);
7721 
7722     if (sub) {
7723         tcg_gen_sub_i64(l1, l1, l2);
7724     } else {
7725         tcg_gen_add_i64(l1, l1, l2);
7726     }
7727 
7728     gen_addq(s, l1, a->ra, a->rd);
7729     gen_storeq_reg(s, a->ra, a->rd, l1);
7730     return true;
7731 }
7732 
7733 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7734 {
7735     return op_smlald(s, a, false, false);
7736 }
7737 
7738 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7739 {
7740     return op_smlald(s, a, true, false);
7741 }
7742 
7743 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7744 {
7745     return op_smlald(s, a, false, true);
7746 }
7747 
7748 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7749 {
7750     return op_smlald(s, a, true, true);
7751 }
7752 
7753 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7754 {
7755     TCGv_i32 t1, t2;
7756 
7757     if (s->thumb
7758         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7759         : !ENABLE_ARCH_6) {
7760         return false;
7761     }
7762 
7763     t1 = load_reg(s, a->rn);
7764     t2 = load_reg(s, a->rm);
7765     tcg_gen_muls2_i32(t2, t1, t1, t2);
7766 
7767     if (a->ra != 15) {
7768         TCGv_i32 t3 = load_reg(s, a->ra);
7769         if (sub) {
7770             /*
7771              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
7772              * a non-zero multiplicand lowpart, and the correct result
7773              * lowpart for rounding.
7774              */
7775             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7776         } else {
7777             tcg_gen_add_i32(t1, t1, t3);
7778         }
7779     }
7780     if (round) {
7781         /*
7782          * Adding 0x80000000 to the 64-bit quantity means that we have
7783          * carry in to the high word when the low word has the msb set.
7784          */
7785         tcg_gen_shri_i32(t2, t2, 31);
7786         tcg_gen_add_i32(t1, t1, t2);
7787     }
7788     store_reg(s, a->rd, t1);
7789     return true;
7790 }
7791 
7792 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7793 {
7794     return op_smmla(s, a, false, false);
7795 }
7796 
7797 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7798 {
7799     return op_smmla(s, a, true, false);
7800 }
7801 
7802 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7803 {
7804     return op_smmla(s, a, false, true);
7805 }
7806 
7807 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7808 {
7809     return op_smmla(s, a, true, true);
7810 }
7811 
7812 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7813 {
7814     TCGv_i32 t1, t2;
7815 
7816     if (s->thumb
7817         ? !dc_isar_feature(aa32_thumb_div, s)
7818         : !dc_isar_feature(aa32_arm_div, s)) {
7819         return false;
7820     }
7821 
7822     t1 = load_reg(s, a->rn);
7823     t2 = load_reg(s, a->rm);
7824     if (u) {
7825         gen_helper_udiv(t1, cpu_env, t1, t2);
7826     } else {
7827         gen_helper_sdiv(t1, cpu_env, t1, t2);
7828     }
7829     store_reg(s, a->rd, t1);
7830     return true;
7831 }
7832 
7833 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7834 {
7835     return op_div(s, a, false);
7836 }
7837 
7838 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7839 {
7840     return op_div(s, a, true);
7841 }
7842 
7843 /*
7844  * Block data transfer
7845  */
7846 
7847 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7848 {
7849     TCGv_i32 addr = load_reg(s, a->rn);
7850 
7851     if (a->b) {
7852         if (a->i) {
7853             /* pre increment */
7854             tcg_gen_addi_i32(addr, addr, 4);
7855         } else {
7856             /* pre decrement */
7857             tcg_gen_addi_i32(addr, addr, -(n * 4));
7858         }
7859     } else if (!a->i && n != 1) {
7860         /* post decrement */
7861         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7862     }
7863 
7864     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7865         /*
7866          * If the writeback is incrementing SP rather than
7867          * decrementing it, and the initial SP is below the
7868          * stack limit but the final written-back SP would
7869          * be above, then we must not perform any memory
7870          * accesses, but it is IMPDEF whether we generate
7871          * an exception. We choose to do so in this case.
7872          * At this point 'addr' is the lowest address, so
7873          * either the original SP (if incrementing) or our
7874          * final SP (if decrementing), so that's what we check.
7875          */
7876         gen_helper_v8m_stackcheck(cpu_env, addr);
7877     }
7878 
7879     return addr;
7880 }
7881 
7882 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7883                                TCGv_i32 addr, int n)
7884 {
7885     if (a->w) {
7886         /* write back */
7887         if (!a->b) {
7888             if (a->i) {
7889                 /* post increment */
7890                 tcg_gen_addi_i32(addr, addr, 4);
7891             } else {
7892                 /* post decrement */
7893                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7894             }
7895         } else if (!a->i && n != 1) {
7896             /* pre decrement */
7897             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7898         }
7899         store_reg(s, a->rn, addr);
7900     }
7901 }
7902 
7903 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7904 {
7905     int i, j, n, list, mem_idx;
7906     bool user = a->u;
7907     TCGv_i32 addr, tmp;
7908 
7909     if (user) {
7910         /* STM (user) */
7911         if (IS_USER(s)) {
7912             /* Only usable in supervisor mode.  */
7913             unallocated_encoding(s);
7914             return true;
7915         }
7916     }
7917 
7918     list = a->list;
7919     n = ctpop16(list);
7920     if (n < min_n || a->rn == 15) {
7921         unallocated_encoding(s);
7922         return true;
7923     }
7924 
7925     s->eci_handled = true;
7926 
7927     addr = op_addr_block_pre(s, a, n);
7928     mem_idx = get_mem_index(s);
7929 
7930     for (i = j = 0; i < 16; i++) {
7931         if (!(list & (1 << i))) {
7932             continue;
7933         }
7934 
7935         if (user && i != 15) {
7936             tmp = tcg_temp_new_i32();
7937             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
7938         } else {
7939             tmp = load_reg(s, i);
7940         }
7941         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7942 
7943         /* No need to add after the last transfer.  */
7944         if (++j != n) {
7945             tcg_gen_addi_i32(addr, addr, 4);
7946         }
7947     }
7948 
7949     op_addr_block_post(s, a, addr, n);
7950     clear_eci_state(s);
7951     return true;
7952 }
7953 
7954 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7955 {
7956     /* BitCount(list) < 1 is UNPREDICTABLE */
7957     return op_stm(s, a, 1);
7958 }
7959 
7960 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7961 {
7962     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7963     if (a->w && (a->list & (1 << a->rn))) {
7964         unallocated_encoding(s);
7965         return true;
7966     }
7967     /* BitCount(list) < 2 is UNPREDICTABLE */
7968     return op_stm(s, a, 2);
7969 }
7970 
7971 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7972 {
7973     int i, j, n, list, mem_idx;
7974     bool loaded_base;
7975     bool user = a->u;
7976     bool exc_return = false;
7977     TCGv_i32 addr, tmp, loaded_var;
7978 
7979     if (user) {
7980         /* LDM (user), LDM (exception return) */
7981         if (IS_USER(s)) {
7982             /* Only usable in supervisor mode.  */
7983             unallocated_encoding(s);
7984             return true;
7985         }
7986         if (extract32(a->list, 15, 1)) {
7987             exc_return = true;
7988             user = false;
7989         } else {
7990             /* LDM (user) does not allow writeback.  */
7991             if (a->w) {
7992                 unallocated_encoding(s);
7993                 return true;
7994             }
7995         }
7996     }
7997 
7998     list = a->list;
7999     n = ctpop16(list);
8000     if (n < min_n || a->rn == 15) {
8001         unallocated_encoding(s);
8002         return true;
8003     }
8004 
8005     s->eci_handled = true;
8006 
8007     addr = op_addr_block_pre(s, a, n);
8008     mem_idx = get_mem_index(s);
8009     loaded_base = false;
8010     loaded_var = NULL;
8011 
8012     for (i = j = 0; i < 16; i++) {
8013         if (!(list & (1 << i))) {
8014             continue;
8015         }
8016 
8017         tmp = tcg_temp_new_i32();
8018         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8019         if (user) {
8020             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8021         } else if (i == a->rn) {
8022             loaded_var = tmp;
8023             loaded_base = true;
8024         } else if (i == 15 && exc_return) {
8025             store_pc_exc_ret(s, tmp);
8026         } else {
8027             store_reg_from_load(s, i, tmp);
8028         }
8029 
8030         /* No need to add after the last transfer.  */
8031         if (++j != n) {
8032             tcg_gen_addi_i32(addr, addr, 4);
8033         }
8034     }
8035 
8036     op_addr_block_post(s, a, addr, n);
8037 
8038     if (loaded_base) {
8039         /* Note that we reject base == pc above.  */
8040         store_reg(s, a->rn, loaded_var);
8041     }
8042 
8043     if (exc_return) {
8044         /* Restore CPSR from SPSR.  */
8045         tmp = load_cpu_field(spsr);
8046         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8047             gen_io_start();
8048         }
8049         gen_helper_cpsr_write_eret(cpu_env, tmp);
8050         /* Must exit loop to check un-masked IRQs */
8051         s->base.is_jmp = DISAS_EXIT;
8052     }
8053     clear_eci_state(s);
8054     return true;
8055 }
8056 
8057 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8058 {
8059     /*
8060      * Writeback register in register list is UNPREDICTABLE
8061      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8062      * an UNKNOWN value to the base register.
8063      */
8064     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8065         unallocated_encoding(s);
8066         return true;
8067     }
8068     /* BitCount(list) < 1 is UNPREDICTABLE */
8069     return do_ldm(s, a, 1);
8070 }
8071 
8072 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8073 {
8074     /* Writeback register in register list is UNPREDICTABLE for T32. */
8075     if (a->w && (a->list & (1 << a->rn))) {
8076         unallocated_encoding(s);
8077         return true;
8078     }
8079     /* BitCount(list) < 2 is UNPREDICTABLE */
8080     return do_ldm(s, a, 2);
8081 }
8082 
8083 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8084 {
8085     /* Writeback is conditional on the base register not being loaded.  */
8086     a->w = !(a->list & (1 << a->rn));
8087     /* BitCount(list) < 1 is UNPREDICTABLE */
8088     return do_ldm(s, a, 1);
8089 }
8090 
8091 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8092 {
8093     int i;
8094     TCGv_i32 zero;
8095 
8096     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8097         return false;
8098     }
8099 
8100     if (extract32(a->list, 13, 1)) {
8101         return false;
8102     }
8103 
8104     if (!a->list) {
8105         /* UNPREDICTABLE; we choose to UNDEF */
8106         return false;
8107     }
8108 
8109     s->eci_handled = true;
8110 
8111     zero = tcg_constant_i32(0);
8112     for (i = 0; i < 15; i++) {
8113         if (extract32(a->list, i, 1)) {
8114             /* Clear R[i] */
8115             tcg_gen_mov_i32(cpu_R[i], zero);
8116         }
8117     }
8118     if (extract32(a->list, 15, 1)) {
8119         /*
8120          * Clear APSR (by calling the MSR helper with the same argument
8121          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8122          */
8123         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8124     }
8125     clear_eci_state(s);
8126     return true;
8127 }
8128 
8129 /*
8130  * Branch, branch with link
8131  */
8132 
8133 static bool trans_B(DisasContext *s, arg_i *a)
8134 {
8135     gen_jmp(s, jmp_diff(s, a->imm));
8136     return true;
8137 }
8138 
8139 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8140 {
8141     /* This has cond from encoding, required to be outside IT block.  */
8142     if (a->cond >= 0xe) {
8143         return false;
8144     }
8145     if (s->condexec_mask) {
8146         unallocated_encoding(s);
8147         return true;
8148     }
8149     arm_skip_unless(s, a->cond);
8150     gen_jmp(s, jmp_diff(s, a->imm));
8151     return true;
8152 }
8153 
8154 static bool trans_BL(DisasContext *s, arg_i *a)
8155 {
8156     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8157     gen_jmp(s, jmp_diff(s, a->imm));
8158     return true;
8159 }
8160 
8161 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8162 {
8163     /*
8164      * BLX <imm> would be useless on M-profile; the encoding space
8165      * is used for other insns from v8.1M onward, and UNDEFs before that.
8166      */
8167     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8168         return false;
8169     }
8170 
8171     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8172     if (s->thumb && (a->imm & 2)) {
8173         return false;
8174     }
8175     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8176     store_cpu_field_constant(!s->thumb, thumb);
8177     /* This jump is computed from an aligned PC: subtract off the low bits. */
8178     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8179     return true;
8180 }
8181 
8182 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8183 {
8184     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8185     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8186     return true;
8187 }
8188 
8189 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8190 {
8191     TCGv_i32 tmp = tcg_temp_new_i32();
8192 
8193     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8194     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8195     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8196     gen_bx(s, tmp);
8197     return true;
8198 }
8199 
8200 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8201 {
8202     TCGv_i32 tmp;
8203 
8204     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8205     if (!ENABLE_ARCH_5) {
8206         return false;
8207     }
8208     tmp = tcg_temp_new_i32();
8209     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8210     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8211     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8212     gen_bx(s, tmp);
8213     return true;
8214 }
8215 
8216 static bool trans_BF(DisasContext *s, arg_BF *a)
8217 {
8218     /*
8219      * M-profile branch future insns. The architecture permits an
8220      * implementation to implement these as NOPs (equivalent to
8221      * discarding the LO_BRANCH_INFO cache immediately), and we
8222      * take that IMPDEF option because for QEMU a "real" implementation
8223      * would be complicated and wouldn't execute any faster.
8224      */
8225     if (!dc_isar_feature(aa32_lob, s)) {
8226         return false;
8227     }
8228     if (a->boff == 0) {
8229         /* SEE "Related encodings" (loop insns) */
8230         return false;
8231     }
8232     /* Handle as NOP */
8233     return true;
8234 }
8235 
8236 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8237 {
8238     /* M-profile low-overhead loop start */
8239     TCGv_i32 tmp;
8240 
8241     if (!dc_isar_feature(aa32_lob, s)) {
8242         return false;
8243     }
8244     if (a->rn == 13 || a->rn == 15) {
8245         /*
8246          * For DLSTP rn == 15 is a related encoding (LCTP); the
8247          * other cases caught by this condition are all
8248          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8249          */
8250         return false;
8251     }
8252 
8253     if (a->size != 4) {
8254         /* DLSTP */
8255         if (!dc_isar_feature(aa32_mve, s)) {
8256             return false;
8257         }
8258         if (!vfp_access_check(s)) {
8259             return true;
8260         }
8261     }
8262 
8263     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8264     tmp = load_reg(s, a->rn);
8265     store_reg(s, 14, tmp);
8266     if (a->size != 4) {
8267         /* DLSTP: set FPSCR.LTPSIZE */
8268         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8269         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8270     }
8271     return true;
8272 }
8273 
8274 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8275 {
8276     /* M-profile low-overhead while-loop start */
8277     TCGv_i32 tmp;
8278     DisasLabel nextlabel;
8279 
8280     if (!dc_isar_feature(aa32_lob, s)) {
8281         return false;
8282     }
8283     if (a->rn == 13 || a->rn == 15) {
8284         /*
8285          * For WLSTP rn == 15 is a related encoding (LE); the
8286          * other cases caught by this condition are all
8287          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8288          */
8289         return false;
8290     }
8291     if (s->condexec_mask) {
8292         /*
8293          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8294          * we choose to UNDEF, because otherwise our use of
8295          * gen_goto_tb(1) would clash with the use of TB exit 1
8296          * in the dc->condjmp condition-failed codepath in
8297          * arm_tr_tb_stop() and we'd get an assertion.
8298          */
8299         return false;
8300     }
8301     if (a->size != 4) {
8302         /* WLSTP */
8303         if (!dc_isar_feature(aa32_mve, s)) {
8304             return false;
8305         }
8306         /*
8307          * We need to check that the FPU is enabled here, but mustn't
8308          * call vfp_access_check() to do that because we don't want to
8309          * do the lazy state preservation in the "loop count is zero" case.
8310          * Do the check-and-raise-exception by hand.
8311          */
8312         if (s->fp_excp_el) {
8313             gen_exception_insn_el(s, 0, EXCP_NOCP,
8314                                   syn_uncategorized(), s->fp_excp_el);
8315             return true;
8316         }
8317     }
8318 
8319     nextlabel = gen_disas_label(s);
8320     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8321     tmp = load_reg(s, a->rn);
8322     store_reg(s, 14, tmp);
8323     if (a->size != 4) {
8324         /*
8325          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8326          * lazy state preservation, new FP context creation, etc,
8327          * that vfp_access_check() does. We know that the actual
8328          * access check will succeed (ie it won't generate code that
8329          * throws an exception) because we did that check by hand earlier.
8330          */
8331         bool ok = vfp_access_check(s);
8332         assert(ok);
8333         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8334         /*
8335          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8336          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8337          */
8338     }
8339     gen_jmp_tb(s, curr_insn_len(s), 1);
8340 
8341     set_disas_label(s, nextlabel);
8342     gen_jmp(s, jmp_diff(s, a->imm));
8343     return true;
8344 }
8345 
/*
 * Translate the M-profile low-overhead-loop end insns. a->tp selects
 * the tail-predicated form (LETP) and a->f the loop-forever form.
 *
 * Returns false to reject the encoding (the IT-block case below relies
 * on that resulting in an UNDEF); returns true once code has been
 * emitted, or when vfp_access_check() has reported failure for LETP.
 */
static bool trans_LE(DisasContext *s, arg_LE *a)
{
    /*
     * M-profile low-overhead loop end. The architecture permits an
     * implementation to discard the LO_BRANCH_INFO cache at any time,
     * and we take the IMPDEF option to never set it in the first place
     * (equivalent to always discarding it immediately), because for QEMU
     * a "real" implementation would be complicated and wouldn't execute
     * any faster.
     */
    TCGv_i32 tmp;
    DisasLabel loopend;
    bool fpu_active;

    if (!dc_isar_feature(aa32_lob, s)) {
        return false;
    }
    /* The loop-forever and tail-predicated forms cannot be combined */
    if (a->f && a->tp) {
        return false;
    }
    if (s->condexec_mask) {
        /*
         * LE in an IT block is CONSTRAINED UNPREDICTABLE;
         * we choose to UNDEF, because otherwise our use of
         * gen_goto_tb(1) would clash with the use of TB exit 1
         * in the dc->condjmp condition-failed codepath in
         * arm_tr_tb_stop() and we'd get an assertion.
         */
        return false;
    }
    if (a->tp) {
        /* LETP */
        if (!dc_isar_feature(aa32_mve, s)) {
            return false;
        }
        if (!vfp_access_check(s)) {
            /* Mark ECI as handled on this early-exit path too */
            s->eci_handled = true;
            return true;
        }
    }

    /* LE/LETP is OK with ECI set and leaves it untouched */
    s->eci_handled = true;

    /*
     * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
     * UsageFault exception for the LE insn in that case. Note that we
     * are not directly checking FPSCR.LTPSIZE but instead check the
     * pseudocode LTPSIZE() function, which returns 4 if the FPU is
     * not currently active (ie ActiveFPState() returns false). We
     * can identify not-active purely from our TB state flags, as the
     * FPU is active only if:
     *  the FPU is enabled
     *  AND lazy state preservation is not active
     *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
     *
     * Usually we don't need to care about this distinction between
     * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
     * will either take an exception or clear the conditions that make
     * the FPU not active. But LE is an unusual case of a non-FP insn
     * that looks at LTPSIZE.
     */
    fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;

    if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
        /* Need to do a runtime check for LTPSIZE != 4 */
        DisasLabel skipexc = gen_disas_label(s);
        tmp = load_cpu_field(v7m.ltpsize);
        /* Branch over the exception when LTPSIZE == 4 */
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
        set_disas_label(s, skipexc);
    }

    if (a->f) {
        /* Loop-forever: just jump back to the loop start */
        gen_jmp(s, jmp_diff(s, -a->imm));
        return true;
    }

    /*
     * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
     * For LE, we know at this point that LTPSIZE must be 4 and the
     * loop decrement value is 1. For LETP we need to calculate the decrement
     * value from LTPSIZE.
     */
    loopend = gen_disas_label(s);
    if (!a->tp) {
        /* LE: leave the loop when LR <= 1, otherwise LR -= 1 */
        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
        tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
    } else {
        /*
         * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
         * so that decr stays live after the brcondi.
         */
        TCGv_i32 decr = tcg_temp_new_i32();
        TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
        tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
        tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);

        /* LETP: leave the loop when LR <= decr, otherwise LR -= decr */
        tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);

        tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
    }
    /* Jump back to the loop start */
    gen_jmp(s, jmp_diff(s, -a->imm));

    set_disas_label(s, loopend);
    if (a->tp) {
        /* Exits from tail-pred loops must reset LTPSIZE to 4 */
        store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
    }
    /* End TB, continuing to following insn */
    gen_jmp_tb(s, curr_insn_len(s), 1);
    return true;
}
8461 
8462 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8463 {
8464     /*
8465      * M-profile Loop Clear with Tail Predication. Since our implementation
8466      * doesn't cache branch information, all we need to do is reset
8467      * FPSCR.LTPSIZE to 4.
8468      */
8469 
8470     if (!dc_isar_feature(aa32_lob, s) ||
8471         !dc_isar_feature(aa32_mve, s)) {
8472         return false;
8473     }
8474 
8475     if (!vfp_access_check(s)) {
8476         return true;
8477     }
8478 
8479     store_cpu_field_constant(4, v7m.ltpsize);
8480     return true;
8481 }
8482 
8483 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8484 {
8485     /*
8486      * M-profile Create Vector Tail Predicate. This insn is itself
8487      * predicated and is subject to beatwise execution.
8488      */
8489     TCGv_i32 rn_shifted, masklen;
8490 
8491     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8492         return false;
8493     }
8494 
8495     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8496         return true;
8497     }
8498 
8499     /*
8500      * We pre-calculate the mask length here to avoid having
8501      * to have multiple helpers specialized for size.
8502      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8503      */
8504     rn_shifted = tcg_temp_new_i32();
8505     masklen = load_reg(s, a->rn);
8506     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8507     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8508                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8509                         rn_shifted, tcg_constant_i32(16));
8510     gen_helper_mve_vctp(cpu_env, masklen);
8511     /* This insn updates predication bits */
8512     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8513     mve_update_eci(s);
8514     return true;
8515 }
8516 
8517 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8518 {
8519     TCGv_i32 addr, tmp;
8520 
8521     tmp = load_reg(s, a->rm);
8522     if (half) {
8523         tcg_gen_add_i32(tmp, tmp, tmp);
8524     }
8525     addr = load_reg(s, a->rn);
8526     tcg_gen_add_i32(addr, addr, tmp);
8527 
8528     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8529 
8530     tcg_gen_add_i32(tmp, tmp, tmp);
8531     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8532     tcg_gen_add_i32(tmp, tmp, addr);
8533     store_reg(s, 15, tmp);
8534     return true;
8535 }
8536 
8537 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8538 {
8539     return op_tbranch(s, a, false);
8540 }
8541 
8542 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8543 {
8544     return op_tbranch(s, a, true);
8545 }
8546 
8547 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8548 {
8549     TCGv_i32 tmp = load_reg(s, a->rn);
8550 
8551     arm_gen_condlabel(s);
8552     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8553                         tmp, 0, s->condlabel.label);
8554     gen_jmp(s, jmp_diff(s, a->imm));
8555     return true;
8556 }
8557 
8558 /*
8559  * Supervisor call - both T32 & A32 come here so we need to check
8560  * which mode we are in when checking for semihosting.
8561  */
8562 
8563 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8564 {
8565     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8566 
8567     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8568         semihosting_enabled(s->current_el == 0) &&
8569         (a->imm == semihost_imm)) {
8570         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8571     } else {
8572         if (s->fgt_svc) {
8573             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8574             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8575         } else {
8576             gen_update_pc(s, curr_insn_len(s));
8577             s->svc_imm = a->imm;
8578             s->base.is_jmp = DISAS_SWI;
8579         }
8580     }
8581     return true;
8582 }
8583 
8584 /*
8585  * Unconditional system instructions
8586  */
8587 
8588 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8589 {
8590     static const int8_t pre_offset[4] = {
8591         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8592     };
8593     static const int8_t post_offset[4] = {
8594         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8595     };
8596     TCGv_i32 addr, t1, t2;
8597 
8598     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8599         return false;
8600     }
8601     if (IS_USER(s)) {
8602         unallocated_encoding(s);
8603         return true;
8604     }
8605 
8606     addr = load_reg(s, a->rn);
8607     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8608 
8609     /* Load PC into tmp and CPSR into tmp2.  */
8610     t1 = tcg_temp_new_i32();
8611     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8612     tcg_gen_addi_i32(addr, addr, 4);
8613     t2 = tcg_temp_new_i32();
8614     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8615 
8616     if (a->w) {
8617         /* Base writeback.  */
8618         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8619         store_reg(s, a->rn, addr);
8620     }
8621     gen_rfe(s, t1, t2);
8622     return true;
8623 }
8624 
8625 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8626 {
8627     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8628         return false;
8629     }
8630     gen_srs(s, a->mode, a->pu, a->w);
8631     return true;
8632 }
8633 
8634 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8635 {
8636     uint32_t mask, val;
8637 
8638     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8639         return false;
8640     }
8641     if (IS_USER(s)) {
8642         /* Implemented as NOP in user mode.  */
8643         return true;
8644     }
8645     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8646 
8647     mask = val = 0;
8648     if (a->imod & 2) {
8649         if (a->A) {
8650             mask |= CPSR_A;
8651         }
8652         if (a->I) {
8653             mask |= CPSR_I;
8654         }
8655         if (a->F) {
8656             mask |= CPSR_F;
8657         }
8658         if (a->imod & 1) {
8659             val |= mask;
8660         }
8661     }
8662     if (a->M) {
8663         mask |= CPSR_M;
8664         val |= a->mode;
8665     }
8666     if (mask) {
8667         gen_set_psr_im(s, mask, 0, val);
8668     }
8669     return true;
8670 }
8671 
8672 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8673 {
8674     TCGv_i32 tmp, addr;
8675 
8676     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8677         return false;
8678     }
8679     if (IS_USER(s)) {
8680         /* Implemented as NOP in user mode.  */
8681         return true;
8682     }
8683 
8684     tmp = tcg_constant_i32(a->im);
8685     /* FAULTMASK */
8686     if (a->F) {
8687         addr = tcg_constant_i32(19);
8688         gen_helper_v7m_msr(cpu_env, addr, tmp);
8689     }
8690     /* PRIMASK */
8691     if (a->I) {
8692         addr = tcg_constant_i32(16);
8693         gen_helper_v7m_msr(cpu_env, addr, tmp);
8694     }
8695     gen_rebuild_hflags(s, false);
8696     gen_lookup_tb(s);
8697     return true;
8698 }
8699 
8700 /*
8701  * Clear-Exclusive, Barriers
8702  */
8703 
8704 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8705 {
8706     if (s->thumb
8707         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8708         : !ENABLE_ARCH_6K) {
8709         return false;
8710     }
8711     gen_clrex(s);
8712     return true;
8713 }
8714 
8715 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8716 {
8717     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8718         return false;
8719     }
8720     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8721     return true;
8722 }
8723 
8724 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8725 {
8726     return trans_DSB(s, NULL);
8727 }
8728 
8729 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8730 {
8731     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8732         return false;
8733     }
8734     /*
8735      * We need to break the TB after this insn to execute
8736      * self-modifying code correctly and also to take
8737      * any pending interrupts immediately.
8738      */
8739     s->base.is_jmp = DISAS_TOO_MANY;
8740     return true;
8741 }
8742 
8743 static bool trans_SB(DisasContext *s, arg_SB *a)
8744 {
8745     if (!dc_isar_feature(aa32_sb, s)) {
8746         return false;
8747     }
8748     /*
8749      * TODO: There is no speculation barrier opcode
8750      * for TCG; MB and end the TB instead.
8751      */
8752     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8753     s->base.is_jmp = DISAS_TOO_MANY;
8754     return true;
8755 }
8756 
8757 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8758 {
8759     if (!ENABLE_ARCH_6) {
8760         return false;
8761     }
8762     if (a->E != (s->be_data == MO_BE)) {
8763         gen_helper_setend(cpu_env);
8764         s->base.is_jmp = DISAS_UPDATE_EXIT;
8765     }
8766     return true;
8767 }
8768 
8769 /*
8770  * Preload instructions
8771  * All are nops, contingent on the appropriate arch level.
8772  */
8773 
8774 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8775 {
8776     return ENABLE_ARCH_5TE;
8777 }
8778 
8779 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8780 {
8781     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8782 }
8783 
8784 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8785 {
8786     return ENABLE_ARCH_7;
8787 }
8788 
8789 /*
8790  * If-then
8791  */
8792 
8793 static bool trans_IT(DisasContext *s, arg_IT *a)
8794 {
8795     int cond_mask = a->cond_mask;
8796 
8797     /*
8798      * No actual code generated for this insn, just setup state.
8799      *
8800      * Combinations of firstcond and mask which set up an 0b1111
8801      * condition are UNPREDICTABLE; we take the CONSTRAINED
8802      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8803      * i.e. both meaning "execute always".
8804      */
8805     s->condexec_cond = (cond_mask >> 4) & 0xe;
8806     s->condexec_mask = cond_mask & 0x1f;
8807     return true;
8808 }
8809 
8810 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8811 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8812 {
8813     TCGv_i32 rn, rm, zero;
8814     DisasCompare c;
8815 
8816     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8817         return false;
8818     }
8819 
8820     if (a->rm == 13) {
8821         /* SEE "Related encodings" (MVE shifts) */
8822         return false;
8823     }
8824 
8825     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8826         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8827         return false;
8828     }
8829 
8830     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8831     zero = tcg_constant_i32(0);
8832     if (a->rn == 15) {
8833         rn = zero;
8834     } else {
8835         rn = load_reg(s, a->rn);
8836     }
8837     if (a->rm == 15) {
8838         rm = zero;
8839     } else {
8840         rm = load_reg(s, a->rm);
8841     }
8842 
8843     switch (a->op) {
8844     case 0: /* CSEL */
8845         break;
8846     case 1: /* CSINC */
8847         tcg_gen_addi_i32(rm, rm, 1);
8848         break;
8849     case 2: /* CSINV */
8850         tcg_gen_not_i32(rm, rm);
8851         break;
8852     case 3: /* CSNEG */
8853         tcg_gen_neg_i32(rm, rm);
8854         break;
8855     default:
8856         g_assert_not_reached();
8857     }
8858 
8859     arm_test_cc(&c, a->fcond);
8860     tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
8861 
8862     store_reg(s, a->rd, rn);
8863     return true;
8864 }
8865 
8866 /*
8867  * Legacy decoder.
8868  */
8869 
8870 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8871 {
8872     unsigned int cond = insn >> 28;
8873 
8874     /* M variants do not implement ARM mode; this must raise the INVSTATE
8875      * UsageFault exception.
8876      */
8877     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8878         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8879         return;
8880     }
8881 
8882     if (s->pstate_il) {
8883         /*
8884          * Illegal execution state. This has priority over BTI
8885          * exceptions, but comes after instruction abort exceptions.
8886          */
8887         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8888         return;
8889     }
8890 
8891     if (cond == 0xf) {
8892         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8893          * choose to UNDEF. In ARMv5 and above the space is used
8894          * for miscellaneous unconditional instructions.
8895          */
8896         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8897             unallocated_encoding(s);
8898             return;
8899         }
8900 
8901         /* Unconditional instructions.  */
8902         /* TODO: Perhaps merge these into one decodetree output file.  */
8903         if (disas_a32_uncond(s, insn) ||
8904             disas_vfp_uncond(s, insn) ||
8905             disas_neon_dp(s, insn) ||
8906             disas_neon_ls(s, insn) ||
8907             disas_neon_shared(s, insn)) {
8908             return;
8909         }
8910         /* fall back to legacy decoder */
8911 
8912         if ((insn & 0x0e000f00) == 0x0c000100) {
8913             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8914                 /* iWMMXt register transfer.  */
8915                 if (extract32(s->c15_cpar, 1, 1)) {
8916                     if (!disas_iwmmxt_insn(s, insn)) {
8917                         return;
8918                     }
8919                 }
8920             }
8921         }
8922         goto illegal_op;
8923     }
8924     if (cond != 0xe) {
8925         /* if not always execute, we generate a conditional jump to
8926            next instruction */
8927         arm_skip_unless(s, cond);
8928     }
8929 
8930     /* TODO: Perhaps merge these into one decodetree output file.  */
8931     if (disas_a32(s, insn) ||
8932         disas_vfp(s, insn)) {
8933         return;
8934     }
8935     /* fall back to legacy decoder */
8936     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8937     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8938         if (((insn & 0x0c000e00) == 0x0c000000)
8939             && ((insn & 0x03000000) != 0x03000000)) {
8940             /* Coprocessor insn, coprocessor 0 or 1 */
8941             disas_xscale_insn(s, insn);
8942             return;
8943         }
8944     }
8945 
8946 illegal_op:
8947     unallocated_encoding(s);
8948 }
8949 
8950 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8951 {
8952     /*
8953      * Return true if this is a 16 bit instruction. We must be precise
8954      * about this (matching the decode).
8955      */
8956     if ((insn >> 11) < 0x1d) {
8957         /* Definitely a 16-bit instruction */
8958         return true;
8959     }
8960 
8961     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8962      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8963      * end up actually treating this as two 16-bit insns, though,
8964      * if it's half of a bl/blx pair that might span a page boundary.
8965      */
8966     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8967         arm_dc_feature(s, ARM_FEATURE_M)) {
8968         /* Thumb2 cores (including all M profile ones) always treat
8969          * 32-bit insns as 32-bit.
8970          */
8971         return false;
8972     }
8973 
8974     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8975         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8976          * is not on the next page; we merge this into a 32-bit
8977          * insn.
8978          */
8979         return false;
8980     }
8981     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8982      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8983      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8984      *  -- handle as single 16 bit insn
8985      */
8986     return true;
8987 }
8988 
8989 /* Translate a 32-bit thumb instruction. */
8990 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8991 {
8992     /*
8993      * ARMv6-M supports a limited subset of Thumb2 instructions.
8994      * Other Thumb1 architectures allow only 32-bit
8995      * combined BL/BLX prefix and suffix.
8996      */
8997     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8998         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8999         int i;
9000         bool found = false;
9001         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9002                                                0xf3b08040 /* dsb */,
9003                                                0xf3b08050 /* dmb */,
9004                                                0xf3b08060 /* isb */,
9005                                                0xf3e08000 /* mrs */,
9006                                                0xf000d000 /* bl */};
9007         static const uint32_t armv6m_mask[] = {0xffe0d000,
9008                                                0xfff0d0f0,
9009                                                0xfff0d0f0,
9010                                                0xfff0d0f0,
9011                                                0xffe0d000,
9012                                                0xf800d000};
9013 
9014         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9015             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9016                 found = true;
9017                 break;
9018             }
9019         }
9020         if (!found) {
9021             goto illegal_op;
9022         }
9023     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9024         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9025             unallocated_encoding(s);
9026             return;
9027         }
9028     }
9029 
9030     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9031         /*
9032          * NOCP takes precedence over any UNDEF for (almost) the
9033          * entire wide range of coprocessor-space encodings, so check
9034          * for it first before proceeding to actually decode eg VFP
9035          * insns. This decode also handles the few insns which are
9036          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9037          */
9038         if (disas_m_nocp(s, insn)) {
9039             return;
9040         }
9041     }
9042 
9043     if ((insn & 0xef000000) == 0xef000000) {
9044         /*
9045          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9046          * transform into
9047          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9048          */
9049         uint32_t a32_insn = (insn & 0xe2ffffff) |
9050             ((insn & (1 << 28)) >> 4) | (1 << 28);
9051 
9052         if (disas_neon_dp(s, a32_insn)) {
9053             return;
9054         }
9055     }
9056 
9057     if ((insn & 0xff100000) == 0xf9000000) {
9058         /*
9059          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9060          * transform into
9061          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9062          */
9063         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9064 
9065         if (disas_neon_ls(s, a32_insn)) {
9066             return;
9067         }
9068     }
9069 
9070     /*
9071      * TODO: Perhaps merge these into one decodetree output file.
9072      * Note disas_vfp is written for a32 with cond field in the
9073      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9074      */
9075     if (disas_t32(s, insn) ||
9076         disas_vfp_uncond(s, insn) ||
9077         disas_neon_shared(s, insn) ||
9078         disas_mve(s, insn) ||
9079         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9080         return;
9081     }
9082 
9083 illegal_op:
9084     unallocated_encoding(s);
9085 }
9086 
9087 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9088 {
9089     if (!disas_t16(s, insn)) {
9090         unallocated_encoding(s);
9091     }
9092 }
9093 
9094 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9095 {
9096     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9097      * (False positives are OK, false negatives are not.)
9098      * We know this is a Thumb insn, and our caller ensures we are
9099      * only called if dc->base.pc_next is less than 4 bytes from the page
9100      * boundary, so we cross the page if the first 16 bits indicate
9101      * that this is a 32 bit insn.
9102      */
9103     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9104 
9105     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9106 }
9107 
/*
 * Fill in the DisasContext for a new translation block: most fields
 * are unpacked from the TB flags, a few come from the CPU object
 * (ISAR, coprocessor registers, feature bits). Also bounds
 * max_insns and allocates the cpu_V0/cpu_V1/cpu_M0 temporaries.
 */
static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cs->env_ptr;
    ARMCPU *cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    uint32_t condexec, core_mmu_idx;

    dc->isar = &cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = false;
    dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
    /*
     * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
     * is always the IT bits. On M-profile, some of the reserved encodings
     * of IT are used instead to indicate either ICI or ECI, which
     * indicate partial progress of a restartable insn that was interrupted
     * partway through by an exception:
     *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
     *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
     * In all cases CONDEXEC == 0 means "not in IT block or restartable
     * insn, behave normally".
     */
    dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
    dc->eci_handled = false;
    if (condexec & 0xf) {
        /* IT bits: the mask is kept shifted left by one in the context */
        dc->condexec_mask = (condexec & 0xf) << 1;
        dc->condexec_cond = condexec >> 4;
    } else {
        if (arm_feature(env, ARM_FEATURE_M)) {
            dc->eci = condexec >> 4;
        }
    }

    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);

    /* Profile-specific flags: M-profile first, then A/R-profile */
    if (arm_feature(env, ARM_FEATURE_M)) {
        dc->vfp_enabled = 1;
        dc->be_data = MO_TE;
        dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
        dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
        dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
        dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
        dc->v7m_new_fp_ctxt_needed =
            EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
        dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
        dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
    } else {
        dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
        dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
        dc->ns = EX_TBFLAG_A32(tb_flags, NS);
        dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
        /* XScale reads XSCALE_CPAR; other cores read VECLEN/VECSTRIDE */
        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
        } else {
            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
        }
        dc->sme_trap_nonstreaming =
            EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
    }
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;

    /* If architectural single step active, limit to 1.  */
    if (dc->ss_active) {
        dc->base.max_insns = 1;
    }

    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
       to those left on the page.  */
    if (!dc->thumb) {
        /*
         * Assuming TARGET_PAGE_MASK has every bit above the page offset
         * set, -(pc_first | mask) is the byte count from pc_first to the
         * end of its page; dividing by 4 turns that into A32 insns.
         */
        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
        dc->base.max_insns = MIN(dc->base.max_insns, bound);
    }

    /* New 64-bit temporaries for the file-level cpu_V0/V1/M0 variables */
    cpu_V0 = tcg_temp_new_i64();
    cpu_V1 = tcg_temp_new_i64();
    cpu_M0 = tcg_temp_new_i64();
}
9222 
9223 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9224 {
9225     DisasContext *dc = container_of(dcbase, DisasContext, base);
9226 
9227     /* A note on handling of the condexec (IT) bits:
9228      *
9229      * We want to avoid the overhead of having to write the updated condexec
9230      * bits back to the CPUARMState for every instruction in an IT block. So:
9231      * (1) if the condexec bits are not already zero then we write
9232      * zero back into the CPUARMState now. This avoids complications trying
9233      * to do it at the end of the block. (For example if we don't do this
9234      * it's hard to identify whether we can safely skip writing condexec
9235      * at the end of the TB, which we definitely want to do for the case
9236      * where a TB doesn't do anything with the IT state at all.)
9237      * (2) if we are going to leave the TB then we call gen_set_condexec()
9238      * which will write the correct value into CPUARMState if zero is wrong.
9239      * This is done both for leaving the TB at the end, and for leaving
9240      * it because of an exception we know will happen, which is done in
9241      * gen_exception_insn(). The latter is necessary because we need to
9242      * leave the TB with the PC/IT state just prior to execution of the
9243      * instruction which caused the exception.
9244      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9245      * then the CPUARMState will be wrong and we need to reset it.
9246      * This is handled in the same way as restoration of the
9247      * PC in these situations; we save the value of the condexec bits
9248      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9249      * then uses this to restore them after an exception.
9250      *
9251      * Note that there are no instructions which can read the condexec
9252      * bits, and none which can write non-static values to them, so
9253      * we don't need to care about whether CPUARMState is correct in the
9254      * middle of a TB.
9255      */
9256 
9257     /* Reset the conditional execution bits immediately. This avoids
9258        complications trying to do it at the end of the block.  */
9259     if (dc->condexec_mask || dc->condexec_cond) {
9260         store_cpu_field_constant(0, condexec_bits);
9261     }
9262 }
9263 
9264 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9265 {
9266     DisasContext *dc = container_of(dcbase, DisasContext, base);
9267     /*
9268      * The ECI/ICI bits share PSR bits with the IT bits, so we
9269      * need to reconstitute the bits from the split-out DisasContext
9270      * fields here.
9271      */
9272     uint32_t condexec_bits;
9273     target_ulong pc_arg = dc->base.pc_next;
9274 
9275     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9276         pc_arg &= ~TARGET_PAGE_MASK;
9277     }
9278     if (dc->eci) {
9279         condexec_bits = dc->eci << 4;
9280     } else {
9281         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9282     }
9283     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9284     dc->insn_start = tcg_last_op();
9285 }
9286 
9287 static bool arm_check_kernelpage(DisasContext *dc)
9288 {
9289 #ifdef CONFIG_USER_ONLY
9290     /* Intercept jump to the magic kernel page.  */
9291     if (dc->base.pc_next >= 0xffff0000) {
9292         /* We always get here via a jump, so know we are not in a
9293            conditional execution block.  */
9294         gen_exception_internal(EXCP_KERNEL_TRAP);
9295         dc->base.is_jmp = DISAS_NORETURN;
9296         return true;
9297     }
9298 #endif
9299     return false;
9300 }
9301 
9302 static bool arm_check_ss_active(DisasContext *dc)
9303 {
9304     if (dc->ss_active && !dc->pstate_ss) {
9305         /* Singlestep state is Active-pending.
9306          * If we're in this state at the start of a TB then either
9307          *  a) we just took an exception to an EL which is being debugged
9308          *     and this is the first insn in the exception handler
9309          *  b) debug exceptions were masked and we just unmasked them
9310          *     without changing EL (eg by clearing PSTATE.D)
9311          * In either case we're going to take a swstep exception in the
9312          * "did not step an insn" case, and so the syndrome ISV and EX
9313          * bits should be zero.
9314          */
9315         assert(dc->base.num_insns == 1);
9316         gen_swstep_exception(dc, 0, 0);
9317         dc->base.is_jmp = DISAS_NORETURN;
9318         return true;
9319     }
9320 
9321     return false;
9322 }
9323 
9324 static void arm_post_translate_insn(DisasContext *dc)
9325 {
9326     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9327         if (dc->pc_save != dc->condlabel.pc_save) {
9328             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9329         }
9330         gen_set_label(dc->condlabel.label);
9331         dc->condjmp = 0;
9332     }
9333 }
9334 
9335 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9336 {
9337     DisasContext *dc = container_of(dcbase, DisasContext, base);
9338     CPUARMState *env = cpu->env_ptr;
9339     uint32_t pc = dc->base.pc_next;
9340     unsigned int insn;
9341 
9342     /* Singlestep exceptions have the highest priority. */
9343     if (arm_check_ss_active(dc)) {
9344         dc->base.pc_next = pc + 4;
9345         return;
9346     }
9347 
9348     if (pc & 3) {
9349         /*
9350          * PC alignment fault.  This has priority over the instruction abort
9351          * that we would receive from a translation fault via arm_ldl_code
9352          * (or the execution of the kernelpage entrypoint). This should only
9353          * be possible after an indirect branch, at the start of the TB.
9354          */
9355         assert(dc->base.num_insns == 1);
9356         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9357         dc->base.is_jmp = DISAS_NORETURN;
9358         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9359         return;
9360     }
9361 
9362     if (arm_check_kernelpage(dc)) {
9363         dc->base.pc_next = pc + 4;
9364         return;
9365     }
9366 
9367     dc->pc_curr = pc;
9368     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9369     dc->insn = insn;
9370     dc->base.pc_next = pc + 4;
9371     disas_arm_insn(dc, insn);
9372 
9373     arm_post_translate_insn(dc);
9374 
9375     /* ARM is a fixed-length ISA.  We performed the cross-page check
9376        in init_disas_context by adjusting max_insns.  */
9377 }
9378 
9379 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9380 {
9381     /* Return true if this Thumb insn is always unconditional,
9382      * even inside an IT block. This is true of only a very few
9383      * instructions: BKPT, HLT, and SG.
9384      *
9385      * A larger class of instructions are UNPREDICTABLE if used
9386      * inside an IT block; we do not need to detect those here, because
9387      * what we do by default (perform the cc check and update the IT
9388      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9389      * choice for those situations.
9390      *
9391      * insn is either a 16-bit or a 32-bit instruction; the two are
9392      * distinguishable because for the 16-bit case the top 16 bits
9393      * are zeroes, and that isn't a valid 32-bit encoding.
9394      */
9395     if ((insn & 0xffffff00) == 0xbe00) {
9396         /* BKPT */
9397         return true;
9398     }
9399 
9400     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9401         !arm_dc_feature(s, ARM_FEATURE_M)) {
9402         /* HLT: v8A only. This is unconditional even when it is going to
9403          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9404          * For v7 cores this was a plain old undefined encoding and so
9405          * honours its cc check. (We might be using the encoding as
9406          * a semihosting trap, but we don't change the cc check behaviour
9407          * on that account, because a debugger connected to a real v7A
9408          * core and emulating semihosting traps by catching the UNDEF
9409          * exception would also only see cases where the cc check passed.
9410          * No guest code should be trying to do a HLT semihosting trap
9411          * in an IT block anyway.
9412          */
9413         return true;
9414     }
9415 
9416     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9417         arm_dc_feature(s, ARM_FEATURE_M)) {
9418         /* SG: v8M only */
9419         return true;
9420     }
9421 
9422     return false;
9423 }
9424 
/*
 * Translate one Thumb instruction (16-bit or 32-bit) at dc->base.pc_next.
 *
 * Fetches the first halfword, plus a second one when thumb_insn_is_16bit()
 * reports a 32-bit encoding, advances dc->base.pc_next past the insn and
 * hands it to disas_thumb_insn() or disas_thumb2_insn().  Around the
 * decode it also:
 *  - generates the singlestep or kernel-page exception instead of decoding
 *    when arm_check_ss_active() or arm_check_kernelpage() returns true;
 *  - raises EXCP_UDEF when dc->pstate_il is set;
 *  - applies the IT-block condition and steps the condexec state machine;
 *  - throws away the generated ops and raises EXCP_INVSTATE when dc->eci
 *    is non-zero and the insn did not set dc->eci_handled;
 *  - sets DISAS_TOO_MANY to end the TB if the next insn would start in,
 *    or spill into, the next page.
 */
static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint32_t pc = dc->base.pc_next;
    uint32_t insn;
    bool is_16bit;
    /* TCG op to rewind to if this turns out to be an invalid ECI state */
    TCGOp *insn_eci_rewind = NULL;
    /* dc->pc_save as it was at the rewind point; only valid if dc->eci */
    target_ulong insn_eci_pc_save = -1;

    /* Misaligned thumb PC is architecturally impossible. */
    assert((dc->base.pc_next & 1) == 0);

    if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
        /* Nothing was decoded; step pc_next on by one halfword. */
        dc->base.pc_next = pc + 2;
        return;
    }

    dc->pc_curr = pc;
    insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
    pc += 2;
    if (!is_16bit) {
        /* 32-bit encoding: first halfword ends up in the top 16 bits. */
        uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
        insn = insn << 16 | insn2;
        pc += 2;
    }
    dc->base.pc_next = pc;
    dc->insn = insn;

    if (dc->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc->eci) {
        /*
         * For M-profile continuable instructions, ECI/ICI handling
         * falls into these cases:
         *  - interrupt-continuable instructions
         *     These are the various load/store multiple insns (both
         *     integer and fp). The ICI bits indicate the register
         *     where the load/store can resume. We make the IMPDEF
         *     choice to always do "instruction restart", ie ignore
         *     the ICI value and always execute the ldm/stm from the
         *     start. So all we need to do is zero PSR.ICI if the
         *     insn executes.
         *  - MVE instructions subject to beat-wise execution
         *     Here the ECI bits indicate which beats have already been
         *     executed, and we must honour this. Each insn of this
         *     type will handle it correctly. We will update PSR.ECI
         *     in the helper function for the insn (some ECI values
         *     mean that the following insn also has been partially
         *     executed).
         *  - Special cases which don't advance ECI
         *     The insns LE, LETP and BKPT leave the ECI/ICI state
         *     bits untouched.
         *  - all other insns (the common case)
         *     Non-zero ECI/ICI means an INVSTATE UsageFault.
         *     We place a rewind-marker here. Insns in the previous
         *     three categories will set a flag in the DisasContext.
         *     If the flag isn't set after we call disas_thumb_insn()
         *     or disas_thumb2_insn() then we know we have a "some other
         *     insn" case. We will rewind to the marker (ie throwing away
         *     all the generated code) and instead emit "take exception".
         */
        insn_eci_rewind = tcg_last_op();
        insn_eci_pc_save = dc->pc_save;
    }

    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
        uint32_t cond = dc->condexec_cond;

        /*
         * Conditionally skip the insn. Note that both 0xe and 0xf mean
         * "always"; 0xf is not "never".
         */
        if (cond < 0x0e) {
            arm_skip_unless(dc, cond);
        }
    }

    if (is_16bit) {
        disas_thumb_insn(dc, insn);
    } else {
        disas_thumb2_insn(dc, insn);
    }

    /*
     * Advance the Thumb condexec condition: bit 4 of the mask becomes the
     * low bit of the condition and the mask shifts left by one (kept to
     * five bits).  Once the mask reaches zero the condition is cleared too.
     */
    if (dc->condexec_mask) {
        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
                             ((dc->condexec_mask >> 4) & 1));
        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
        if (dc->condexec_mask == 0) {
            dc->condexec_cond = 0;
        }
    }

    if (dc->eci && !dc->eci_handled) {
        /*
         * Insn wasn't valid for ECI/ICI at all: undo what we
         * just generated and instead emit an exception
         */
        tcg_remove_ops_after(insn_eci_rewind);
        dc->pc_save = insn_eci_pc_save;
        /*
         * NOTE(review): condjmp is cleared presumably because any
         * conditional-skip branch emitted above was among the ops just
         * discarded -- confirm against arm_skip_unless().
         */
        dc->condjmp = 0;
        gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
    }

    arm_post_translate_insn(dc);

    /* Thumb is a variable-length ISA.  Stop translation when the next insn
     * will touch a new page.  This ensures that prefetch aborts occur at
     * the right place.
     *
     * We want to stop the TB if the next insn starts in a new page,
     * or if it spans between this page and the next. This means that
     * if we're looking at the last halfword in the page we need to
     * see if it's a 16-bit Thumb insn (which will fit in this TB)
     * or a 32-bit Thumb insn (which won't).
     * This is to avoid generating a silly TB with a single 16-bit insn
     * in it at the end of this page (which would execute correctly
     * but isn't very efficient).
     */
    if (dc->base.is_jmp == DISAS_NEXT
        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
                && insn_crosses_page(env, dc)))) {
        dc->base.is_jmp = DISAS_TOO_MANY;
    }
}
9561 
/*
 * End-of-TB hook shared by the A32 and Thumb translators.
 *
 * Calls gen_set_condexec() and then emits the code that leaves the TB,
 * selected by dc->base.is_jmp:
 *  - DISAS_BX_EXCRET is handed over entirely to gen_bx_excret_final_code();
 *  - when dc->ss_active is set, SWI/HVC/SMC advance the singlestep state
 *    and raise their exception, DISAS_NORETURN emits nothing, and every
 *    other value ends in gen_singlestep_exception();
 *  - otherwise each is_jmp value gets its normal exit (goto_tb, goto_ptr,
 *    exit_tb, WFI/WFE/YIELD helper, or exception).
 * If dc->condjmp is still set afterwards, a second exit is emitted behind
 * the condition-failed label for the path that skipped the final insn.
 */
static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    /* At this stage dc->condjmp will only be set when the skipped
       instruction was a conditional branch or trap, and the PC has
       already been written.  */
    gen_set_condexec(dc);
    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
        /* Exception return branches need some special case code at the
         * end of the TB, which is complex enough that it has to
         * handle the single-step vs not and the condition-failed
         * insn codepath itself.
         */
        gen_bx_excret_final_code(dc);
    } else if (unlikely(dc->ss_active)) {
        /* Unconditional and "condition passed" instruction codepath. */
        switch (dc->base.is_jmp) {
        case DISAS_SWI:
            gen_ss_advance(dc);
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
            break;
        case DISAS_HVC:
            gen_ss_advance(dc);
            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_ss_advance(dc);
            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
        case DISAS_UPDATE_EXIT:
        case DISAS_UPDATE_NOCHAIN:
            /* These values need the PC written before the step exception. */
            gen_update_pc(dc, curr_insn_len(dc));
            /* fall through */
        default:
            /*
             * default is placed mid-switch so the cases above can fall
             * into it; DISAS_NORETURN below is still matched explicitly.
             */
            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
            gen_singlestep_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        /* While branches must always occur at the end of an IT block,
           there are a few other things that can cause us to terminate
           the TB in the middle of an IT block:
            - Exception generating instructions (bkpt, swi, undefined).
            - Page boundaries.
            - Hardware watchpoints.
           Hardware breakpoints have already been handled and skip this code.
         */
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, curr_insn_len(dc));
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_update_pc(dc, curr_insn_len(dc));
            /* fall through */
        case DISAS_JUMP:
            gen_goto_ptr();
            break;
        case DISAS_UPDATE_EXIT:
            gen_update_pc(dc, curr_insn_len(dc));
            /* fall through */
        default:
            /*
             * As above, default sits mid-switch: the cases listed after
             * it are matched explicitly and never reach this exit.
             */
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_NORETURN:
            /* nothing more to generate */
            break;
        case DISAS_WFI:
            gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_WFE:
            /*
             * NOTE(review): no exit_tb after this helper (or the yield
             * helper below), unlike WFI -- presumably these helpers never
             * return to generated code; confirm.
             */
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_helper_yield(cpu_env);
            break;
        case DISAS_SWI:
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
            break;
        case DISAS_HVC:
            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        }
    }

    if (dc->condjmp) {
        /* "Condition failed" instruction codepath for the branch/trap insn */
        set_disas_label(dc, dc->condlabel);
        /* This path bypassed the write-back above, so do it again here. */
        gen_set_condexec(dc);
        if (unlikely(dc->ss_active)) {
            gen_update_pc(dc, curr_insn_len(dc));
            gen_singlestep_exception(dc);
        } else {
            gen_goto_tb(dc, 1, curr_insn_len(dc));
        }
    }
}
9673 
9674 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9675                              CPUState *cpu, FILE *logfile)
9676 {
9677     DisasContext *dc = container_of(dcbase, DisasContext, base);
9678 
9679     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9680     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9681 }
9682 
9683 static const TranslatorOps arm_translator_ops = {
9684     .init_disas_context = arm_tr_init_disas_context,
9685     .tb_start           = arm_tr_tb_start,
9686     .insn_start         = arm_tr_insn_start,
9687     .translate_insn     = arm_tr_translate_insn,
9688     .tb_stop            = arm_tr_tb_stop,
9689     .disas_log          = arm_tr_disas_log,
9690 };
9691 
9692 static const TranslatorOps thumb_translator_ops = {
9693     .init_disas_context = arm_tr_init_disas_context,
9694     .tb_start           = arm_tr_tb_start,
9695     .insn_start         = arm_tr_insn_start,
9696     .translate_insn     = thumb_tr_translate_insn,
9697     .tb_stop            = arm_tr_tb_stop,
9698     .disas_log          = arm_tr_disas_log,
9699 };
9700 
9701 /* generate intermediate code for basic block 'tb'.  */
9702 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9703                            target_ulong pc, void *host_pc)
9704 {
9705     DisasContext dc = { };
9706     const TranslatorOps *ops = &arm_translator_ops;
9707     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9708 
9709     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9710         ops = &thumb_translator_ops;
9711     }
9712 #ifdef TARGET_AARCH64
9713     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9714         ops = &aarch64_translator_ops;
9715     }
9716 #endif
9717 
9718     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9719 }
9720