xref: /openbmc/qemu/target/arm/tcg/translate.c (revision 51e47cf8)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "semihosting/semihost.h"
33 #include "exec/helper-proto.h"
34 #include "exec/helper-gen.h"
35 #include "exec/log.h"
36 #include "cpregs.h"
37 
38 
/*
 * Architecture-level feature tests.  Every macro expands to an expression
 * that refers to a variable named 's', so they can only be used where such
 * a variable is in scope.
 */
#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
/* Note: the 6T2 test is keyed on the THUMB2 feature bit. */
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
49 
50 #include "translate.h"
51 #include "translate-a32.h"
52 
/* These are TCG temporaries used only by the legacy iwMMXt decoder */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
/*
 * These are TCG globals which alias CPUARMState fields.
 * cpu_R[] is private to this file; the flag and exclusive-monitor
 * globals below have external linkage.
 */
static TCGv_i32 cpu_R[16];
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;
60 
61 #include "exec/gen-icount.h"
62 
/* Names of the sixteen core registers, indexed by register number. */
static const char * const regnames[] = {
    "r0",  "r1",  "r2",  "r3",
    "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "pc",
};
66 
67 
68 /* initialize TCG globals.  */
69 void arm_translate_init(void)
70 {
71     int i;
72 
73     for (i = 0; i < 16; i++) {
74         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
75                                           offsetof(CPUARMState, regs[i]),
76                                           regnames[i]);
77     }
78     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
79     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
80     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
81     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
82 
83     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
84         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
85     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
86         offsetof(CPUARMState, exclusive_val), "exclusive_val");
87 
88     a64_translate_init();
89 }
90 
91 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
92 {
93     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
94     switch (cmode) {
95     case 0: case 1:
96         /* no-op */
97         break;
98     case 2: case 3:
99         imm <<= 8;
100         break;
101     case 4: case 5:
102         imm <<= 16;
103         break;
104     case 6: case 7:
105         imm <<= 24;
106         break;
107     case 8: case 9:
108         imm |= imm << 16;
109         break;
110     case 10: case 11:
111         imm = (imm << 8) | (imm << 24);
112         break;
113     case 12:
114         imm = (imm << 8) | 0xff;
115         break;
116     case 13:
117         imm = (imm << 16) | 0xffff;
118         break;
119     case 14:
120         if (op) {
121             /*
122              * This and cmode == 15 op == 1 are the only cases where
123              * the top and bottom 32 bits of the encoded constant differ.
124              */
125             uint64_t imm64 = 0;
126             int n;
127 
128             for (n = 0; n < 8; n++) {
129                 if (imm & (1 << n)) {
130                     imm64 |= (0xffULL << (n * 8));
131                 }
132             }
133             return imm64;
134         }
135         imm |= (imm << 8) | (imm << 16) | (imm << 24);
136         break;
137     case 15:
138         if (op) {
139             /* Reserved encoding for AArch32; valid for AArch64 */
140             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
141             if (imm & 0x80) {
142                 imm64 |= 0x8000000000000000ULL;
143             }
144             if (imm & 0x40) {
145                 imm64 |= 0x3fc0000000000000ULL;
146             } else {
147                 imm64 |= 0x4000000000000000ULL;
148             }
149             return imm64;
150         }
151         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
152             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
153         break;
154     }
155     if (op) {
156         imm = ~imm;
157     }
158     return dup_const(MO_32, imm);
159 }
160 
161 /* Generate a label used for skipping this instruction */
162 void arm_gen_condlabel(DisasContext *s)
163 {
164     if (!s->condjmp) {
165         s->condlabel = gen_disas_label(s);
166         s->condjmp = 1;
167     }
168 }
169 
/*
 * Flags for the disas_set_da_iss info argument:
 * the low five bits hold the Rt register number, the bits above
 * them are individual flags.
 */
typedef enum ISSInfo {
    ISSNone     = 0x000,
    ISSRegMask  = 0x01f,    /* bits [4:0]: register number */
    ISSInvalid  = 0x020,    /* bit 5 */
    ISSIsAcqRel = 0x040,    /* bit 6 */
    ISSIsWrite  = 0x080,    /* bit 7 */
    ISSIs16Bit  = 0x100,    /* bit 8 */
} ISSInfo;
181 
182 /*
183  * Store var into env + offset to a member with size bytes.
184  * Free var after use.
185  */
186 void store_cpu_offset(TCGv_i32 var, int offset, int size)
187 {
188     switch (size) {
189     case 1:
190         tcg_gen_st8_i32(var, cpu_env, offset);
191         break;
192     case 4:
193         tcg_gen_st_i32(var, cpu_env, offset);
194         break;
195     default:
196         g_assert_not_reached();
197     }
198 }
199 
200 /* Save the syndrome information for a Data Abort */
201 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
202 {
203     uint32_t syn;
204     int sas = memop & MO_SIZE;
205     bool sse = memop & MO_SIGN;
206     bool is_acqrel = issinfo & ISSIsAcqRel;
207     bool is_write = issinfo & ISSIsWrite;
208     bool is_16bit = issinfo & ISSIs16Bit;
209     int srt = issinfo & ISSRegMask;
210 
211     if (issinfo & ISSInvalid) {
212         /* Some callsites want to conditionally provide ISS info,
213          * eg "only if this was not a writeback"
214          */
215         return;
216     }
217 
218     if (srt == 15) {
219         /* For AArch32, insns where the src/dest is R15 never generate
220          * ISS information. Catching that here saves checking at all
221          * the call sites.
222          */
223         return;
224     }
225 
226     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
227                                   0, 0, 0, is_write, 0, is_16bit);
228     disas_set_insn_syndrome(s, syn);
229 }
230 
231 static inline int get_a32_user_mem_index(DisasContext *s)
232 {
233     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
234      * insns:
235      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
236      *  otherwise, access as if at PL0.
237      */
238     switch (s->mmu_idx) {
239     case ARMMMUIdx_E3:
240     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
241     case ARMMMUIdx_E10_0:
242     case ARMMMUIdx_E10_1:
243     case ARMMMUIdx_E10_1_PAN:
244         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
245     case ARMMMUIdx_MUser:
246     case ARMMMUIdx_MPriv:
247         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
248     case ARMMMUIdx_MUserNegPri:
249     case ARMMMUIdx_MPrivNegPri:
250         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
251     case ARMMMUIdx_MSUser:
252     case ARMMMUIdx_MSPriv:
253         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
254     case ARMMMUIdx_MSUserNegPri:
255     case ARMMMUIdx_MSPrivNegPri:
256         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
257     default:
258         g_assert_not_reached();
259     }
260 }
261 
262 /* The pc_curr difference for an architectural jump. */
263 static target_long jmp_diff(DisasContext *s, target_long diff)
264 {
265     return diff + (s->thumb ? 4 : 8);
266 }
267 
268 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
269 {
270     assert(s->pc_save != -1);
271     if (tb_cflags(s->base.tb) & CF_PCREL) {
272         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
273     } else {
274         tcg_gen_movi_i32(var, s->pc_curr + diff);
275     }
276 }
277 
278 /* Set a variable to the value of a CPU register.  */
279 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
280 {
281     if (reg == 15) {
282         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
283     } else {
284         tcg_gen_mov_i32(var, cpu_R[reg]);
285     }
286 }
287 
288 /*
289  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
290  * This is used for load/store for which use of PC implies (literal),
291  * or ADD that implies ADR.
292  */
293 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
294 {
295     TCGv_i32 tmp = tcg_temp_new_i32();
296 
297     if (reg == 15) {
298         /*
299          * This address is computed from an aligned PC:
300          * subtract off the low bits.
301          */
302         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
303     } else {
304         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
305     }
306     return tmp;
307 }
308 
309 /* Set a CPU register.  The source must be a temporary and will be
310    marked as dead.  */
311 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
312 {
313     if (reg == 15) {
314         /* In Thumb mode, we must ignore bit 0.
315          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
316          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
317          * We choose to ignore [1:0] in ARM mode for all architecture versions.
318          */
319         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
320         s->base.is_jmp = DISAS_JUMP;
321         s->pc_save = -1;
322     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
323         /* For M-profile SP bits [1:0] are always zero */
324         tcg_gen_andi_i32(var, var, ~3);
325     }
326     tcg_gen_mov_i32(cpu_R[reg], var);
327 }
328 
329 /*
330  * Variant of store_reg which applies v8M stack-limit checks before updating
331  * SP. If the check fails this will result in an exception being taken.
332  * We disable the stack checks for CONFIG_USER_ONLY because we have
333  * no idea what the stack limits should be in that case.
334  * If stack checking is not being done this just acts like store_reg().
335  */
336 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
337 {
338 #ifndef CONFIG_USER_ONLY
339     if (s->v8m_stackcheck) {
340         gen_helper_v8m_stackcheck(cpu_env, var);
341     }
342 #endif
343     store_reg(s, 13, var);
344 }
345 
/*
 * Value extensions.  Each macro extends var in place: var is passed as
 * both the destination and the source operand.
 */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

/* The dual-halfword forms go through out-of-line helpers. */
#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
354 
355 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
356 {
357     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
358 }
359 
360 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
361 {
362     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
363 
364     if (new_el) {
365         if (m_profile) {
366             gen_helper_rebuild_hflags_m32_newel(cpu_env);
367         } else {
368             gen_helper_rebuild_hflags_a32_newel(cpu_env);
369         }
370     } else {
371         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
372         if (m_profile) {
373             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
374         } else {
375             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
376         }
377     }
378 }
379 
380 static void gen_exception_internal(int excp)
381 {
382     assert(excp_is_internal(excp));
383     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
384 }
385 
/*
 * Advance the software-step state, generate the software step exception
 * and mark the TB as not returning.
 */
static void gen_singlestep_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}
401 
402 void clear_eci_state(DisasContext *s)
403 {
404     /*
405      * Clear any ECI/ICI state: used when a load multiple/store
406      * multiple insn executes.
407      */
408     if (s->eci) {
409         store_cpu_field_constant(0, condexec_bits);
410         s->eci = 0;
411     }
412 }
413 
414 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
415 {
416     TCGv_i32 tmp1 = tcg_temp_new_i32();
417     TCGv_i32 tmp2 = tcg_temp_new_i32();
418     tcg_gen_ext16s_i32(tmp1, a);
419     tcg_gen_ext16s_i32(tmp2, b);
420     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
421     tcg_gen_sari_i32(a, a, 16);
422     tcg_gen_sari_i32(b, b, 16);
423     tcg_gen_mul_i32(b, b, a);
424     tcg_gen_mov_i32(a, tmp1);
425 }
426 
427 /* Byteswap each halfword.  */
428 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
429 {
430     TCGv_i32 tmp = tcg_temp_new_i32();
431     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
432     tcg_gen_shri_i32(tmp, var, 8);
433     tcg_gen_and_i32(tmp, tmp, mask);
434     tcg_gen_and_i32(var, var, mask);
435     tcg_gen_shli_i32(var, var, 8);
436     tcg_gen_or_i32(dest, var, tmp);
437 }
438 
439 /* Byteswap low halfword and sign extend.  */
440 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
441 {
442     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
443 }
444 
445 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
446     tmp = (t0 ^ t1) & 0x8000;
447     t0 &= ~0x8000;
448     t1 &= ~0x8000;
449     t0 = (t0 + t1) ^ tmp;
450  */
451 
452 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
453 {
454     TCGv_i32 tmp = tcg_temp_new_i32();
455     tcg_gen_xor_i32(tmp, t0, t1);
456     tcg_gen_andi_i32(tmp, tmp, 0x8000);
457     tcg_gen_andi_i32(t0, t0, ~0x8000);
458     tcg_gen_andi_i32(t1, t1, ~0x8000);
459     tcg_gen_add_i32(t0, t0, t1);
460     tcg_gen_xor_i32(dest, t0, tmp);
461 }
462 
/*
 * Set N and Z flags from var.
 * Both cpu_NF and cpu_ZF receive a copy of var; C and V are untouched.
 */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}
469 
/* dest = T0 + T1 + CF.  No flags are updated. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}
476 
/* dest = T0 - T1 + CF - 1.  No flags are updated. */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}
484 
485 /* dest = T0 + T1. Compute C, N, V and Z flags */
486 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
487 {
488     TCGv_i32 tmp = tcg_temp_new_i32();
489     tcg_gen_movi_i32(tmp, 0);
490     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
491     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
492     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
493     tcg_gen_xor_i32(tmp, t0, t1);
494     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
495     tcg_gen_mov_i32(dest, cpu_NF);
496 }
497 
498 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
499 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
500 {
501     TCGv_i32 tmp = tcg_temp_new_i32();
502     if (TCG_TARGET_HAS_add2_i32) {
503         tcg_gen_movi_i32(tmp, 0);
504         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
505         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
506     } else {
507         TCGv_i64 q0 = tcg_temp_new_i64();
508         TCGv_i64 q1 = tcg_temp_new_i64();
509         tcg_gen_extu_i32_i64(q0, t0);
510         tcg_gen_extu_i32_i64(q1, t1);
511         tcg_gen_add_i64(q0, q0, q1);
512         tcg_gen_extu_i32_i64(q1, cpu_CF);
513         tcg_gen_add_i64(q0, q0, q1);
514         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
515     }
516     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
517     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
518     tcg_gen_xor_i32(tmp, t0, t1);
519     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
520     tcg_gen_mov_i32(dest, cpu_NF);
521 }
522 
523 /* dest = T0 - T1. Compute C, N, V and Z flags */
524 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
525 {
526     TCGv_i32 tmp;
527     tcg_gen_sub_i32(cpu_NF, t0, t1);
528     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
529     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
530     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
531     tmp = tcg_temp_new_i32();
532     tcg_gen_xor_i32(tmp, t0, t1);
533     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
534     tcg_gen_mov_i32(dest, cpu_NF);
535 }
536 
537 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
538 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
539 {
540     TCGv_i32 tmp = tcg_temp_new_i32();
541     tcg_gen_not_i32(tmp, t1);
542     gen_adc_CC(dest, t0, tmp);
543 }
544 
/*
 * GEN_SHIFT(name) defines gen_<name>(dest, t0, t1), instantiated below
 * for shl and shr.  The generated code shifts t0 by the low five bits of
 * t1, then selects zero as the result instead if any of bits [7:5] of t1
 * is set.  Bits of t1 above bit 7 are not examined.
 */
#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmpd = tcg_temp_new_i32();                               \
    TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
    TCGv_i32 zero = tcg_constant_i32(0);                              \
    tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
    tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
    tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
    tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT
559 
560 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
561 {
562     TCGv_i32 tmp1 = tcg_temp_new_i32();
563 
564     tcg_gen_andi_i32(tmp1, t1, 0xff);
565     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
566     tcg_gen_sar_i32(dest, t0, tmp1);
567 }
568 
/* Set the carry flag (cpu_CF) to bit number 'shift' of var. */
static void shifter_out_im(TCGv_i32 var, int shift)
{
    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
}
573 
574 /* Shift by immediate.  Includes special handling for shift == 0.  */
575 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
576                                     int shift, int flags)
577 {
578     switch (shiftop) {
579     case 0: /* LSL */
580         if (shift != 0) {
581             if (flags)
582                 shifter_out_im(var, 32 - shift);
583             tcg_gen_shli_i32(var, var, shift);
584         }
585         break;
586     case 1: /* LSR */
587         if (shift == 0) {
588             if (flags) {
589                 tcg_gen_shri_i32(cpu_CF, var, 31);
590             }
591             tcg_gen_movi_i32(var, 0);
592         } else {
593             if (flags)
594                 shifter_out_im(var, shift - 1);
595             tcg_gen_shri_i32(var, var, shift);
596         }
597         break;
598     case 2: /* ASR */
599         if (shift == 0)
600             shift = 32;
601         if (flags)
602             shifter_out_im(var, shift - 1);
603         if (shift == 32)
604           shift = 31;
605         tcg_gen_sari_i32(var, var, shift);
606         break;
607     case 3: /* ROR/RRX */
608         if (shift != 0) {
609             if (flags)
610                 shifter_out_im(var, shift - 1);
611             tcg_gen_rotri_i32(var, var, shift); break;
612         } else {
613             TCGv_i32 tmp = tcg_temp_new_i32();
614             tcg_gen_shli_i32(tmp, cpu_CF, 31);
615             if (flags)
616                 shifter_out_im(var, 0);
617             tcg_gen_shri_i32(var, var, 1);
618             tcg_gen_or_i32(var, var, tmp);
619         }
620     }
621 };
622 
623 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
624                                      TCGv_i32 shift, int flags)
625 {
626     if (flags) {
627         switch (shiftop) {
628         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
629         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
630         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
631         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
632         }
633     } else {
634         switch (shiftop) {
635         case 0:
636             gen_shl(var, var, shift);
637             break;
638         case 1:
639             gen_shr(var, var, shift);
640             break;
641         case 2:
642             gen_sar(var, var, shift);
643             break;
644         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
645                 tcg_gen_rotr_i32(var, var, shift); break;
646         }
647     }
648 }
649 
/*
 * Generate a conditional based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 *
 * On return cmp->cond and cmp->value describe the test: the condition
 * holds when "value <cond> 0" is true (see arm_jump_cc()).  Condition
 * codes come in even/odd pairs; the odd member of each pair is obtained
 * by inverting the TCG condition of the even one, except for 14 and 15
 * which both mean "always".  An out-of-range cc aborts.
 */
void arm_test_cc(DisasCompare *cmp, int cc)
{
    TCGv_i32 value;
    TCGCond cond;

    switch (cc) {
    case 0: /* eq: Z */
    case 1: /* ne: !Z */
        cond = TCG_COND_EQ;
        value = cpu_ZF;
        break;

    case 2: /* cs: C */
    case 3: /* cc: !C */
        cond = TCG_COND_NE;
        value = cpu_CF;
        break;

    case 4: /* mi: N */
    case 5: /* pl: !N */
        cond = TCG_COND_LT;
        value = cpu_NF;
        break;

    case 6: /* vs: V */
    case 7: /* vc: !V */
        cond = TCG_COND_LT;
        value = cpu_VF;
        break;

    case 8: /* hi: C && !Z */
    case 9: /* ls: !C || Z -> !(C && !Z) */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
           ZF is non-zero for !Z; so AND the two subexpressions.  */
        tcg_gen_neg_i32(value, cpu_CF);
        tcg_gen_and_i32(value, value, cpu_ZF);
        break;

    case 10: /* ge: N == V -> N ^ V == 0 */
    case 11: /* lt: N != V -> N ^ V != 0 */
        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
        cond = TCG_COND_GE;
        value = tcg_temp_new_i32();
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        break;

    case 12: /* gt: !Z && N == V */
    case 13: /* le: Z || N != V */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
         * the sign bit then AND with ZF to yield the result.  */
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        tcg_gen_sari_i32(value, value, 31);
        tcg_gen_andc_i32(value, cpu_ZF, value);
        break;

    case 14: /* always */
    case 15: /* always */
        /* Use the ALWAYS condition, which will fold early.
         * It doesn't matter what we use for the value.  */
        cond = TCG_COND_ALWAYS;
        value = cpu_ZF;
        /* cc 15 is odd but must not be inverted into "never". */
        goto no_invert;

    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }

    /* Odd condition codes are the negation of the preceding even one. */
    if (cc & 1) {
        cond = tcg_invert_cond(cond);
    }

 no_invert:
    cmp->cond = cond;
    cmp->value = value;
}
734 
/*
 * Emit a branch to label that is taken when cmp->value compared
 * against zero satisfies cmp->cond.
 */
void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
{
    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
}
739 
740 void arm_gen_test_cc(int cc, TCGLabel *label)
741 {
742     DisasCompare cmp;
743     arm_test_cc(&cmp, cc);
744     arm_jump_cc(&cmp, label);
745 }
746 
747 void gen_set_condexec(DisasContext *s)
748 {
749     if (s->condexec_mask) {
750         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
751 
752         store_cpu_field_constant(val, condexec_bits);
753     }
754 }
755 
/*
 * Set the PC global to pc_curr + diff and record that same value as
 * the translation-time saved PC (s->pc_save).
 */
void gen_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_R[15], diff);
    s->pc_save = s->pc_curr + diff;
}
761 
/*
 * Set PC and Thumb state from var.
 * The PC receives var with bit 0 cleared and the 'thumb' CPU field
 * receives bit 0 of var.  var is overwritten in the process.
 * The TB is ended with DISAS_JUMP and the saved PC is invalidated.
 */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->base.is_jmp = DISAS_JUMP;
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
    s->pc_save = -1;
}
771 
/*
 * Set PC and Thumb state from var. var is overwritten (see gen_bx()).
 * For M-profile CPUs, include logic to detect exception-return
 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 * and BX reg, and no others, and happens only for code in Handler mode.
 * The Security Extension also requires us to check for the FNC_RETURN
 * which signals a function return from non-secure state; this can happen
 * in both Handler and Thread mode.
 * To avoid having to do multiple comparisons in inline generated code,
 * we make the check we do here loose, so it will match for EXC_RETURN
 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 * for these spurious cases and returns without doing anything (giving
 * the same behaviour as for a branch to a non-magic address).
 *
 * In linux-user mode it is unclear what the right behaviour for an
 * attempted FNC_RETURN should be, because in real hardware this will go
 * directly to Secure code (ie not the Linux kernel) which will then treat
 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 * attempt behave the way it would on a CPU without the security extension,
 * which is to say "like a normal branch". That means we can simply treat
 * all branches as normal with no magic address behaviour.
 */
static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
{
    /* Generate the same code here as for a simple bx, but flag via
     * s->base.is_jmp that we need to do the rest of the work later.
     */
    gen_bx(s, var);
#ifndef CONFIG_USER_ONLY
    /*
     * Override the DISAS_JUMP set by gen_bx() when the CPU has the
     * M-profile Security Extension, or is M-profile in Handler mode.
     */
    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
        s->base.is_jmp = DISAS_BX_EXCRET;
    }
#endif
}
807 
808 static inline void gen_bx_excret_final_code(DisasContext *s)
809 {
810     /* Generate the code to finish possible exception return and end the TB */
811     DisasLabel excret_label = gen_disas_label(s);
812     uint32_t min_magic;
813 
814     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
815         /* Covers FNC_RETURN and EXC_RETURN magic */
816         min_magic = FNC_RETURN_MIN_MAGIC;
817     } else {
818         /* EXC_RETURN magic only */
819         min_magic = EXC_RETURN_MIN_MAGIC;
820     }
821 
822     /* Is the new PC value in the magic range indicating exception return? */
823     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
824     /* No: end the TB as we would for a DISAS_JMP */
825     if (s->ss_active) {
826         gen_singlestep_exception(s);
827     } else {
828         tcg_gen_exit_tb(NULL, 0);
829     }
830     set_disas_label(s, excret_label);
831     /* Yes: this is an exception return.
832      * At this point in runtime env->regs[15] and env->thumb will hold
833      * the exception-return magic number, which do_v7m_exception_exit()
834      * will read. Nothing else will be able to see those values because
835      * the cpu-exec main loop guarantees that we will always go straight
836      * from raising the exception to the exception-handling code.
837      *
838      * gen_ss_advance(s) does nothing on M profile currently but
839      * calling it is conceptually the right thing as we have executed
840      * this instruction (compare SWI, HVC, SMC handling).
841      */
842     gen_ss_advance(s);
843     gen_exception_internal(EXCP_EXCEPTION_EXIT);
844 }
845 
/*
 * Generate code for BXNS: call the v7m_bxns helper with the value of
 * register rm and end the TB with DISAS_EXIT.
 */
static inline void gen_bxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
     * we need to sync state before calling it, but:
     *  - we don't need to do gen_update_pc() because the bxns helper will
     *    always set the PC itself
     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
     *    unless it's outside an IT block or the last insn in an IT block,
     *    so we know that condexec == 0 (already set at the top of the TB)
     *    is correct in the non-UNPREDICTABLE cases, and we can choose
     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
     */
    gen_helper_v7m_bxns(cpu_env, var);
    s->base.is_jmp = DISAS_EXIT;
}
863 
/*
 * Generate code for BLXNS: set the PC to the address following this
 * insn, call the v7m_blxns helper with the value of register rm, and
 * end the TB with DISAS_EXIT.
 */
static inline void gen_blxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* We don't need to sync condexec state, for the same reason as bxns.
     * We do however need to set the PC, because the blxns helper reads it.
     * The blxns helper may throw an exception.
     */
    gen_update_pc(s, curr_insn_len(s));
    gen_helper_v7m_blxns(cpu_env, var);
    s->base.is_jmp = DISAS_EXIT;
}
876 
877 /* Variant of store_reg which uses branch&exchange logic when storing
878    to r15 in ARM architecture v7 and above. The source must be a temporary
879    and will be marked as dead. */
880 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
881 {
882     if (reg == 15 && ENABLE_ARCH_7) {
883         gen_bx(s, var);
884     } else {
885         store_reg(s, reg, var);
886     }
887 }
888 
889 /* Variant of store_reg which uses branch&exchange logic when storing
890  * to r15 in ARM architecture v5T and above. This is used for storing
891  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
892  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
893 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
894 {
895     if (reg == 15 && ENABLE_ARCH_5) {
896         gen_bx_excret(s, var);
897     } else {
898         store_reg(s, reg, var);
899     }
900 }
901 
/*
 * IS_USER_ONLY is 1 when CONFIG_USER_ONLY is defined and 0 otherwise,
 * so the configuration can be tested in an ordinary C expression.
 */
#if defined(CONFIG_USER_ONLY)
# define IS_USER_ONLY 1
#else
# define IS_USER_ONLY 0
#endif
907 
908 MemOp pow2_align(unsigned i)
909 {
910     static const MemOp mop_align[] = {
911         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
912         /*
913          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
914          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
915          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
916          */
917         MO_ALIGN_16
918     };
919     g_assert(i < ARRAY_SIZE(mop_align));
920     return mop_align[i];
921 }
922 
/*
 * Abstractions of "generate code to do a guest load/store for
 * AArch32", where a vaddr is always 32 bits (and is zero
 * extended if we're a 64-bit core) and data is also
 * 32 bits unless specifically doing a 64-bit access.
 * These functions work like tcg_gen_qemu_{ld,st}* except
 * that the address argument is TCGv_i32 rather than TCGv.
 */
931 
932 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
933 {
934     TCGv addr = tcg_temp_new();
935     tcg_gen_extu_i32_tl(addr, a32);
936 
937     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
938     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
939         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
940     }
941     return addr;
942 }
943 
944 /*
945  * Internal routines are used for NEON cases where the endianness
946  * and/or alignment has already been taken into account and manipulated.
947  */
948 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
949                               TCGv_i32 a32, int index, MemOp opc)
950 {
951     TCGv addr = gen_aa32_addr(s, a32, opc);
952     tcg_gen_qemu_ld_i32(val, addr, index, opc);
953 }
954 
955 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
956                               TCGv_i32 a32, int index, MemOp opc)
957 {
958     TCGv addr = gen_aa32_addr(s, a32, opc);
959     tcg_gen_qemu_st_i32(val, addr, index, opc);
960 }
961 
962 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
963                               TCGv_i32 a32, int index, MemOp opc)
964 {
965     TCGv addr = gen_aa32_addr(s, a32, opc);
966 
967     tcg_gen_qemu_ld_i64(val, addr, index, opc);
968 
969     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
970     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
971         tcg_gen_rotri_i64(val, val, 32);
972     }
973 }
974 
975 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
976                               TCGv_i32 a32, int index, MemOp opc)
977 {
978     TCGv addr = gen_aa32_addr(s, a32, opc);
979 
980     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
981     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
982         TCGv_i64 tmp = tcg_temp_new_i64();
983         tcg_gen_rotri_i64(tmp, val, 32);
984         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
985     } else {
986         tcg_gen_qemu_st_i64(val, addr, index, opc);
987     }
988 }
989 
990 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
991                      int index, MemOp opc)
992 {
993     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
994 }
995 
996 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
997                      int index, MemOp opc)
998 {
999     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1000 }
1001 
1002 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1003                      int index, MemOp opc)
1004 {
1005     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1006 }
1007 
1008 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1009                      int index, MemOp opc)
1010 {
1011     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1012 }
1013 
/*
 * DO_GEN_LD(SUFF, OPC) defines gen_aa32_ld<SUFF>(), a fixed-MemOp
 * wrapper that calls gen_aa32_ld_i32() with OPC.
 */
#define DO_GEN_LD(SUFF, OPC)                                    \
    static inline void gen_aa32_ld##SUFF(DisasContext *s,       \
                                         TCGv_i32 val,          \
                                         TCGv_i32 a32,          \
                                         int index)             \
    {                                                           \
        gen_aa32_ld_i32(s, val, a32, index, (OPC));             \
    }
1020 
/*
 * DO_GEN_ST(SUFF, OPC) defines gen_aa32_st<SUFF>(), a fixed-MemOp
 * wrapper that calls gen_aa32_st_i32() with OPC.
 */
#define DO_GEN_ST(SUFF, OPC)                                    \
    static inline void gen_aa32_st##SUFF(DisasContext *s,       \
                                         TCGv_i32 val,          \
                                         TCGv_i32 a32,          \
                                         int index)             \
    {                                                           \
        gen_aa32_st_i32(s, val, a32, index, (OPC));             \
    }
1027 
1028 static inline void gen_hvc(DisasContext *s, int imm16)
1029 {
1030     /* The pre HVC helper handles cases when HVC gets trapped
1031      * as an undefined insn by runtime configuration (ie before
1032      * the insn really executes).
1033      */
1034     gen_update_pc(s, 0);
1035     gen_helper_pre_hvc(cpu_env);
1036     /* Otherwise we will treat this as a real exception which
1037      * happens after execution of the insn. (The distinction matters
1038      * for the PC value reported to the exception handler and also
1039      * for single stepping.)
1040      */
1041     s->svc_imm = imm16;
1042     gen_update_pc(s, curr_insn_len(s));
1043     s->base.is_jmp = DISAS_HVC;
1044 }
1045 
1046 static inline void gen_smc(DisasContext *s)
1047 {
1048     /* As with HVC, we may take an exception either before or after
1049      * the insn executes.
1050      */
1051     gen_update_pc(s, 0);
1052     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1053     gen_update_pc(s, curr_insn_len(s));
1054     s->base.is_jmp = DISAS_SMC;
1055 }
1056 
1057 static void gen_exception_internal_insn(DisasContext *s, int excp)
1058 {
1059     gen_set_condexec(s);
1060     gen_update_pc(s, 0);
1061     gen_exception_internal(excp);
1062     s->base.is_jmp = DISAS_NORETURN;
1063 }
1064 
1065 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1066 {
1067     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1068                                           tcg_constant_i32(syndrome), tcg_el);
1069 }
1070 
1071 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1072 {
1073     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1074 }
1075 
1076 static void gen_exception(int excp, uint32_t syndrome)
1077 {
1078     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1079                                        tcg_constant_i32(syndrome));
1080 }
1081 
1082 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1083                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1084 {
1085     if (s->aarch64) {
1086         gen_a64_update_pc(s, pc_diff);
1087     } else {
1088         gen_set_condexec(s);
1089         gen_update_pc(s, pc_diff);
1090     }
1091     gen_exception_el_v(excp, syn, tcg_el);
1092     s->base.is_jmp = DISAS_NORETURN;
1093 }
1094 
1095 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1096                            uint32_t syn, uint32_t target_el)
1097 {
1098     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1099                             tcg_constant_i32(target_el));
1100 }
1101 
1102 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1103                         int excp, uint32_t syn)
1104 {
1105     if (s->aarch64) {
1106         gen_a64_update_pc(s, pc_diff);
1107     } else {
1108         gen_set_condexec(s);
1109         gen_update_pc(s, pc_diff);
1110     }
1111     gen_exception(excp, syn);
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1116 {
1117     gen_set_condexec(s);
1118     gen_update_pc(s, 0);
1119     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1120     s->base.is_jmp = DISAS_NORETURN;
1121 }
1122 
1123 void unallocated_encoding(DisasContext *s)
1124 {
1125     /* Unallocated and reserved encodings are uncategorized */
1126     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1127 }
1128 
1129 /* Force a TB lookup after an instruction that changes the CPU state.  */
1130 void gen_lookup_tb(DisasContext *s)
1131 {
1132     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1133     s->base.is_jmp = DISAS_EXIT;
1134 }
1135 
1136 static inline void gen_hlt(DisasContext *s, int imm)
1137 {
1138     /* HLT. This has two purposes.
1139      * Architecturally, it is an external halting debug instruction.
1140      * Since QEMU doesn't implement external debug, we treat this as
1141      * it is required for halting debug disabled: it will UNDEF.
1142      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1143      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1144      * must trigger semihosting even for ARMv7 and earlier, where
1145      * HLT was an undefined encoding.
1146      * In system mode, we don't allow userspace access to
1147      * semihosting, to provide some semblance of security
1148      * (and for consistency with our 32-bit semihosting).
1149      */
1150     if (semihosting_enabled(s->current_el == 0) &&
1151         (imm == (s->thumb ? 0x3c : 0xf000))) {
1152         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1153         return;
1154     }
1155 
1156     unallocated_encoding(s);
1157 }
1158 
1159 /*
1160  * Return the offset of a "full" NEON Dreg.
1161  */
1162 long neon_full_reg_offset(unsigned reg)
1163 {
1164     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1165 }
1166 
1167 /*
1168  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1169  * where 0 is the least significant end of the register.
1170  */
1171 long neon_element_offset(int reg, int element, MemOp memop)
1172 {
1173     int element_size = 1 << (memop & MO_SIZE);
1174     int ofs = element * element_size;
1175 #if HOST_BIG_ENDIAN
1176     /*
1177      * Calculate the offset assuming fully little-endian,
1178      * then XOR to account for the order of the 8-byte units.
1179      */
1180     if (element_size < 8) {
1181         ofs ^= 8 - element_size;
1182     }
1183 #endif
1184     return neon_full_reg_offset(reg) + ofs;
1185 }
1186 
1187 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1188 long vfp_reg_offset(bool dp, unsigned reg)
1189 {
1190     if (dp) {
1191         return neon_element_offset(reg, 0, MO_64);
1192     } else {
1193         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1194     }
1195 }
1196 
1197 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1198 {
1199     long off = neon_element_offset(reg, ele, memop);
1200 
1201     switch (memop) {
1202     case MO_SB:
1203         tcg_gen_ld8s_i32(dest, cpu_env, off);
1204         break;
1205     case MO_UB:
1206         tcg_gen_ld8u_i32(dest, cpu_env, off);
1207         break;
1208     case MO_SW:
1209         tcg_gen_ld16s_i32(dest, cpu_env, off);
1210         break;
1211     case MO_UW:
1212         tcg_gen_ld16u_i32(dest, cpu_env, off);
1213         break;
1214     case MO_UL:
1215     case MO_SL:
1216         tcg_gen_ld_i32(dest, cpu_env, off);
1217         break;
1218     default:
1219         g_assert_not_reached();
1220     }
1221 }
1222 
1223 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1224 {
1225     long off = neon_element_offset(reg, ele, memop);
1226 
1227     switch (memop) {
1228     case MO_SL:
1229         tcg_gen_ld32s_i64(dest, cpu_env, off);
1230         break;
1231     case MO_UL:
1232         tcg_gen_ld32u_i64(dest, cpu_env, off);
1233         break;
1234     case MO_UQ:
1235         tcg_gen_ld_i64(dest, cpu_env, off);
1236         break;
1237     default:
1238         g_assert_not_reached();
1239     }
1240 }
1241 
1242 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1243 {
1244     long off = neon_element_offset(reg, ele, memop);
1245 
1246     switch (memop) {
1247     case MO_8:
1248         tcg_gen_st8_i32(src, cpu_env, off);
1249         break;
1250     case MO_16:
1251         tcg_gen_st16_i32(src, cpu_env, off);
1252         break;
1253     case MO_32:
1254         tcg_gen_st_i32(src, cpu_env, off);
1255         break;
1256     default:
1257         g_assert_not_reached();
1258     }
1259 }
1260 
1261 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1262 {
1263     long off = neon_element_offset(reg, ele, memop);
1264 
1265     switch (memop) {
1266     case MO_32:
1267         tcg_gen_st32_i64(src, cpu_env, off);
1268         break;
1269     case MO_64:
1270         tcg_gen_st_i64(src, cpu_env, off);
1271         break;
1272     default:
1273         g_assert_not_reached();
1274     }
1275 }
1276 
/*
 * Bit 20 of a coprocessor insn.  The iwMMXt decoder tests it to tell
 * the two directions of a transfer apart (e.g. TMRRC vs TMCRR, and
 * loads vs stores).
 */
#define ARM_CP_RW_BIT   0x00100000
1278 
1279 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1280 {
1281     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1282 }
1283 
1284 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1285 {
1286     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1287 }
1288 
1289 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1290 {
1291     TCGv_i32 var = tcg_temp_new_i32();
1292     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1293     return var;
1294 }
1295 
1296 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1297 {
1298     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1299 }
1300 
1301 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1302 {
1303     iwmmxt_store_reg(cpu_M0, rn);
1304 }
1305 
1306 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1307 {
1308     iwmmxt_load_reg(cpu_M0, rn);
1309 }
1310 
1311 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1312 {
1313     iwmmxt_load_reg(cpu_V1, rn);
1314     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1315 }
1316 
1317 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1318 {
1319     iwmmxt_load_reg(cpu_V1, rn);
1320     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1321 }
1322 
1323 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1324 {
1325     iwmmxt_load_reg(cpu_V1, rn);
1326     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1327 }
1328 
/*
 * IWMMXT_OP(name) defines gen_op_iwmmxt_<name>_M0_wRn(rn), which loads
 * wR[rn] into cpu_V1 and then calls gen_helper_iwmmxt_<name>() with
 * cpu_M0 as destination and (cpu_M0, cpu_V1) as operands.  The helper
 * is not passed cpu_env.
 */
#define IWMMXT_OP(name)                                         \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn)        \
{                                                               \
    iwmmxt_load_reg(cpu_V1, rn);                                \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1);           \
}
1335 
/*
 * IWMMXT_OP_ENV(name) is like IWMMXT_OP(name), except that the helper
 * additionally receives cpu_env as its first input argument.
 */
#define IWMMXT_OP_ENV(name)                                     \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn)        \
{                                                               \
    iwmmxt_load_reg(cpu_V1, rn);                                \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1);  \
}
1342 
/*
 * IWMMXT_OP_ENV_SIZE(name) instantiates IWMMXT_OP_ENV three times, for
 * the helper names <name>b, <name>w and <name>l.
 */
#define IWMMXT_OP_ENV_SIZE(name)        \
    IWMMXT_OP_ENV(name##b)              \
    IWMMXT_OP_ENV(name##w)              \
    IWMMXT_OP_ENV(name##l)
1347 
/*
 * IWMMXT_OP_ENV1(name) defines gen_op_iwmmxt_<name>_M0(), a one-operand
 * operation: gen_helper_iwmmxt_<name>() is called with cpu_M0 as
 * destination and (cpu_env, cpu_M0) as inputs.
 */
#define IWMMXT_OP_ENV1(name)                                    \
static inline void gen_op_iwmmxt_##name##_M0(void)              \
{                                                               \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0);          \
}
1353 
1354 IWMMXT_OP(maddsq)
1355 IWMMXT_OP(madduq)
1356 IWMMXT_OP(sadb)
1357 IWMMXT_OP(sadw)
1358 IWMMXT_OP(mulslw)
1359 IWMMXT_OP(mulshw)
1360 IWMMXT_OP(mululw)
1361 IWMMXT_OP(muluhw)
1362 IWMMXT_OP(macsw)
1363 IWMMXT_OP(macuw)
1364 
1365 IWMMXT_OP_ENV_SIZE(unpackl)
1366 IWMMXT_OP_ENV_SIZE(unpackh)
1367 
1368 IWMMXT_OP_ENV1(unpacklub)
1369 IWMMXT_OP_ENV1(unpackluw)
1370 IWMMXT_OP_ENV1(unpacklul)
1371 IWMMXT_OP_ENV1(unpackhub)
1372 IWMMXT_OP_ENV1(unpackhuw)
1373 IWMMXT_OP_ENV1(unpackhul)
1374 IWMMXT_OP_ENV1(unpacklsb)
1375 IWMMXT_OP_ENV1(unpacklsw)
1376 IWMMXT_OP_ENV1(unpacklsl)
1377 IWMMXT_OP_ENV1(unpackhsb)
1378 IWMMXT_OP_ENV1(unpackhsw)
1379 IWMMXT_OP_ENV1(unpackhsl)
1380 
1381 IWMMXT_OP_ENV_SIZE(cmpeq)
1382 IWMMXT_OP_ENV_SIZE(cmpgtu)
1383 IWMMXT_OP_ENV_SIZE(cmpgts)
1384 
1385 IWMMXT_OP_ENV_SIZE(mins)
1386 IWMMXT_OP_ENV_SIZE(minu)
1387 IWMMXT_OP_ENV_SIZE(maxs)
1388 IWMMXT_OP_ENV_SIZE(maxu)
1389 
1390 IWMMXT_OP_ENV_SIZE(subn)
1391 IWMMXT_OP_ENV_SIZE(addn)
1392 IWMMXT_OP_ENV_SIZE(subu)
1393 IWMMXT_OP_ENV_SIZE(addu)
1394 IWMMXT_OP_ENV_SIZE(subs)
1395 IWMMXT_OP_ENV_SIZE(adds)
1396 
1397 IWMMXT_OP_ENV(avgb0)
1398 IWMMXT_OP_ENV(avgb1)
1399 IWMMXT_OP_ENV(avgw0)
1400 IWMMXT_OP_ENV(avgw1)
1401 
1402 IWMMXT_OP_ENV(packuw)
1403 IWMMXT_OP_ENV(packul)
1404 IWMMXT_OP_ENV(packuq)
1405 IWMMXT_OP_ENV(packsw)
1406 IWMMXT_OP_ENV(packsl)
1407 IWMMXT_OP_ENV(packsq)
1408 
1409 static void gen_op_iwmmxt_set_mup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 2);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_set_cup(void)
1418 {
1419     TCGv_i32 tmp;
1420     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1421     tcg_gen_ori_i32(tmp, tmp, 1);
1422     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1423 }
1424 
1425 static void gen_op_iwmmxt_setpsr_nz(void)
1426 {
1427     TCGv_i32 tmp = tcg_temp_new_i32();
1428     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1429     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1430 }
1431 
1432 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1433 {
1434     iwmmxt_load_reg(cpu_V1, rn);
1435     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1436     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1437 }
1438 
1439 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1440                                      TCGv_i32 dest)
1441 {
1442     int rd;
1443     uint32_t offset;
1444     TCGv_i32 tmp;
1445 
1446     rd = (insn >> 16) & 0xf;
1447     tmp = load_reg(s, rd);
1448 
1449     offset = (insn & 0xff) << ((insn >> 7) & 2);
1450     if (insn & (1 << 24)) {
1451         /* Pre indexed */
1452         if (insn & (1 << 23))
1453             tcg_gen_addi_i32(tmp, tmp, offset);
1454         else
1455             tcg_gen_addi_i32(tmp, tmp, -offset);
1456         tcg_gen_mov_i32(dest, tmp);
1457         if (insn & (1 << 21)) {
1458             store_reg(s, rd, tmp);
1459         }
1460     } else if (insn & (1 << 21)) {
1461         /* Post indexed */
1462         tcg_gen_mov_i32(dest, tmp);
1463         if (insn & (1 << 23))
1464             tcg_gen_addi_i32(tmp, tmp, offset);
1465         else
1466             tcg_gen_addi_i32(tmp, tmp, -offset);
1467         store_reg(s, rd, tmp);
1468     } else if (!(insn & (1 << 23)))
1469         return 1;
1470     return 0;
1471 }
1472 
1473 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1474 {
1475     int rd = (insn >> 0) & 0xf;
1476     TCGv_i32 tmp;
1477 
1478     if (insn & (1 << 8)) {
1479         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1480             return 1;
1481         } else {
1482             tmp = iwmmxt_load_creg(rd);
1483         }
1484     } else {
1485         tmp = tcg_temp_new_i32();
1486         iwmmxt_load_reg(cpu_V0, rd);
1487         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1488     }
1489     tcg_gen_andi_i32(tmp, tmp, mask);
1490     tcg_gen_mov_i32(dest, tmp);
1491     return 0;
1492 }
1493 
1494 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1495    (ie. an undefined instruction).  */
1496 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1497 {
1498     int rd, wrd;
1499     int rdhi, rdlo, rd0, rd1, i;
1500     TCGv_i32 addr;
1501     TCGv_i32 tmp, tmp2, tmp3;
1502 
1503     if ((insn & 0x0e000e00) == 0x0c000000) {
1504         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1505             wrd = insn & 0xf;
1506             rdlo = (insn >> 12) & 0xf;
1507             rdhi = (insn >> 16) & 0xf;
1508             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1509                 iwmmxt_load_reg(cpu_V0, wrd);
1510                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1511                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1512             } else {                                    /* TMCRR */
1513                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1514                 iwmmxt_store_reg(cpu_V0, wrd);
1515                 gen_op_iwmmxt_set_mup();
1516             }
1517             return 0;
1518         }
1519 
1520         wrd = (insn >> 12) & 0xf;
1521         addr = tcg_temp_new_i32();
1522         if (gen_iwmmxt_address(s, insn, addr)) {
1523             return 1;
1524         }
1525         if (insn & ARM_CP_RW_BIT) {
1526             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1527                 tmp = tcg_temp_new_i32();
1528                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1529                 iwmmxt_store_creg(wrd, tmp);
1530             } else {
1531                 i = 1;
1532                 if (insn & (1 << 8)) {
1533                     if (insn & (1 << 22)) {             /* WLDRD */
1534                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1535                         i = 0;
1536                     } else {                            /* WLDRW wRd */
1537                         tmp = tcg_temp_new_i32();
1538                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1539                     }
1540                 } else {
1541                     tmp = tcg_temp_new_i32();
1542                     if (insn & (1 << 22)) {             /* WLDRH */
1543                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1544                     } else {                            /* WLDRB */
1545                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1546                     }
1547                 }
1548                 if (i) {
1549                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1550                 }
1551                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1552             }
1553         } else {
1554             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1555                 tmp = iwmmxt_load_creg(wrd);
1556                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1557             } else {
1558                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1559                 tmp = tcg_temp_new_i32();
1560                 if (insn & (1 << 8)) {
1561                     if (insn & (1 << 22)) {             /* WSTRD */
1562                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1563                     } else {                            /* WSTRW wRd */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 } else {
1568                     if (insn & (1 << 22)) {             /* WSTRH */
1569                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1570                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1571                     } else {                            /* WSTRB */
1572                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1573                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1574                     }
1575                 }
1576             }
1577         }
1578         return 0;
1579     }
1580 
1581     if ((insn & 0x0f000000) != 0x0e000000)
1582         return 1;
1583 
1584     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1585     case 0x000:                                                 /* WOR */
1586         wrd = (insn >> 12) & 0xf;
1587         rd0 = (insn >> 0) & 0xf;
1588         rd1 = (insn >> 16) & 0xf;
1589         gen_op_iwmmxt_movq_M0_wRn(rd0);
1590         gen_op_iwmmxt_orq_M0_wRn(rd1);
1591         gen_op_iwmmxt_setpsr_nz();
1592         gen_op_iwmmxt_movq_wRn_M0(wrd);
1593         gen_op_iwmmxt_set_mup();
1594         gen_op_iwmmxt_set_cup();
1595         break;
1596     case 0x011:                                                 /* TMCR */
1597         if (insn & 0xf)
1598             return 1;
1599         rd = (insn >> 12) & 0xf;
1600         wrd = (insn >> 16) & 0xf;
1601         switch (wrd) {
1602         case ARM_IWMMXT_wCID:
1603         case ARM_IWMMXT_wCASF:
1604             break;
1605         case ARM_IWMMXT_wCon:
1606             gen_op_iwmmxt_set_cup();
1607             /* Fall through.  */
1608         case ARM_IWMMXT_wCSSF:
1609             tmp = iwmmxt_load_creg(wrd);
1610             tmp2 = load_reg(s, rd);
1611             tcg_gen_andc_i32(tmp, tmp, tmp2);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         case ARM_IWMMXT_wCGR0:
1615         case ARM_IWMMXT_wCGR1:
1616         case ARM_IWMMXT_wCGR2:
1617         case ARM_IWMMXT_wCGR3:
1618             gen_op_iwmmxt_set_cup();
1619             tmp = load_reg(s, rd);
1620             iwmmxt_store_creg(wrd, tmp);
1621             break;
1622         default:
1623             return 1;
1624         }
1625         break;
1626     case 0x100:                                                 /* WXOR */
1627         wrd = (insn >> 12) & 0xf;
1628         rd0 = (insn >> 0) & 0xf;
1629         rd1 = (insn >> 16) & 0xf;
1630         gen_op_iwmmxt_movq_M0_wRn(rd0);
1631         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1632         gen_op_iwmmxt_setpsr_nz();
1633         gen_op_iwmmxt_movq_wRn_M0(wrd);
1634         gen_op_iwmmxt_set_mup();
1635         gen_op_iwmmxt_set_cup();
1636         break;
1637     case 0x111:                                                 /* TMRC */
1638         if (insn & 0xf)
1639             return 1;
1640         rd = (insn >> 12) & 0xf;
1641         wrd = (insn >> 16) & 0xf;
1642         tmp = iwmmxt_load_creg(wrd);
1643         store_reg(s, rd, tmp);
1644         break;
1645     case 0x300:                                                 /* WANDN */
1646         wrd = (insn >> 12) & 0xf;
1647         rd0 = (insn >> 0) & 0xf;
1648         rd1 = (insn >> 16) & 0xf;
1649         gen_op_iwmmxt_movq_M0_wRn(rd0);
1650         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1651         gen_op_iwmmxt_andq_M0_wRn(rd1);
1652         gen_op_iwmmxt_setpsr_nz();
1653         gen_op_iwmmxt_movq_wRn_M0(wrd);
1654         gen_op_iwmmxt_set_mup();
1655         gen_op_iwmmxt_set_cup();
1656         break;
1657     case 0x200:                                                 /* WAND */
1658         wrd = (insn >> 12) & 0xf;
1659         rd0 = (insn >> 0) & 0xf;
1660         rd1 = (insn >> 16) & 0xf;
1661         gen_op_iwmmxt_movq_M0_wRn(rd0);
1662         gen_op_iwmmxt_andq_M0_wRn(rd1);
1663         gen_op_iwmmxt_setpsr_nz();
1664         gen_op_iwmmxt_movq_wRn_M0(wrd);
1665         gen_op_iwmmxt_set_mup();
1666         gen_op_iwmmxt_set_cup();
1667         break;
1668     case 0x810: case 0xa10:                             /* WMADD */
1669         wrd = (insn >> 12) & 0xf;
1670         rd0 = (insn >> 0) & 0xf;
1671         rd1 = (insn >> 16) & 0xf;
1672         gen_op_iwmmxt_movq_M0_wRn(rd0);
1673         if (insn & (1 << 21))
1674             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1675         else
1676             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1677         gen_op_iwmmxt_movq_wRn_M0(wrd);
1678         gen_op_iwmmxt_set_mup();
1679         break;
1680     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1681         wrd = (insn >> 12) & 0xf;
1682         rd0 = (insn >> 16) & 0xf;
1683         rd1 = (insn >> 0) & 0xf;
1684         gen_op_iwmmxt_movq_M0_wRn(rd0);
1685         switch ((insn >> 22) & 3) {
1686         case 0:
1687             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1688             break;
1689         case 1:
1690             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1691             break;
1692         case 2:
1693             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1694             break;
1695         case 3:
1696             return 1;
1697         }
1698         gen_op_iwmmxt_movq_wRn_M0(wrd);
1699         gen_op_iwmmxt_set_mup();
1700         gen_op_iwmmxt_set_cup();
1701         break;
1702     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1703         wrd = (insn >> 12) & 0xf;
1704         rd0 = (insn >> 16) & 0xf;
1705         rd1 = (insn >> 0) & 0xf;
1706         gen_op_iwmmxt_movq_M0_wRn(rd0);
1707         switch ((insn >> 22) & 3) {
1708         case 0:
1709             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1710             break;
1711         case 1:
1712             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1713             break;
1714         case 2:
1715             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1716             break;
1717         case 3:
1718             return 1;
1719         }
1720         gen_op_iwmmxt_movq_wRn_M0(wrd);
1721         gen_op_iwmmxt_set_mup();
1722         gen_op_iwmmxt_set_cup();
1723         break;
1724     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1725         wrd = (insn >> 12) & 0xf;
1726         rd0 = (insn >> 16) & 0xf;
1727         rd1 = (insn >> 0) & 0xf;
1728         gen_op_iwmmxt_movq_M0_wRn(rd0);
1729         if (insn & (1 << 22))
1730             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1731         else
1732             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1733         if (!(insn & (1 << 20)))
1734             gen_op_iwmmxt_addl_M0_wRn(wrd);
1735         gen_op_iwmmxt_movq_wRn_M0(wrd);
1736         gen_op_iwmmxt_set_mup();
1737         break;
1738     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1739         wrd = (insn >> 12) & 0xf;
1740         rd0 = (insn >> 16) & 0xf;
1741         rd1 = (insn >> 0) & 0xf;
1742         gen_op_iwmmxt_movq_M0_wRn(rd0);
1743         if (insn & (1 << 21)) {
1744             if (insn & (1 << 20))
1745                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1746             else
1747                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1748         } else {
1749             if (insn & (1 << 20))
1750                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1751             else
1752                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1753         }
1754         gen_op_iwmmxt_movq_wRn_M0(wrd);
1755         gen_op_iwmmxt_set_mup();
1756         break;
1757     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1758         wrd = (insn >> 12) & 0xf;
1759         rd0 = (insn >> 16) & 0xf;
1760         rd1 = (insn >> 0) & 0xf;
1761         gen_op_iwmmxt_movq_M0_wRn(rd0);
1762         if (insn & (1 << 21))
1763             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1764         else
1765             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1766         if (!(insn & (1 << 20))) {
1767             iwmmxt_load_reg(cpu_V1, wrd);
1768             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1769         }
1770         gen_op_iwmmxt_movq_wRn_M0(wrd);
1771         gen_op_iwmmxt_set_mup();
1772         break;
1773     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1774         wrd = (insn >> 12) & 0xf;
1775         rd0 = (insn >> 16) & 0xf;
1776         rd1 = (insn >> 0) & 0xf;
1777         gen_op_iwmmxt_movq_M0_wRn(rd0);
1778         switch ((insn >> 22) & 3) {
1779         case 0:
1780             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1781             break;
1782         case 1:
1783             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1784             break;
1785         case 2:
1786             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1787             break;
1788         case 3:
1789             return 1;
1790         }
1791         gen_op_iwmmxt_movq_wRn_M0(wrd);
1792         gen_op_iwmmxt_set_mup();
1793         gen_op_iwmmxt_set_cup();
1794         break;
1795     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1796         wrd = (insn >> 12) & 0xf;
1797         rd0 = (insn >> 16) & 0xf;
1798         rd1 = (insn >> 0) & 0xf;
1799         gen_op_iwmmxt_movq_M0_wRn(rd0);
1800         if (insn & (1 << 22)) {
1801             if (insn & (1 << 20))
1802                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1803             else
1804                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1805         } else {
1806             if (insn & (1 << 20))
1807                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1808             else
1809                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1810         }
1811         gen_op_iwmmxt_movq_wRn_M0(wrd);
1812         gen_op_iwmmxt_set_mup();
1813         gen_op_iwmmxt_set_cup();
1814         break;
1815     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1816         wrd = (insn >> 12) & 0xf;
1817         rd0 = (insn >> 16) & 0xf;
1818         rd1 = (insn >> 0) & 0xf;
1819         gen_op_iwmmxt_movq_M0_wRn(rd0);
1820         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1821         tcg_gen_andi_i32(tmp, tmp, 7);
1822         iwmmxt_load_reg(cpu_V1, rd1);
1823         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1824         gen_op_iwmmxt_movq_wRn_M0(wrd);
1825         gen_op_iwmmxt_set_mup();
1826         break;
1827     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1828         if (((insn >> 6) & 3) == 3)
1829             return 1;
1830         rd = (insn >> 12) & 0xf;
1831         wrd = (insn >> 16) & 0xf;
1832         tmp = load_reg(s, rd);
1833         gen_op_iwmmxt_movq_M0_wRn(wrd);
1834         switch ((insn >> 6) & 3) {
1835         case 0:
1836             tmp2 = tcg_constant_i32(0xff);
1837             tmp3 = tcg_constant_i32((insn & 7) << 3);
1838             break;
1839         case 1:
1840             tmp2 = tcg_constant_i32(0xffff);
1841             tmp3 = tcg_constant_i32((insn & 3) << 4);
1842             break;
1843         case 2:
1844             tmp2 = tcg_constant_i32(0xffffffff);
1845             tmp3 = tcg_constant_i32((insn & 1) << 5);
1846             break;
1847         default:
1848             g_assert_not_reached();
1849         }
1850         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1851         gen_op_iwmmxt_movq_wRn_M0(wrd);
1852         gen_op_iwmmxt_set_mup();
1853         break;
1854     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1855         rd = (insn >> 12) & 0xf;
1856         wrd = (insn >> 16) & 0xf;
1857         if (rd == 15 || ((insn >> 22) & 3) == 3)
1858             return 1;
1859         gen_op_iwmmxt_movq_M0_wRn(wrd);
1860         tmp = tcg_temp_new_i32();
1861         switch ((insn >> 22) & 3) {
1862         case 0:
1863             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1864             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1865             if (insn & 8) {
1866                 tcg_gen_ext8s_i32(tmp, tmp);
1867             } else {
1868                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1869             }
1870             break;
1871         case 1:
1872             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1873             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1874             if (insn & 8) {
1875                 tcg_gen_ext16s_i32(tmp, tmp);
1876             } else {
1877                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1878             }
1879             break;
1880         case 2:
1881             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1882             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1883             break;
1884         }
1885         store_reg(s, rd, tmp);
1886         break;
1887     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1888         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1889             return 1;
1890         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1891         switch ((insn >> 22) & 3) {
1892         case 0:
1893             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1894             break;
1895         case 1:
1896             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1897             break;
1898         case 2:
1899             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1900             break;
1901         }
1902         tcg_gen_shli_i32(tmp, tmp, 28);
1903         gen_set_nzcv(tmp);
1904         break;
1905     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1906         if (((insn >> 6) & 3) == 3)
1907             return 1;
1908         rd = (insn >> 12) & 0xf;
1909         wrd = (insn >> 16) & 0xf;
1910         tmp = load_reg(s, rd);
1911         switch ((insn >> 6) & 3) {
1912         case 0:
1913             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1914             break;
1915         case 1:
1916             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1917             break;
1918         case 2:
1919             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1920             break;
1921         }
1922         gen_op_iwmmxt_movq_wRn_M0(wrd);
1923         gen_op_iwmmxt_set_mup();
1924         break;
1925     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1926         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1927             return 1;
1928         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1929         tmp2 = tcg_temp_new_i32();
1930         tcg_gen_mov_i32(tmp2, tmp);
1931         switch ((insn >> 22) & 3) {
1932         case 0:
1933             for (i = 0; i < 7; i ++) {
1934                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1935                 tcg_gen_and_i32(tmp, tmp, tmp2);
1936             }
1937             break;
1938         case 1:
1939             for (i = 0; i < 3; i ++) {
1940                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1941                 tcg_gen_and_i32(tmp, tmp, tmp2);
1942             }
1943             break;
1944         case 2:
1945             tcg_gen_shli_i32(tmp2, tmp2, 16);
1946             tcg_gen_and_i32(tmp, tmp, tmp2);
1947             break;
1948         }
1949         gen_set_nzcv(tmp);
1950         break;
1951     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1952         wrd = (insn >> 12) & 0xf;
1953         rd0 = (insn >> 16) & 0xf;
1954         gen_op_iwmmxt_movq_M0_wRn(rd0);
1955         switch ((insn >> 22) & 3) {
1956         case 0:
1957             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1958             break;
1959         case 1:
1960             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1961             break;
1962         case 2:
1963             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1964             break;
1965         case 3:
1966             return 1;
1967         }
1968         gen_op_iwmmxt_movq_wRn_M0(wrd);
1969         gen_op_iwmmxt_set_mup();
1970         break;
1971     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1972         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1973             return 1;
1974         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1975         tmp2 = tcg_temp_new_i32();
1976         tcg_gen_mov_i32(tmp2, tmp);
1977         switch ((insn >> 22) & 3) {
1978         case 0:
1979             for (i = 0; i < 7; i ++) {
1980                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1981                 tcg_gen_or_i32(tmp, tmp, tmp2);
1982             }
1983             break;
1984         case 1:
1985             for (i = 0; i < 3; i ++) {
1986                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1987                 tcg_gen_or_i32(tmp, tmp, tmp2);
1988             }
1989             break;
1990         case 2:
1991             tcg_gen_shli_i32(tmp2, tmp2, 16);
1992             tcg_gen_or_i32(tmp, tmp, tmp2);
1993             break;
1994         }
1995         gen_set_nzcv(tmp);
1996         break;
1997     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1998         rd = (insn >> 12) & 0xf;
1999         rd0 = (insn >> 16) & 0xf;
2000         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2001             return 1;
2002         gen_op_iwmmxt_movq_M0_wRn(rd0);
2003         tmp = tcg_temp_new_i32();
2004         switch ((insn >> 22) & 3) {
2005         case 0:
2006             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2007             break;
2008         case 1:
2009             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2010             break;
2011         case 2:
2012             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2013             break;
2014         }
2015         store_reg(s, rd, tmp);
2016         break;
2017     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2018     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2019         wrd = (insn >> 12) & 0xf;
2020         rd0 = (insn >> 16) & 0xf;
2021         rd1 = (insn >> 0) & 0xf;
2022         gen_op_iwmmxt_movq_M0_wRn(rd0);
2023         switch ((insn >> 22) & 3) {
2024         case 0:
2025             if (insn & (1 << 21))
2026                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2027             else
2028                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2029             break;
2030         case 1:
2031             if (insn & (1 << 21))
2032                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2033             else
2034                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2035             break;
2036         case 2:
2037             if (insn & (1 << 21))
2038                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2039             else
2040                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2041             break;
2042         case 3:
2043             return 1;
2044         }
2045         gen_op_iwmmxt_movq_wRn_M0(wrd);
2046         gen_op_iwmmxt_set_mup();
2047         gen_op_iwmmxt_set_cup();
2048         break;
2049     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2050     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2051         wrd = (insn >> 12) & 0xf;
2052         rd0 = (insn >> 16) & 0xf;
2053         gen_op_iwmmxt_movq_M0_wRn(rd0);
2054         switch ((insn >> 22) & 3) {
2055         case 0:
2056             if (insn & (1 << 21))
2057                 gen_op_iwmmxt_unpacklsb_M0();
2058             else
2059                 gen_op_iwmmxt_unpacklub_M0();
2060             break;
2061         case 1:
2062             if (insn & (1 << 21))
2063                 gen_op_iwmmxt_unpacklsw_M0();
2064             else
2065                 gen_op_iwmmxt_unpackluw_M0();
2066             break;
2067         case 2:
2068             if (insn & (1 << 21))
2069                 gen_op_iwmmxt_unpacklsl_M0();
2070             else
2071                 gen_op_iwmmxt_unpacklul_M0();
2072             break;
2073         case 3:
2074             return 1;
2075         }
2076         gen_op_iwmmxt_movq_wRn_M0(wrd);
2077         gen_op_iwmmxt_set_mup();
2078         gen_op_iwmmxt_set_cup();
2079         break;
2080     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2081     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2082         wrd = (insn >> 12) & 0xf;
2083         rd0 = (insn >> 16) & 0xf;
2084         gen_op_iwmmxt_movq_M0_wRn(rd0);
2085         switch ((insn >> 22) & 3) {
2086         case 0:
2087             if (insn & (1 << 21))
2088                 gen_op_iwmmxt_unpackhsb_M0();
2089             else
2090                 gen_op_iwmmxt_unpackhub_M0();
2091             break;
2092         case 1:
2093             if (insn & (1 << 21))
2094                 gen_op_iwmmxt_unpackhsw_M0();
2095             else
2096                 gen_op_iwmmxt_unpackhuw_M0();
2097             break;
2098         case 2:
2099             if (insn & (1 << 21))
2100                 gen_op_iwmmxt_unpackhsl_M0();
2101             else
2102                 gen_op_iwmmxt_unpackhul_M0();
2103             break;
2104         case 3:
2105             return 1;
2106         }
2107         gen_op_iwmmxt_movq_wRn_M0(wrd);
2108         gen_op_iwmmxt_set_mup();
2109         gen_op_iwmmxt_set_cup();
2110         break;
2111     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2112     case 0x214: case 0x614: case 0xa14: case 0xe14:
2113         if (((insn >> 22) & 3) == 0)
2114             return 1;
2115         wrd = (insn >> 12) & 0xf;
2116         rd0 = (insn >> 16) & 0xf;
2117         gen_op_iwmmxt_movq_M0_wRn(rd0);
2118         tmp = tcg_temp_new_i32();
2119         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2120             return 1;
2121         }
2122         switch ((insn >> 22) & 3) {
2123         case 1:
2124             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2125             break;
2126         case 2:
2127             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2128             break;
2129         case 3:
2130             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2131             break;
2132         }
2133         gen_op_iwmmxt_movq_wRn_M0(wrd);
2134         gen_op_iwmmxt_set_mup();
2135         gen_op_iwmmxt_set_cup();
2136         break;
2137     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2138     case 0x014: case 0x414: case 0x814: case 0xc14:
2139         if (((insn >> 22) & 3) == 0)
2140             return 1;
2141         wrd = (insn >> 12) & 0xf;
2142         rd0 = (insn >> 16) & 0xf;
2143         gen_op_iwmmxt_movq_M0_wRn(rd0);
2144         tmp = tcg_temp_new_i32();
2145         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2146             return 1;
2147         }
2148         switch ((insn >> 22) & 3) {
2149         case 1:
2150             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2151             break;
2152         case 2:
2153             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2154             break;
2155         case 3:
2156             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2157             break;
2158         }
2159         gen_op_iwmmxt_movq_wRn_M0(wrd);
2160         gen_op_iwmmxt_set_mup();
2161         gen_op_iwmmxt_set_cup();
2162         break;
2163     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2164     case 0x114: case 0x514: case 0x914: case 0xd14:
2165         if (((insn >> 22) & 3) == 0)
2166             return 1;
2167         wrd = (insn >> 12) & 0xf;
2168         rd0 = (insn >> 16) & 0xf;
2169         gen_op_iwmmxt_movq_M0_wRn(rd0);
2170         tmp = tcg_temp_new_i32();
2171         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2172             return 1;
2173         }
2174         switch ((insn >> 22) & 3) {
2175         case 1:
2176             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2177             break;
2178         case 2:
2179             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2180             break;
2181         case 3:
2182             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2183             break;
2184         }
2185         gen_op_iwmmxt_movq_wRn_M0(wrd);
2186         gen_op_iwmmxt_set_mup();
2187         gen_op_iwmmxt_set_cup();
2188         break;
2189     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2190     case 0x314: case 0x714: case 0xb14: case 0xf14:
2191         if (((insn >> 22) & 3) == 0)
2192             return 1;
2193         wrd = (insn >> 12) & 0xf;
2194         rd0 = (insn >> 16) & 0xf;
2195         gen_op_iwmmxt_movq_M0_wRn(rd0);
2196         tmp = tcg_temp_new_i32();
2197         switch ((insn >> 22) & 3) {
2198         case 1:
2199             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2200                 return 1;
2201             }
2202             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2203             break;
2204         case 2:
2205             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2206                 return 1;
2207             }
2208             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2209             break;
2210         case 3:
2211             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2212                 return 1;
2213             }
2214             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2215             break;
2216         }
2217         gen_op_iwmmxt_movq_wRn_M0(wrd);
2218         gen_op_iwmmxt_set_mup();
2219         gen_op_iwmmxt_set_cup();
2220         break;
2221     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2222     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2223         wrd = (insn >> 12) & 0xf;
2224         rd0 = (insn >> 16) & 0xf;
2225         rd1 = (insn >> 0) & 0xf;
2226         gen_op_iwmmxt_movq_M0_wRn(rd0);
2227         switch ((insn >> 22) & 3) {
2228         case 0:
2229             if (insn & (1 << 21))
2230                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2231             else
2232                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2233             break;
2234         case 1:
2235             if (insn & (1 << 21))
2236                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2237             else
2238                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2239             break;
2240         case 2:
2241             if (insn & (1 << 21))
2242                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2243             else
2244                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2245             break;
2246         case 3:
2247             return 1;
2248         }
2249         gen_op_iwmmxt_movq_wRn_M0(wrd);
2250         gen_op_iwmmxt_set_mup();
2251         break;
2252     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2253     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2254         wrd = (insn >> 12) & 0xf;
2255         rd0 = (insn >> 16) & 0xf;
2256         rd1 = (insn >> 0) & 0xf;
2257         gen_op_iwmmxt_movq_M0_wRn(rd0);
2258         switch ((insn >> 22) & 3) {
2259         case 0:
2260             if (insn & (1 << 21))
2261                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2262             else
2263                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2264             break;
2265         case 1:
2266             if (insn & (1 << 21))
2267                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2268             else
2269                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2270             break;
2271         case 2:
2272             if (insn & (1 << 21))
2273                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2274             else
2275                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2276             break;
2277         case 3:
2278             return 1;
2279         }
2280         gen_op_iwmmxt_movq_wRn_M0(wrd);
2281         gen_op_iwmmxt_set_mup();
2282         break;
2283     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2284     case 0x402: case 0x502: case 0x602: case 0x702:
2285         wrd = (insn >> 12) & 0xf;
2286         rd0 = (insn >> 16) & 0xf;
2287         rd1 = (insn >> 0) & 0xf;
2288         gen_op_iwmmxt_movq_M0_wRn(rd0);
2289         iwmmxt_load_reg(cpu_V1, rd1);
2290         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2291                                 tcg_constant_i32((insn >> 20) & 3));
2292         gen_op_iwmmxt_movq_wRn_M0(wrd);
2293         gen_op_iwmmxt_set_mup();
2294         break;
2295     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2296     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2297     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2298     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2299         wrd = (insn >> 12) & 0xf;
2300         rd0 = (insn >> 16) & 0xf;
2301         rd1 = (insn >> 0) & 0xf;
2302         gen_op_iwmmxt_movq_M0_wRn(rd0);
2303         switch ((insn >> 20) & 0xf) {
2304         case 0x0:
2305             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2306             break;
2307         case 0x1:
2308             gen_op_iwmmxt_subub_M0_wRn(rd1);
2309             break;
2310         case 0x3:
2311             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2312             break;
2313         case 0x4:
2314             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2315             break;
2316         case 0x5:
2317             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2318             break;
2319         case 0x7:
2320             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2321             break;
2322         case 0x8:
2323             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2324             break;
2325         case 0x9:
2326             gen_op_iwmmxt_subul_M0_wRn(rd1);
2327             break;
2328         case 0xb:
2329             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2330             break;
2331         default:
2332             return 1;
2333         }
2334         gen_op_iwmmxt_movq_wRn_M0(wrd);
2335         gen_op_iwmmxt_set_mup();
2336         gen_op_iwmmxt_set_cup();
2337         break;
2338     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2339     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2340     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2341     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2342         wrd = (insn >> 12) & 0xf;
2343         rd0 = (insn >> 16) & 0xf;
2344         gen_op_iwmmxt_movq_M0_wRn(rd0);
2345         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2346         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2347         gen_op_iwmmxt_movq_wRn_M0(wrd);
2348         gen_op_iwmmxt_set_mup();
2349         gen_op_iwmmxt_set_cup();
2350         break;
2351     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2352     case 0x418: case 0x518: case 0x618: case 0x718:
2353     case 0x818: case 0x918: case 0xa18: case 0xb18:
2354     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2355         wrd = (insn >> 12) & 0xf;
2356         rd0 = (insn >> 16) & 0xf;
2357         rd1 = (insn >> 0) & 0xf;
2358         gen_op_iwmmxt_movq_M0_wRn(rd0);
2359         switch ((insn >> 20) & 0xf) {
2360         case 0x0:
2361             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2362             break;
2363         case 0x1:
2364             gen_op_iwmmxt_addub_M0_wRn(rd1);
2365             break;
2366         case 0x3:
2367             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2368             break;
2369         case 0x4:
2370             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2371             break;
2372         case 0x5:
2373             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2374             break;
2375         case 0x7:
2376             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2377             break;
2378         case 0x8:
2379             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2380             break;
2381         case 0x9:
2382             gen_op_iwmmxt_addul_M0_wRn(rd1);
2383             break;
2384         case 0xb:
2385             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2386             break;
2387         default:
2388             return 1;
2389         }
2390         gen_op_iwmmxt_movq_wRn_M0(wrd);
2391         gen_op_iwmmxt_set_mup();
2392         gen_op_iwmmxt_set_cup();
2393         break;
2394     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2395     case 0x408: case 0x508: case 0x608: case 0x708:
2396     case 0x808: case 0x908: case 0xa08: case 0xb08:
2397     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2398         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2399             return 1;
2400         wrd = (insn >> 12) & 0xf;
2401         rd0 = (insn >> 16) & 0xf;
2402         rd1 = (insn >> 0) & 0xf;
2403         gen_op_iwmmxt_movq_M0_wRn(rd0);
2404         switch ((insn >> 22) & 3) {
2405         case 1:
2406             if (insn & (1 << 21))
2407                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2408             else
2409                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2410             break;
2411         case 2:
2412             if (insn & (1 << 21))
2413                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2414             else
2415                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2416             break;
2417         case 3:
2418             if (insn & (1 << 21))
2419                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2420             else
2421                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2422             break;
2423         }
2424         gen_op_iwmmxt_movq_wRn_M0(wrd);
2425         gen_op_iwmmxt_set_mup();
2426         gen_op_iwmmxt_set_cup();
2427         break;
2428     case 0x201: case 0x203: case 0x205: case 0x207:
2429     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2430     case 0x211: case 0x213: case 0x215: case 0x217:
2431     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2432         wrd = (insn >> 5) & 0xf;
2433         rd0 = (insn >> 12) & 0xf;
2434         rd1 = (insn >> 0) & 0xf;
2435         if (rd0 == 0xf || rd1 == 0xf)
2436             return 1;
2437         gen_op_iwmmxt_movq_M0_wRn(wrd);
2438         tmp = load_reg(s, rd0);
2439         tmp2 = load_reg(s, rd1);
2440         switch ((insn >> 16) & 0xf) {
2441         case 0x0:                                       /* TMIA */
2442             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2443             break;
2444         case 0x8:                                       /* TMIAPH */
2445             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2446             break;
2447         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2448             if (insn & (1 << 16))
2449                 tcg_gen_shri_i32(tmp, tmp, 16);
2450             if (insn & (1 << 17))
2451                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2452             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2453             break;
2454         default:
2455             return 1;
2456         }
2457         gen_op_iwmmxt_movq_wRn_M0(wrd);
2458         gen_op_iwmmxt_set_mup();
2459         break;
2460     default:
2461         return 1;
2462     }
2463 
2464     return 0;
2465 }
2466 
2467 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2468    (ie. an undefined instruction).  */
2469 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2470 {
2471     int acc, rd0, rd1, rdhi, rdlo;
2472     TCGv_i32 tmp, tmp2;
2473 
2474     if ((insn & 0x0ff00f10) == 0x0e200010) {
2475         /* Multiply with Internal Accumulate Format */
2476         rd0 = (insn >> 12) & 0xf;
2477         rd1 = insn & 0xf;
2478         acc = (insn >> 5) & 7;
2479 
2480         if (acc != 0)
2481             return 1;
2482 
2483         tmp = load_reg(s, rd0);
2484         tmp2 = load_reg(s, rd1);
2485         switch ((insn >> 16) & 0xf) {
2486         case 0x0:                                       /* MIA */
2487             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2488             break;
2489         case 0x8:                                       /* MIAPH */
2490             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2491             break;
2492         case 0xc:                                       /* MIABB */
2493         case 0xd:                                       /* MIABT */
2494         case 0xe:                                       /* MIATB */
2495         case 0xf:                                       /* MIATT */
2496             if (insn & (1 << 16))
2497                 tcg_gen_shri_i32(tmp, tmp, 16);
2498             if (insn & (1 << 17))
2499                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2500             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2501             break;
2502         default:
2503             return 1;
2504         }
2505 
2506         gen_op_iwmmxt_movq_wRn_M0(acc);
2507         return 0;
2508     }
2509 
2510     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2511         /* Internal Accumulator Access Format */
2512         rdhi = (insn >> 16) & 0xf;
2513         rdlo = (insn >> 12) & 0xf;
2514         acc = insn & 7;
2515 
2516         if (acc != 0)
2517             return 1;
2518 
2519         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2520             iwmmxt_load_reg(cpu_V0, acc);
2521             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2522             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2523             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2524         } else {                                        /* MAR */
2525             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2526             iwmmxt_store_reg(cpu_V0, acc);
2527         }
2528         return 0;
2529     }
2530 
2531     return 1;
2532 }
2533 
/*
 * Emit a TCG "lookup and goto pointer" op.  Thin wrapper so that the
 * jump-generation code in this file has a single place that does this.
 */
static void gen_goto_ptr(void)
{
    tcg_gen_lookup_and_goto_ptr();
}
2538 
2539 /* This will end the TB but doesn't guarantee we'll return to
2540  * cpu_loop_exec. Any live exit_requests will be processed as we
2541  * enter the next TB.
2542  */
2543 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2544 {
2545     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2546         /*
2547          * For pcrel, the pc must always be up-to-date on entry to
2548          * the linked TB, so that it can use simple additions for all
2549          * further adjustments.  For !pcrel, the linked TB is compiled
2550          * to know its full virtual address, so we can delay the
2551          * update to pc to the unlinked path.  A long chain of links
2552          * can thus avoid many updates to the PC.
2553          */
2554         if (tb_cflags(s->base.tb) & CF_PCREL) {
2555             gen_update_pc(s, diff);
2556             tcg_gen_goto_tb(n);
2557         } else {
2558             tcg_gen_goto_tb(n);
2559             gen_update_pc(s, diff);
2560         }
2561         tcg_gen_exit_tb(s->base.tb, n);
2562     } else {
2563         gen_update_pc(s, diff);
2564         gen_goto_ptr();
2565     }
2566     s->base.is_jmp = DISAS_NORETURN;
2567 }
2568 
2569 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2570 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2571 {
2572     if (unlikely(s->ss_active)) {
2573         /* An indirect jump so that we still trigger the debug exception.  */
2574         gen_update_pc(s, diff);
2575         s->base.is_jmp = DISAS_JUMP;
2576         return;
2577     }
2578     switch (s->base.is_jmp) {
2579     case DISAS_NEXT:
2580     case DISAS_TOO_MANY:
2581     case DISAS_NORETURN:
2582         /*
2583          * The normal case: just go to the destination TB.
2584          * NB: NORETURN happens if we generate code like
2585          *    gen_brcondi(l);
2586          *    gen_jmp();
2587          *    gen_set_label(l);
2588          *    gen_jmp();
2589          * on the second call to gen_jmp().
2590          */
2591         gen_goto_tb(s, tbno, diff);
2592         break;
2593     case DISAS_UPDATE_NOCHAIN:
2594     case DISAS_UPDATE_EXIT:
2595         /*
2596          * We already decided we're leaving the TB for some other reason.
2597          * Avoid using goto_tb so we really do exit back to the main loop
2598          * and don't chain to another TB.
2599          */
2600         gen_update_pc(s, diff);
2601         gen_goto_ptr();
2602         s->base.is_jmp = DISAS_NORETURN;
2603         break;
2604     default:
2605         /*
2606          * We shouldn't be emitting code for a jump and also have
2607          * is_jmp set to one of the special cases like DISAS_SWI.
2608          */
2609         g_assert_not_reached();
2610     }
2611 }
2612 
2613 static inline void gen_jmp(DisasContext *s, target_long diff)
2614 {
2615     gen_jmp_tb(s, diff, 0);
2616 }
2617 
2618 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2619 {
2620     if (x)
2621         tcg_gen_sari_i32(t0, t0, 16);
2622     else
2623         gen_sxth(t0);
2624     if (y)
2625         tcg_gen_sari_i32(t1, t1, 16);
2626     else
2627         gen_sxth(t1);
2628     tcg_gen_mul_i32(t0, t0, t1);
2629 }
2630 
2631 /* Return the mask of PSR bits set by a MSR instruction.  */
2632 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2633 {
2634     uint32_t mask = 0;
2635 
2636     if (flags & (1 << 0)) {
2637         mask |= 0xff;
2638     }
2639     if (flags & (1 << 1)) {
2640         mask |= 0xff00;
2641     }
2642     if (flags & (1 << 2)) {
2643         mask |= 0xff0000;
2644     }
2645     if (flags & (1 << 3)) {
2646         mask |= 0xff000000;
2647     }
2648 
2649     /* Mask out undefined and reserved bits.  */
2650     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2651 
2652     /* Mask out execution state.  */
2653     if (!spsr) {
2654         mask &= ~CPSR_EXEC;
2655     }
2656 
2657     /* Mask out privileged bits.  */
2658     if (IS_USER(s)) {
2659         mask &= CPSR_USER;
2660     }
2661     return mask;
2662 }
2663 
2664 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2665 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2666 {
2667     TCGv_i32 tmp;
2668     if (spsr) {
2669         /* ??? This is also undefined in system mode.  */
2670         if (IS_USER(s))
2671             return 1;
2672 
2673         tmp = load_cpu_field(spsr);
2674         tcg_gen_andi_i32(tmp, tmp, ~mask);
2675         tcg_gen_andi_i32(t0, t0, mask);
2676         tcg_gen_or_i32(tmp, tmp, t0);
2677         store_cpu_field(tmp, spsr);
2678     } else {
2679         gen_set_cpsr(t0, mask);
2680     }
2681     gen_lookup_tb(s);
2682     return 0;
2683 }
2684 
2685 /* Returns nonzero if access to the PSR is not permitted.  */
2686 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2687 {
2688     TCGv_i32 tmp;
2689     tmp = tcg_temp_new_i32();
2690     tcg_gen_movi_i32(tmp, val);
2691     return gen_set_psr(s, mask, spsr, tmp);
2692 }
2693 
/*
 * Decode and access-check an MSR (banked) / MRS (banked) instruction.
 *
 * @s:       translation context
 * @r:       R field of the instruction; nonzero selects an SPSR
 * @sysm:    SYSm field selecting the banked register
 * @rn:      general purpose register used as source/destination
 * @tgtmode: out: ARM_CPU_MODE_* owning the selected register
 * @regno:   out: register number within that mode; 16 is used for an
 *           SPSR and 17 for ELR_Hyp
 *
 * Returns true with *tgtmode and *regno set if the access may proceed.
 * Otherwise emits code to raise an exception and returns false.
 */
static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
                                     int *tgtmode, int *regno)
{
    /* Decode the r and sysm fields of MSR/MRS banked accesses into
     * the target mode and register number, and identify the various
     * unpredictable cases.
     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
     *  + executed in user mode
     *  + using R15 as the src/dest register
     *  + accessing an unimplemented register
     *  + accessing a register that's inaccessible at current PL/security state
     *  + accessing a register that you could access with a different insn
     * We choose to UNDEF in all these cases.
     * Since we don't know which of the various AArch32 modes we are in
     * we have to defer some checks to runtime.
     * Accesses to Monitor mode registers from Secure EL1 (which implies
     * that EL3 is AArch64) must trap to EL3.
     *
     * If the access checks fail this function will emit code to take
     * an exception and return false. Otherwise it will return true,
     * and set *tgtmode and *regno appropriately.
     */
    /* These instructions are present only in ARMv8, or in ARMv7 with the
     * Virtualization Extensions.
     */
    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
        goto undef;
    }

    if (IS_USER(s) || rn == 15) {
        goto undef;
    }

    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
     * of registers into (r, sysm).
     */
    if (r) {
        /* SPSRs for other modes */
        switch (sysm) {
        case 0xe: /* SPSR_fiq */
            *tgtmode = ARM_CPU_MODE_FIQ;
            break;
        case 0x10: /* SPSR_irq */
            *tgtmode = ARM_CPU_MODE_IRQ;
            break;
        case 0x12: /* SPSR_svc */
            *tgtmode = ARM_CPU_MODE_SVC;
            break;
        case 0x14: /* SPSR_abt */
            *tgtmode = ARM_CPU_MODE_ABT;
            break;
        case 0x16: /* SPSR_und */
            *tgtmode = ARM_CPU_MODE_UND;
            break;
        case 0x1c: /* SPSR_mon */
            *tgtmode = ARM_CPU_MODE_MON;
            break;
        case 0x1e: /* SPSR_hyp */
            *tgtmode = ARM_CPU_MODE_HYP;
            break;
        default: /* unallocated */
            goto undef;
        }
        /* We arbitrarily assign SPSR a register number of 16. */
        *regno = 16;
    } else {
        /* general purpose registers for other modes */
        switch (sysm) {
        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
            *tgtmode = ARM_CPU_MODE_USR;
            *regno = sysm + 8;
            break;
        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
            *tgtmode = ARM_CPU_MODE_FIQ;
            *regno = sysm;
            break;
        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
            *tgtmode = ARM_CPU_MODE_IRQ;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
            *tgtmode = ARM_CPU_MODE_SVC;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
            *tgtmode = ARM_CPU_MODE_ABT;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
            *tgtmode = ARM_CPU_MODE_UND;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
            *tgtmode = ARM_CPU_MODE_MON;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
            *tgtmode = ARM_CPU_MODE_HYP;
            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
            *regno = sysm & 1 ? 13 : 17;
            break;
        default: /* unallocated */
            goto undef;
        }
    }

    /* Catch the 'accessing inaccessible register' cases we can detect
     * at translate time.
     */
    switch (*tgtmode) {
    case ARM_CPU_MODE_MON:
        /* Monitor registers need EL3 and a Secure translation context. */
        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
            goto undef;
        }
        if (s->current_el == 1) {
            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
             * then accesses to Mon registers trap to Secure EL2, if it exists,
             * otherwise EL3.
             */
            TCGv_i32 tcg_el;

            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
                dc_isar_feature(aa64_sel2, s)) {
                /* Target EL is EL<3 minus SCR_EL3.EEL2> */
                tcg_el = load_cpu_field_low32(cp15.scr_el3);
                /*
                 * The 1-bit signed extract yields 0 or -1, so adding 3
                 * gives 3 when EEL2 is clear and 2 when it is set.
                 */
                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
            } else {
                tcg_el = tcg_constant_i32(3);
            }

            gen_exception_insn_el_v(s, 0, EXCP_UDEF,
                                    syn_uncategorized(), tcg_el);
            return false;
        }
        break;
    case ARM_CPU_MODE_HYP:
        /*
         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
         * (and so we can forbid accesses from EL2 or below). elr_hyp
         * can be accessed also from Hyp mode, so forbid accesses from
         * EL0 or EL1.
         */
        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
            (s->current_el < 3 && *regno != 17)) {
            goto undef;
        }
        break;
    default:
        break;
    }

    return true;

undef:
    /* If we get here then some access check did not pass */
    gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
    return false;
}
2854 
2855 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2856 {
2857     TCGv_i32 tcg_reg;
2858     int tgtmode = 0, regno = 0;
2859 
2860     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2861         return;
2862     }
2863 
2864     /* Sync state because msr_banked() can raise exceptions */
2865     gen_set_condexec(s);
2866     gen_update_pc(s, 0);
2867     tcg_reg = load_reg(s, rn);
2868     gen_helper_msr_banked(cpu_env, tcg_reg,
2869                           tcg_constant_i32(tgtmode),
2870                           tcg_constant_i32(regno));
2871     s->base.is_jmp = DISAS_UPDATE_EXIT;
2872 }
2873 
2874 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2875 {
2876     TCGv_i32 tcg_reg;
2877     int tgtmode = 0, regno = 0;
2878 
2879     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2880         return;
2881     }
2882 
2883     /* Sync state because mrs_banked() can raise exceptions */
2884     gen_set_condexec(s);
2885     gen_update_pc(s, 0);
2886     tcg_reg = tcg_temp_new_i32();
2887     gen_helper_mrs_banked(tcg_reg, cpu_env,
2888                           tcg_constant_i32(tgtmode),
2889                           tcg_constant_i32(regno));
2890     store_reg(s, rn, tcg_reg);
2891     s->base.is_jmp = DISAS_UPDATE_EXIT;
2892 }
2893 
/*
 * Store value to PC as for an exception return (ie don't
 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
 * will do the masking based on the new value of the Thumb bit.
 *
 * The value in pc is copied verbatim into cpu_R[15]; s is not used.
 */
static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
{
    tcg_gen_mov_i32(cpu_R[15], pc);
}
2902 
2903 /* Generate a v6 exception return.  Marks both values as dead.  */
2904 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2905 {
2906     store_pc_exc_ret(s, pc);
2907     /* The cpsr_write_eret helper will mask the low bits of PC
2908      * appropriately depending on the new Thumb bit, so it must
2909      * be called after storing the new PC.
2910      */
2911     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2912         gen_io_start();
2913     }
2914     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2915     /* Must exit loop to check un-masked IRQs */
2916     s->base.is_jmp = DISAS_EXIT;
2917 }
2918 
2919 /* Generate an old-style exception return. Marks pc as dead. */
2920 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2921 {
2922     gen_rfe(s, pc, load_cpu_field(spsr));
2923 }
2924 
2925 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2926                             uint32_t opr_sz, uint32_t max_sz,
2927                             gen_helper_gvec_3_ptr *fn)
2928 {
2929     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2930 
2931     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2932     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2933                        opr_sz, max_sz, 0, fn);
2934 }
2935 
2936 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
2946 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2947                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2948 {
2949     static gen_helper_gvec_3_ptr * const fns[2] = {
2950         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2951     };
2952     tcg_debug_assert(vece >= 1 && vece <= 2);
2953     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2954 }
2955 
/*
 * GEN_CMP0(NAME, COND) defines the expanders for a vector
 * "compare against zero" operation using TCG condition COND:
 *
 *   gen_<NAME>0_i32 / gen_<NAME>0_i64
 *       scalar forms: setcond against 0, then negate, so that the
 *       result is 0 when the condition is false and all-ones when true.
 *   gen_<NAME>0_vec
 *       host-vector form: cmp_vec against a zero constant vector.
 *   gen_gvec_<NAME>0
 *       public entry point; picks the table entry by vece and expands
 *       it with tcg_gen_gvec_2().  The MO_8 and MO_16 entries provide an
 *       out-of-line helper (.fno) but no scalar inline expander; the
 *       MO_32 and MO_64 entries provide a scalar inline expander
 *       (.fni4 / .fni8) but no out-of-line helper.
 */
#define GEN_CMP0(NAME, COND)                                            \
    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
    {                                                                   \
        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
        tcg_gen_neg_i32(d, d);                                          \
    }                                                                   \
    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
    {                                                                   \
        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
        tcg_gen_neg_i64(d, d);                                          \
    }                                                                   \
    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
    {                                                                   \
        TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
    }                                                                   \
    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
                            uint32_t opr_sz, uint32_t max_sz)           \
    {                                                                   \
        const GVecGen2 op[4] = {                                        \
            { .fno = gen_helper_gvec_##NAME##0_b,                       \
              .fniv = gen_##NAME##0_vec,                                \
              .opt_opc = vecop_list_cmp,                                \
              .vece = MO_8 },                                           \
            { .fno = gen_helper_gvec_##NAME##0_h,                       \
              .fniv = gen_##NAME##0_vec,                                \
              .opt_opc = vecop_list_cmp,                                \
              .vece = MO_16 },                                          \
            { .fni4 = gen_##NAME##0_i32,                                \
              .fniv = gen_##NAME##0_vec,                                \
              .opt_opc = vecop_list_cmp,                                \
              .vece = MO_32 },                                          \
            { .fni8 = gen_##NAME##0_i64,                                \
              .fniv = gen_##NAME##0_vec,                                \
              .opt_opc = vecop_list_cmp,                                \
              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
              .vece = MO_64 },                                          \
        };                                                              \
        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
    }

/* Vector opcode used by the gen_<NAME>0_vec expanders, 0-terminated. */
static const TCGOpcode vecop_list_cmp[] = {
    INDEX_op_cmp_vec, 0
};

/* Instantiate the compare-with-zero expanders: ==, <=, >=, <, > (signed). */
GEN_CMP0(ceq, TCG_COND_EQ)
GEN_CMP0(cle, TCG_COND_LE)
GEN_CMP0(cge, TCG_COND_GE)
GEN_CMP0(clt, TCG_COND_LT)
GEN_CMP0(cgt, TCG_COND_GT)

#undef GEN_CMP0
3008 
/*
 * Signed shift right and accumulate, 8-bit lanes held in a 64-bit value:
 * a is shifted in place with tcg_gen_vec_sar8i_i64() and the result is
 * added lane-wise into d.  Note that a is overwritten.
 */
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
3014 
/*
 * Signed shift right and accumulate, 16-bit lanes held in a 64-bit value:
 * a is shifted in place with tcg_gen_vec_sar16i_i64() and the result is
 * added lane-wise into d.  Note that a is overwritten.
 */
static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
3020 
/*
 * Signed shift right and accumulate on a single 32-bit element:
 * a is arithmetically shifted right in place, then added into d.
 * Note that a is overwritten.
 */
static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
3026 
/*
 * Signed shift right and accumulate on a single 64-bit element:
 * a is arithmetically shifted right in place, then added into d.
 * Note that a is overwritten.
 */
static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
3032 
/*
 * Host-vector form of signed shift right and accumulate for elements of
 * size vece: a is arithmetically shifted right in place by sh, then
 * added into d.  Note that a is overwritten.
 */
static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
3038 
3039 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3040                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3041 {
3042     static const TCGOpcode vecop_list[] = {
3043         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3044     };
3045     static const GVecGen2i ops[4] = {
3046         { .fni8 = gen_ssra8_i64,
3047           .fniv = gen_ssra_vec,
3048           .fno = gen_helper_gvec_ssra_b,
3049           .load_dest = true,
3050           .opt_opc = vecop_list,
3051           .vece = MO_8 },
3052         { .fni8 = gen_ssra16_i64,
3053           .fniv = gen_ssra_vec,
3054           .fno = gen_helper_gvec_ssra_h,
3055           .load_dest = true,
3056           .opt_opc = vecop_list,
3057           .vece = MO_16 },
3058         { .fni4 = gen_ssra32_i32,
3059           .fniv = gen_ssra_vec,
3060           .fno = gen_helper_gvec_ssra_s,
3061           .load_dest = true,
3062           .opt_opc = vecop_list,
3063           .vece = MO_32 },
3064         { .fni8 = gen_ssra64_i64,
3065           .fniv = gen_ssra_vec,
3066           .fno = gen_helper_gvec_ssra_b,
3067           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3068           .opt_opc = vecop_list,
3069           .load_dest = true,
3070           .vece = MO_64 },
3071     };
3072 
3073     /* tszimm encoding produces immediates in the range [1..esize]. */
3074     tcg_debug_assert(shift > 0);
3075     tcg_debug_assert(shift <= (8 << vece));
3076 
3077     /*
3078      * Shifts larger than the element size are architecturally valid.
3079      * Signed results in all sign bits.
3080      */
3081     shift = MIN(shift, (8 << vece) - 1);
3082     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3083 }
3084 
/*
 * Unsigned shift right and accumulate, 8-bit lanes held in a 64-bit
 * value: a is shifted in place with tcg_gen_vec_shr8i_i64() and the
 * result is added lane-wise into d.  Note that a is overwritten.
 */
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
3090 
/*
 * Unsigned shift right and accumulate, 16-bit lanes held in a 64-bit
 * value: a is shifted in place with tcg_gen_vec_shr16i_i64() and the
 * result is added lane-wise into d.  Note that a is overwritten.
 */
static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
3096 
/*
 * Unsigned shift right and accumulate on a single 32-bit element:
 * a is logically shifted right in place, then added into d.
 * Note that a is overwritten.
 */
static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
3102 
/*
 * Unsigned shift right and accumulate on a single 64-bit element:
 * a is logically shifted right in place, then added into d.
 * Note that a is overwritten.
 */
static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
3108 
/*
 * Host-vector form of unsigned shift right and accumulate for elements
 * of size vece: a is logically shifted right in place by sh, then added
 * into d.  Note that a is overwritten.
 */
static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
3114 
3115 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3116                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3117 {
3118     static const TCGOpcode vecop_list[] = {
3119         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3120     };
3121     static const GVecGen2i ops[4] = {
3122         { .fni8 = gen_usra8_i64,
3123           .fniv = gen_usra_vec,
3124           .fno = gen_helper_gvec_usra_b,
3125           .load_dest = true,
3126           .opt_opc = vecop_list,
3127           .vece = MO_8, },
3128         { .fni8 = gen_usra16_i64,
3129           .fniv = gen_usra_vec,
3130           .fno = gen_helper_gvec_usra_h,
3131           .load_dest = true,
3132           .opt_opc = vecop_list,
3133           .vece = MO_16, },
3134         { .fni4 = gen_usra32_i32,
3135           .fniv = gen_usra_vec,
3136           .fno = gen_helper_gvec_usra_s,
3137           .load_dest = true,
3138           .opt_opc = vecop_list,
3139           .vece = MO_32, },
3140         { .fni8 = gen_usra64_i64,
3141           .fniv = gen_usra_vec,
3142           .fno = gen_helper_gvec_usra_d,
3143           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3144           .load_dest = true,
3145           .opt_opc = vecop_list,
3146           .vece = MO_64, },
3147     };
3148 
3149     /* tszimm encoding produces immediates in the range [1..esize]. */
3150     tcg_debug_assert(shift > 0);
3151     tcg_debug_assert(shift <= (8 << vece));
3152 
3153     /*
3154      * Shifts larger than the element size are architecturally valid.
3155      * Unsigned results in all zeros as input to accumulate: nop.
3156      */
3157     if (shift < (8 << vece)) {
3158         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3159     } else {
3160         /* Nop, but we do need to clear the tail. */
3161         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3162     }
3163 }
3164 
3165 /*
3166  * Shift one less than the requested amount, and the low bit is
3167  * the rounding bit.  For the 8 and 16-bit operations, because we
3168  * mask the low bit, we can perform a normal integer shift instead
3169  * of a vector shift.
3170  */
3171 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3172 {
3173     TCGv_i64 t = tcg_temp_new_i64();
3174 
3175     tcg_gen_shri_i64(t, a, sh - 1);
3176     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3177     tcg_gen_vec_sar8i_i64(d, a, sh);
3178     tcg_gen_vec_add8_i64(d, d, t);
3179 }
3180 
3181 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3182 {
3183     TCGv_i64 t = tcg_temp_new_i64();
3184 
3185     tcg_gen_shri_i64(t, a, sh - 1);
3186     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3187     tcg_gen_vec_sar16i_i64(d, a, sh);
3188     tcg_gen_vec_add16_i64(d, d, t);
3189 }
3190 
3191 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3192 {
3193     TCGv_i32 t;
3194 
3195     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3196     if (sh == 32) {
3197         tcg_gen_movi_i32(d, 0);
3198         return;
3199     }
3200     t = tcg_temp_new_i32();
3201     tcg_gen_extract_i32(t, a, sh - 1, 1);
3202     tcg_gen_sari_i32(d, a, sh);
3203     tcg_gen_add_i32(d, d, t);
3204 }
3205 
3206 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3207 {
3208     TCGv_i64 t = tcg_temp_new_i64();
3209 
3210     tcg_gen_extract_i64(t, a, sh - 1, 1);
3211     tcg_gen_sari_i64(d, a, sh);
3212     tcg_gen_add_i64(d, d, t);
3213 }
3214 
3215 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3216 {
3217     TCGv_vec t = tcg_temp_new_vec_matching(d);
3218     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3219 
3220     tcg_gen_shri_vec(vece, t, a, sh - 1);
3221     tcg_gen_dupi_vec(vece, ones, 1);
3222     tcg_gen_and_vec(vece, t, t, ones);
3223     tcg_gen_sari_vec(vece, d, a, sh);
3224     tcg_gen_add_vec(vece, d, d, t);
3225 }
3226 
3227 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3228                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3229 {
3230     static const TCGOpcode vecop_list[] = {
3231         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3232     };
3233     static const GVecGen2i ops[4] = {
3234         { .fni8 = gen_srshr8_i64,
3235           .fniv = gen_srshr_vec,
3236           .fno = gen_helper_gvec_srshr_b,
3237           .opt_opc = vecop_list,
3238           .vece = MO_8 },
3239         { .fni8 = gen_srshr16_i64,
3240           .fniv = gen_srshr_vec,
3241           .fno = gen_helper_gvec_srshr_h,
3242           .opt_opc = vecop_list,
3243           .vece = MO_16 },
3244         { .fni4 = gen_srshr32_i32,
3245           .fniv = gen_srshr_vec,
3246           .fno = gen_helper_gvec_srshr_s,
3247           .opt_opc = vecop_list,
3248           .vece = MO_32 },
3249         { .fni8 = gen_srshr64_i64,
3250           .fniv = gen_srshr_vec,
3251           .fno = gen_helper_gvec_srshr_d,
3252           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3253           .opt_opc = vecop_list,
3254           .vece = MO_64 },
3255     };
3256 
3257     /* tszimm encoding produces immediates in the range [1..esize] */
3258     tcg_debug_assert(shift > 0);
3259     tcg_debug_assert(shift <= (8 << vece));
3260 
3261     if (shift == (8 << vece)) {
3262         /*
3263          * Shifts larger than the element size are architecturally valid.
3264          * Signed results in all sign bits.  With rounding, this produces
3265          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3266          * I.e. always zero.
3267          */
3268         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3269     } else {
3270         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3271     }
3272 }
3273 
3274 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3275 {
3276     TCGv_i64 t = tcg_temp_new_i64();
3277 
3278     gen_srshr8_i64(t, a, sh);
3279     tcg_gen_vec_add8_i64(d, d, t);
3280 }
3281 
3282 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3283 {
3284     TCGv_i64 t = tcg_temp_new_i64();
3285 
3286     gen_srshr16_i64(t, a, sh);
3287     tcg_gen_vec_add16_i64(d, d, t);
3288 }
3289 
3290 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3291 {
3292     TCGv_i32 t = tcg_temp_new_i32();
3293 
3294     gen_srshr32_i32(t, a, sh);
3295     tcg_gen_add_i32(d, d, t);
3296 }
3297 
3298 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3299 {
3300     TCGv_i64 t = tcg_temp_new_i64();
3301 
3302     gen_srshr64_i64(t, a, sh);
3303     tcg_gen_add_i64(d, d, t);
3304 }
3305 
3306 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3307 {
3308     TCGv_vec t = tcg_temp_new_vec_matching(d);
3309 
3310     gen_srshr_vec(vece, t, a, sh);
3311     tcg_gen_add_vec(vece, d, d, t);
3312 }
3313 
3314 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3315                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3316 {
3317     static const TCGOpcode vecop_list[] = {
3318         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3319     };
3320     static const GVecGen2i ops[4] = {
3321         { .fni8 = gen_srsra8_i64,
3322           .fniv = gen_srsra_vec,
3323           .fno = gen_helper_gvec_srsra_b,
3324           .opt_opc = vecop_list,
3325           .load_dest = true,
3326           .vece = MO_8 },
3327         { .fni8 = gen_srsra16_i64,
3328           .fniv = gen_srsra_vec,
3329           .fno = gen_helper_gvec_srsra_h,
3330           .opt_opc = vecop_list,
3331           .load_dest = true,
3332           .vece = MO_16 },
3333         { .fni4 = gen_srsra32_i32,
3334           .fniv = gen_srsra_vec,
3335           .fno = gen_helper_gvec_srsra_s,
3336           .opt_opc = vecop_list,
3337           .load_dest = true,
3338           .vece = MO_32 },
3339         { .fni8 = gen_srsra64_i64,
3340           .fniv = gen_srsra_vec,
3341           .fno = gen_helper_gvec_srsra_d,
3342           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3343           .opt_opc = vecop_list,
3344           .load_dest = true,
3345           .vece = MO_64 },
3346     };
3347 
3348     /* tszimm encoding produces immediates in the range [1..esize] */
3349     tcg_debug_assert(shift > 0);
3350     tcg_debug_assert(shift <= (8 << vece));
3351 
3352     /*
3353      * Shifts larger than the element size are architecturally valid.
3354      * Signed results in all sign bits.  With rounding, this produces
3355      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3356      * I.e. always zero.  With accumulation, this leaves D unchanged.
3357      */
3358     if (shift == (8 << vece)) {
3359         /* Nop, but we do need to clear the tail. */
3360         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3361     } else {
3362         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3363     }
3364 }
3365 
3366 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3367 {
3368     TCGv_i64 t = tcg_temp_new_i64();
3369 
3370     tcg_gen_shri_i64(t, a, sh - 1);
3371     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3372     tcg_gen_vec_shr8i_i64(d, a, sh);
3373     tcg_gen_vec_add8_i64(d, d, t);
3374 }
3375 
3376 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3377 {
3378     TCGv_i64 t = tcg_temp_new_i64();
3379 
3380     tcg_gen_shri_i64(t, a, sh - 1);
3381     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3382     tcg_gen_vec_shr16i_i64(d, a, sh);
3383     tcg_gen_vec_add16_i64(d, d, t);
3384 }
3385 
3386 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3387 {
3388     TCGv_i32 t;
3389 
3390     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3391     if (sh == 32) {
3392         tcg_gen_extract_i32(d, a, sh - 1, 1);
3393         return;
3394     }
3395     t = tcg_temp_new_i32();
3396     tcg_gen_extract_i32(t, a, sh - 1, 1);
3397     tcg_gen_shri_i32(d, a, sh);
3398     tcg_gen_add_i32(d, d, t);
3399 }
3400 
3401 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3402 {
3403     TCGv_i64 t = tcg_temp_new_i64();
3404 
3405     tcg_gen_extract_i64(t, a, sh - 1, 1);
3406     tcg_gen_shri_i64(d, a, sh);
3407     tcg_gen_add_i64(d, d, t);
3408 }
3409 
3410 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3411 {
3412     TCGv_vec t = tcg_temp_new_vec_matching(d);
3413     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3414 
3415     tcg_gen_shri_vec(vece, t, a, shift - 1);
3416     tcg_gen_dupi_vec(vece, ones, 1);
3417     tcg_gen_and_vec(vece, t, t, ones);
3418     tcg_gen_shri_vec(vece, d, a, shift);
3419     tcg_gen_add_vec(vece, d, d, t);
3420 }
3421 
3422 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3423                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3424 {
3425     static const TCGOpcode vecop_list[] = {
3426         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3427     };
3428     static const GVecGen2i ops[4] = {
3429         { .fni8 = gen_urshr8_i64,
3430           .fniv = gen_urshr_vec,
3431           .fno = gen_helper_gvec_urshr_b,
3432           .opt_opc = vecop_list,
3433           .vece = MO_8 },
3434         { .fni8 = gen_urshr16_i64,
3435           .fniv = gen_urshr_vec,
3436           .fno = gen_helper_gvec_urshr_h,
3437           .opt_opc = vecop_list,
3438           .vece = MO_16 },
3439         { .fni4 = gen_urshr32_i32,
3440           .fniv = gen_urshr_vec,
3441           .fno = gen_helper_gvec_urshr_s,
3442           .opt_opc = vecop_list,
3443           .vece = MO_32 },
3444         { .fni8 = gen_urshr64_i64,
3445           .fniv = gen_urshr_vec,
3446           .fno = gen_helper_gvec_urshr_d,
3447           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3448           .opt_opc = vecop_list,
3449           .vece = MO_64 },
3450     };
3451 
3452     /* tszimm encoding produces immediates in the range [1..esize] */
3453     tcg_debug_assert(shift > 0);
3454     tcg_debug_assert(shift <= (8 << vece));
3455 
3456     if (shift == (8 << vece)) {
3457         /*
3458          * Shifts larger than the element size are architecturally valid.
3459          * Unsigned results in zero.  With rounding, this produces a
3460          * copy of the most significant bit.
3461          */
3462         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3463     } else {
3464         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3465     }
3466 }
3467 
3468 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3469 {
3470     TCGv_i64 t = tcg_temp_new_i64();
3471 
3472     if (sh == 8) {
3473         tcg_gen_vec_shr8i_i64(t, a, 7);
3474     } else {
3475         gen_urshr8_i64(t, a, sh);
3476     }
3477     tcg_gen_vec_add8_i64(d, d, t);
3478 }
3479 
3480 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3481 {
3482     TCGv_i64 t = tcg_temp_new_i64();
3483 
3484     if (sh == 16) {
3485         tcg_gen_vec_shr16i_i64(t, a, 15);
3486     } else {
3487         gen_urshr16_i64(t, a, sh);
3488     }
3489     tcg_gen_vec_add16_i64(d, d, t);
3490 }
3491 
3492 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3493 {
3494     TCGv_i32 t = tcg_temp_new_i32();
3495 
3496     if (sh == 32) {
3497         tcg_gen_shri_i32(t, a, 31);
3498     } else {
3499         gen_urshr32_i32(t, a, sh);
3500     }
3501     tcg_gen_add_i32(d, d, t);
3502 }
3503 
3504 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3505 {
3506     TCGv_i64 t = tcg_temp_new_i64();
3507 
3508     if (sh == 64) {
3509         tcg_gen_shri_i64(t, a, 63);
3510     } else {
3511         gen_urshr64_i64(t, a, sh);
3512     }
3513     tcg_gen_add_i64(d, d, t);
3514 }
3515 
3516 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3517 {
3518     TCGv_vec t = tcg_temp_new_vec_matching(d);
3519 
3520     if (sh == (8 << vece)) {
3521         tcg_gen_shri_vec(vece, t, a, sh - 1);
3522     } else {
3523         gen_urshr_vec(vece, t, a, sh);
3524     }
3525     tcg_gen_add_vec(vece, d, d, t);
3526 }
3527 
3528 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3529                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3530 {
3531     static const TCGOpcode vecop_list[] = {
3532         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3533     };
3534     static const GVecGen2i ops[4] = {
3535         { .fni8 = gen_ursra8_i64,
3536           .fniv = gen_ursra_vec,
3537           .fno = gen_helper_gvec_ursra_b,
3538           .opt_opc = vecop_list,
3539           .load_dest = true,
3540           .vece = MO_8 },
3541         { .fni8 = gen_ursra16_i64,
3542           .fniv = gen_ursra_vec,
3543           .fno = gen_helper_gvec_ursra_h,
3544           .opt_opc = vecop_list,
3545           .load_dest = true,
3546           .vece = MO_16 },
3547         { .fni4 = gen_ursra32_i32,
3548           .fniv = gen_ursra_vec,
3549           .fno = gen_helper_gvec_ursra_s,
3550           .opt_opc = vecop_list,
3551           .load_dest = true,
3552           .vece = MO_32 },
3553         { .fni8 = gen_ursra64_i64,
3554           .fniv = gen_ursra_vec,
3555           .fno = gen_helper_gvec_ursra_d,
3556           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3557           .opt_opc = vecop_list,
3558           .load_dest = true,
3559           .vece = MO_64 },
3560     };
3561 
3562     /* tszimm encoding produces immediates in the range [1..esize] */
3563     tcg_debug_assert(shift > 0);
3564     tcg_debug_assert(shift <= (8 << vece));
3565 
3566     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3567 }
3568 
3569 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3570 {
3571     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3572     TCGv_i64 t = tcg_temp_new_i64();
3573 
3574     tcg_gen_shri_i64(t, a, shift);
3575     tcg_gen_andi_i64(t, t, mask);
3576     tcg_gen_andi_i64(d, d, ~mask);
3577     tcg_gen_or_i64(d, d, t);
3578 }
3579 
3580 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3581 {
3582     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3583     TCGv_i64 t = tcg_temp_new_i64();
3584 
3585     tcg_gen_shri_i64(t, a, shift);
3586     tcg_gen_andi_i64(t, t, mask);
3587     tcg_gen_andi_i64(d, d, ~mask);
3588     tcg_gen_or_i64(d, d, t);
3589 }
3590 
3591 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3592 {
3593     tcg_gen_shri_i32(a, a, shift);
3594     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3595 }
3596 
3597 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3598 {
3599     tcg_gen_shri_i64(a, a, shift);
3600     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3601 }
3602 
3603 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3604 {
3605     TCGv_vec t = tcg_temp_new_vec_matching(d);
3606     TCGv_vec m = tcg_temp_new_vec_matching(d);
3607 
3608     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3609     tcg_gen_shri_vec(vece, t, a, sh);
3610     tcg_gen_and_vec(vece, d, d, m);
3611     tcg_gen_or_vec(vece, d, d, t);
3612 }
3613 
3614 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3615                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3616 {
3617     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3618     const GVecGen2i ops[4] = {
3619         { .fni8 = gen_shr8_ins_i64,
3620           .fniv = gen_shr_ins_vec,
3621           .fno = gen_helper_gvec_sri_b,
3622           .load_dest = true,
3623           .opt_opc = vecop_list,
3624           .vece = MO_8 },
3625         { .fni8 = gen_shr16_ins_i64,
3626           .fniv = gen_shr_ins_vec,
3627           .fno = gen_helper_gvec_sri_h,
3628           .load_dest = true,
3629           .opt_opc = vecop_list,
3630           .vece = MO_16 },
3631         { .fni4 = gen_shr32_ins_i32,
3632           .fniv = gen_shr_ins_vec,
3633           .fno = gen_helper_gvec_sri_s,
3634           .load_dest = true,
3635           .opt_opc = vecop_list,
3636           .vece = MO_32 },
3637         { .fni8 = gen_shr64_ins_i64,
3638           .fniv = gen_shr_ins_vec,
3639           .fno = gen_helper_gvec_sri_d,
3640           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3641           .load_dest = true,
3642           .opt_opc = vecop_list,
3643           .vece = MO_64 },
3644     };
3645 
3646     /* tszimm encoding produces immediates in the range [1..esize]. */
3647     tcg_debug_assert(shift > 0);
3648     tcg_debug_assert(shift <= (8 << vece));
3649 
3650     /* Shift of esize leaves destination unchanged. */
3651     if (shift < (8 << vece)) {
3652         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3653     } else {
3654         /* Nop, but we do need to clear the tail. */
3655         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3656     }
3657 }
3658 
3659 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3660 {
3661     uint64_t mask = dup_const(MO_8, 0xff << shift);
3662     TCGv_i64 t = tcg_temp_new_i64();
3663 
3664     tcg_gen_shli_i64(t, a, shift);
3665     tcg_gen_andi_i64(t, t, mask);
3666     tcg_gen_andi_i64(d, d, ~mask);
3667     tcg_gen_or_i64(d, d, t);
3668 }
3669 
3670 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3671 {
3672     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3673     TCGv_i64 t = tcg_temp_new_i64();
3674 
3675     tcg_gen_shli_i64(t, a, shift);
3676     tcg_gen_andi_i64(t, t, mask);
3677     tcg_gen_andi_i64(d, d, ~mask);
3678     tcg_gen_or_i64(d, d, t);
3679 }
3680 
3681 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3682 {
3683     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3684 }
3685 
3686 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3687 {
3688     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3689 }
3690 
3691 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3692 {
3693     TCGv_vec t = tcg_temp_new_vec_matching(d);
3694     TCGv_vec m = tcg_temp_new_vec_matching(d);
3695 
3696     tcg_gen_shli_vec(vece, t, a, sh);
3697     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3698     tcg_gen_and_vec(vece, d, d, m);
3699     tcg_gen_or_vec(vece, d, d, t);
3700 }
3701 
3702 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3703                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3704 {
3705     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3706     const GVecGen2i ops[4] = {
3707         { .fni8 = gen_shl8_ins_i64,
3708           .fniv = gen_shl_ins_vec,
3709           .fno = gen_helper_gvec_sli_b,
3710           .load_dest = true,
3711           .opt_opc = vecop_list,
3712           .vece = MO_8 },
3713         { .fni8 = gen_shl16_ins_i64,
3714           .fniv = gen_shl_ins_vec,
3715           .fno = gen_helper_gvec_sli_h,
3716           .load_dest = true,
3717           .opt_opc = vecop_list,
3718           .vece = MO_16 },
3719         { .fni4 = gen_shl32_ins_i32,
3720           .fniv = gen_shl_ins_vec,
3721           .fno = gen_helper_gvec_sli_s,
3722           .load_dest = true,
3723           .opt_opc = vecop_list,
3724           .vece = MO_32 },
3725         { .fni8 = gen_shl64_ins_i64,
3726           .fniv = gen_shl_ins_vec,
3727           .fno = gen_helper_gvec_sli_d,
3728           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3729           .load_dest = true,
3730           .opt_opc = vecop_list,
3731           .vece = MO_64 },
3732     };
3733 
3734     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3735     tcg_debug_assert(shift >= 0);
3736     tcg_debug_assert(shift < (8 << vece));
3737 
3738     if (shift == 0) {
3739         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3740     } else {
3741         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3742     }
3743 }
3744 
3745 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3746 {
3747     gen_helper_neon_mul_u8(a, a, b);
3748     gen_helper_neon_add_u8(d, d, a);
3749 }
3750 
3751 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3752 {
3753     gen_helper_neon_mul_u8(a, a, b);
3754     gen_helper_neon_sub_u8(d, d, a);
3755 }
3756 
3757 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3758 {
3759     gen_helper_neon_mul_u16(a, a, b);
3760     gen_helper_neon_add_u16(d, d, a);
3761 }
3762 
3763 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3764 {
3765     gen_helper_neon_mul_u16(a, a, b);
3766     gen_helper_neon_sub_u16(d, d, a);
3767 }
3768 
3769 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3770 {
3771     tcg_gen_mul_i32(a, a, b);
3772     tcg_gen_add_i32(d, d, a);
3773 }
3774 
3775 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3776 {
3777     tcg_gen_mul_i32(a, a, b);
3778     tcg_gen_sub_i32(d, d, a);
3779 }
3780 
3781 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3782 {
3783     tcg_gen_mul_i64(a, a, b);
3784     tcg_gen_add_i64(d, d, a);
3785 }
3786 
3787 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3788 {
3789     tcg_gen_mul_i64(a, a, b);
3790     tcg_gen_sub_i64(d, d, a);
3791 }
3792 
3793 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3794 {
3795     tcg_gen_mul_vec(vece, a, a, b);
3796     tcg_gen_add_vec(vece, d, d, a);
3797 }
3798 
3799 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3800 {
3801     tcg_gen_mul_vec(vece, a, a, b);
3802     tcg_gen_sub_vec(vece, d, d, a);
3803 }
3804 
3805 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3806  * these tables are shared with AArch64 which does support them.
3807  */
3808 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3809                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3810 {
3811     static const TCGOpcode vecop_list[] = {
3812         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3813     };
3814     static const GVecGen3 ops[4] = {
3815         { .fni4 = gen_mla8_i32,
3816           .fniv = gen_mla_vec,
3817           .load_dest = true,
3818           .opt_opc = vecop_list,
3819           .vece = MO_8 },
3820         { .fni4 = gen_mla16_i32,
3821           .fniv = gen_mla_vec,
3822           .load_dest = true,
3823           .opt_opc = vecop_list,
3824           .vece = MO_16 },
3825         { .fni4 = gen_mla32_i32,
3826           .fniv = gen_mla_vec,
3827           .load_dest = true,
3828           .opt_opc = vecop_list,
3829           .vece = MO_32 },
3830         { .fni8 = gen_mla64_i64,
3831           .fniv = gen_mla_vec,
3832           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3833           .load_dest = true,
3834           .opt_opc = vecop_list,
3835           .vece = MO_64 },
3836     };
3837     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3838 }
3839 
3840 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3841                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3842 {
3843     static const TCGOpcode vecop_list[] = {
3844         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3845     };
3846     static const GVecGen3 ops[4] = {
3847         { .fni4 = gen_mls8_i32,
3848           .fniv = gen_mls_vec,
3849           .load_dest = true,
3850           .opt_opc = vecop_list,
3851           .vece = MO_8 },
3852         { .fni4 = gen_mls16_i32,
3853           .fniv = gen_mls_vec,
3854           .load_dest = true,
3855           .opt_opc = vecop_list,
3856           .vece = MO_16 },
3857         { .fni4 = gen_mls32_i32,
3858           .fniv = gen_mls_vec,
3859           .load_dest = true,
3860           .opt_opc = vecop_list,
3861           .vece = MO_32 },
3862         { .fni8 = gen_mls64_i64,
3863           .fniv = gen_mls_vec,
3864           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3865           .load_dest = true,
3866           .opt_opc = vecop_list,
3867           .vece = MO_64 },
3868     };
3869     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3870 }
3871 
3872 /* CMTST : test is "if (X & Y != 0)". */
3873 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3874 {
3875     tcg_gen_and_i32(d, a, b);
3876     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3877     tcg_gen_neg_i32(d, d);
3878 }
3879 
3880 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3881 {
3882     tcg_gen_and_i64(d, a, b);
3883     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3884     tcg_gen_neg_i64(d, d);
3885 }
3886 
3887 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3888 {
3889     tcg_gen_and_vec(vece, d, a, b);
3890     tcg_gen_dupi_vec(vece, a, 0);
3891     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3892 }
3893 
3894 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3895                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3896 {
3897     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3898     static const GVecGen3 ops[4] = {
3899         { .fni4 = gen_helper_neon_tst_u8,
3900           .fniv = gen_cmtst_vec,
3901           .opt_opc = vecop_list,
3902           .vece = MO_8 },
3903         { .fni4 = gen_helper_neon_tst_u16,
3904           .fniv = gen_cmtst_vec,
3905           .opt_opc = vecop_list,
3906           .vece = MO_16 },
3907         { .fni4 = gen_cmtst_i32,
3908           .fniv = gen_cmtst_vec,
3909           .opt_opc = vecop_list,
3910           .vece = MO_32 },
3911         { .fni8 = gen_cmtst_i64,
3912           .fniv = gen_cmtst_vec,
3913           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3914           .opt_opc = vecop_list,
3915           .vece = MO_64 },
3916     };
3917     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3918 }
3919 
3920 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3921 {
3922     TCGv_i32 lval = tcg_temp_new_i32();
3923     TCGv_i32 rval = tcg_temp_new_i32();
3924     TCGv_i32 lsh = tcg_temp_new_i32();
3925     TCGv_i32 rsh = tcg_temp_new_i32();
3926     TCGv_i32 zero = tcg_constant_i32(0);
3927     TCGv_i32 max = tcg_constant_i32(32);
3928 
3929     /*
3930      * Rely on the TCG guarantee that out of range shifts produce
3931      * unspecified results, not undefined behaviour (i.e. no trap).
3932      * Discard out-of-range results after the fact.
3933      */
3934     tcg_gen_ext8s_i32(lsh, shift);
3935     tcg_gen_neg_i32(rsh, lsh);
3936     tcg_gen_shl_i32(lval, src, lsh);
3937     tcg_gen_shr_i32(rval, src, rsh);
3938     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3939     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3940 }
3941 
3942 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3943 {
3944     TCGv_i64 lval = tcg_temp_new_i64();
3945     TCGv_i64 rval = tcg_temp_new_i64();
3946     TCGv_i64 lsh = tcg_temp_new_i64();
3947     TCGv_i64 rsh = tcg_temp_new_i64();
3948     TCGv_i64 zero = tcg_constant_i64(0);
3949     TCGv_i64 max = tcg_constant_i64(64);
3950 
3951     /*
3952      * Rely on the TCG guarantee that out of range shifts produce
3953      * unspecified results, not undefined behaviour (i.e. no trap).
3954      * Discard out-of-range results after the fact.
3955      */
3956     tcg_gen_ext8s_i64(lsh, shift);
3957     tcg_gen_neg_i64(rsh, lsh);
3958     tcg_gen_shl_i64(lval, src, lsh);
3959     tcg_gen_shr_i64(rval, src, rsh);
3960     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3961     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3962 }
3963 
3964 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3965                          TCGv_vec src, TCGv_vec shift)
3966 {
3967     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3968     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3969     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3970     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3971     TCGv_vec msk, max;
3972 
3973     tcg_gen_neg_vec(vece, rsh, shift);
3974     if (vece == MO_8) {
3975         tcg_gen_mov_vec(lsh, shift);
3976     } else {
3977         msk = tcg_temp_new_vec_matching(dst);
3978         tcg_gen_dupi_vec(vece, msk, 0xff);
3979         tcg_gen_and_vec(vece, lsh, shift, msk);
3980         tcg_gen_and_vec(vece, rsh, rsh, msk);
3981     }
3982 
3983     /*
3984      * Rely on the TCG guarantee that out of range shifts produce
3985      * unspecified results, not undefined behaviour (i.e. no trap).
3986      * Discard out-of-range results after the fact.
3987      */
3988     tcg_gen_shlv_vec(vece, lval, src, lsh);
3989     tcg_gen_shrv_vec(vece, rval, src, rsh);
3990 
3991     max = tcg_temp_new_vec_matching(dst);
3992     tcg_gen_dupi_vec(vece, max, 8 << vece);
3993 
3994     /*
3995      * The choice of LT (signed) and GEU (unsigned) are biased toward
3996      * the instructions of the x86_64 host.  For MO_8, the whole byte
3997      * is significant so we must use an unsigned compare; otherwise we
3998      * have already masked to a byte and so a signed compare works.
3999      * Other tcg hosts have a full set of comparisons and do not care.
4000      */
4001     if (vece == MO_8) {
4002         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4003         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4004         tcg_gen_andc_vec(vece, lval, lval, lsh);
4005         tcg_gen_andc_vec(vece, rval, rval, rsh);
4006     } else {
4007         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4008         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4009         tcg_gen_and_vec(vece, lval, lval, lsh);
4010         tcg_gen_and_vec(vece, rval, rval, rsh);
4011     }
4012     tcg_gen_or_vec(vece, dst, lval, rval);
4013 }
4014 
4015 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4016                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4017 {
4018     static const TCGOpcode vecop_list[] = {
4019         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4020         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4021     };
4022     static const GVecGen3 ops[4] = {
4023         { .fniv = gen_ushl_vec,
4024           .fno = gen_helper_gvec_ushl_b,
4025           .opt_opc = vecop_list,
4026           .vece = MO_8 },
4027         { .fniv = gen_ushl_vec,
4028           .fno = gen_helper_gvec_ushl_h,
4029           .opt_opc = vecop_list,
4030           .vece = MO_16 },
4031         { .fni4 = gen_ushl_i32,
4032           .fniv = gen_ushl_vec,
4033           .opt_opc = vecop_list,
4034           .vece = MO_32 },
4035         { .fni8 = gen_ushl_i64,
4036           .fniv = gen_ushl_vec,
4037           .opt_opc = vecop_list,
4038           .vece = MO_64 },
4039     };
4040     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4041 }
4042 
4043 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4044 {
4045     TCGv_i32 lval = tcg_temp_new_i32();
4046     TCGv_i32 rval = tcg_temp_new_i32();
4047     TCGv_i32 lsh = tcg_temp_new_i32();
4048     TCGv_i32 rsh = tcg_temp_new_i32();
4049     TCGv_i32 zero = tcg_constant_i32(0);
4050     TCGv_i32 max = tcg_constant_i32(31);
4051 
4052     /*
4053      * Rely on the TCG guarantee that out of range shifts produce
4054      * unspecified results, not undefined behaviour (i.e. no trap).
4055      * Discard out-of-range results after the fact.
4056      */
4057     tcg_gen_ext8s_i32(lsh, shift);
4058     tcg_gen_neg_i32(rsh, lsh);
4059     tcg_gen_shl_i32(lval, src, lsh);
4060     tcg_gen_umin_i32(rsh, rsh, max);
4061     tcg_gen_sar_i32(rval, src, rsh);
4062     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4063     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4064 }
4065 
4066 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4067 {
4068     TCGv_i64 lval = tcg_temp_new_i64();
4069     TCGv_i64 rval = tcg_temp_new_i64();
4070     TCGv_i64 lsh = tcg_temp_new_i64();
4071     TCGv_i64 rsh = tcg_temp_new_i64();
4072     TCGv_i64 zero = tcg_constant_i64(0);
4073     TCGv_i64 max = tcg_constant_i64(63);
4074 
4075     /*
4076      * Rely on the TCG guarantee that out of range shifts produce
4077      * unspecified results, not undefined behaviour (i.e. no trap).
4078      * Discard out-of-range results after the fact.
4079      */
4080     tcg_gen_ext8s_i64(lsh, shift);
4081     tcg_gen_neg_i64(rsh, lsh);
4082     tcg_gen_shl_i64(lval, src, lsh);
4083     tcg_gen_umin_i64(rsh, rsh, max);
4084     tcg_gen_sar_i64(rval, src, rsh);
4085     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4086     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4087 }
4088 
4089 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4090                          TCGv_vec src, TCGv_vec shift)
4091 {
4092     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4093     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4094     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4095     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4096     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4097 
4098     /*
4099      * Rely on the TCG guarantee that out of range shifts produce
4100      * unspecified results, not undefined behaviour (i.e. no trap).
4101      * Discard out-of-range results after the fact.
4102      */
4103     tcg_gen_neg_vec(vece, rsh, shift);
4104     if (vece == MO_8) {
4105         tcg_gen_mov_vec(lsh, shift);
4106     } else {
4107         tcg_gen_dupi_vec(vece, tmp, 0xff);
4108         tcg_gen_and_vec(vece, lsh, shift, tmp);
4109         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4110     }
4111 
4112     /* Bound rsh so out of bound right shift gets -1.  */
4113     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4114     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4115     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4116 
4117     tcg_gen_shlv_vec(vece, lval, src, lsh);
4118     tcg_gen_sarv_vec(vece, rval, src, rsh);
4119 
4120     /* Select in-bound left shift.  */
4121     tcg_gen_andc_vec(vece, lval, lval, tmp);
4122 
4123     /* Select between left and right shift.  */
4124     if (vece == MO_8) {
4125         tcg_gen_dupi_vec(vece, tmp, 0);
4126         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4127     } else {
4128         tcg_gen_dupi_vec(vece, tmp, 0x80);
4129         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4130     }
4131 }
4132 
4133 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4134                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4135 {
4136     static const TCGOpcode vecop_list[] = {
4137         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4138         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4139     };
4140     static const GVecGen3 ops[4] = {
4141         { .fniv = gen_sshl_vec,
4142           .fno = gen_helper_gvec_sshl_b,
4143           .opt_opc = vecop_list,
4144           .vece = MO_8 },
4145         { .fniv = gen_sshl_vec,
4146           .fno = gen_helper_gvec_sshl_h,
4147           .opt_opc = vecop_list,
4148           .vece = MO_16 },
4149         { .fni4 = gen_sshl_i32,
4150           .fniv = gen_sshl_vec,
4151           .opt_opc = vecop_list,
4152           .vece = MO_32 },
4153         { .fni8 = gen_sshl_i64,
4154           .fniv = gen_sshl_vec,
4155           .opt_opc = vecop_list,
4156           .vece = MO_64 },
4157     };
4158     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4159 }
4160 
4161 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4162                           TCGv_vec a, TCGv_vec b)
4163 {
4164     TCGv_vec x = tcg_temp_new_vec_matching(t);
4165     tcg_gen_add_vec(vece, x, a, b);
4166     tcg_gen_usadd_vec(vece, t, a, b);
4167     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4168     tcg_gen_or_vec(vece, sat, sat, x);
4169 }
4170 
4171 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4172                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4173 {
4174     static const TCGOpcode vecop_list[] = {
4175         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4176     };
4177     static const GVecGen4 ops[4] = {
4178         { .fniv = gen_uqadd_vec,
4179           .fno = gen_helper_gvec_uqadd_b,
4180           .write_aofs = true,
4181           .opt_opc = vecop_list,
4182           .vece = MO_8 },
4183         { .fniv = gen_uqadd_vec,
4184           .fno = gen_helper_gvec_uqadd_h,
4185           .write_aofs = true,
4186           .opt_opc = vecop_list,
4187           .vece = MO_16 },
4188         { .fniv = gen_uqadd_vec,
4189           .fno = gen_helper_gvec_uqadd_s,
4190           .write_aofs = true,
4191           .opt_opc = vecop_list,
4192           .vece = MO_32 },
4193         { .fniv = gen_uqadd_vec,
4194           .fno = gen_helper_gvec_uqadd_d,
4195           .write_aofs = true,
4196           .opt_opc = vecop_list,
4197           .vece = MO_64 },
4198     };
4199     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4200                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4201 }
4202 
4203 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4204                           TCGv_vec a, TCGv_vec b)
4205 {
4206     TCGv_vec x = tcg_temp_new_vec_matching(t);
4207     tcg_gen_add_vec(vece, x, a, b);
4208     tcg_gen_ssadd_vec(vece, t, a, b);
4209     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4210     tcg_gen_or_vec(vece, sat, sat, x);
4211 }
4212 
4213 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4214                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4215 {
4216     static const TCGOpcode vecop_list[] = {
4217         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4218     };
4219     static const GVecGen4 ops[4] = {
4220         { .fniv = gen_sqadd_vec,
4221           .fno = gen_helper_gvec_sqadd_b,
4222           .opt_opc = vecop_list,
4223           .write_aofs = true,
4224           .vece = MO_8 },
4225         { .fniv = gen_sqadd_vec,
4226           .fno = gen_helper_gvec_sqadd_h,
4227           .opt_opc = vecop_list,
4228           .write_aofs = true,
4229           .vece = MO_16 },
4230         { .fniv = gen_sqadd_vec,
4231           .fno = gen_helper_gvec_sqadd_s,
4232           .opt_opc = vecop_list,
4233           .write_aofs = true,
4234           .vece = MO_32 },
4235         { .fniv = gen_sqadd_vec,
4236           .fno = gen_helper_gvec_sqadd_d,
4237           .opt_opc = vecop_list,
4238           .write_aofs = true,
4239           .vece = MO_64 },
4240     };
4241     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4242                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4243 }
4244 
4245 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4246                           TCGv_vec a, TCGv_vec b)
4247 {
4248     TCGv_vec x = tcg_temp_new_vec_matching(t);
4249     tcg_gen_sub_vec(vece, x, a, b);
4250     tcg_gen_ussub_vec(vece, t, a, b);
4251     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4252     tcg_gen_or_vec(vece, sat, sat, x);
4253 }
4254 
4255 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4256                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4257 {
4258     static const TCGOpcode vecop_list[] = {
4259         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4260     };
4261     static const GVecGen4 ops[4] = {
4262         { .fniv = gen_uqsub_vec,
4263           .fno = gen_helper_gvec_uqsub_b,
4264           .opt_opc = vecop_list,
4265           .write_aofs = true,
4266           .vece = MO_8 },
4267         { .fniv = gen_uqsub_vec,
4268           .fno = gen_helper_gvec_uqsub_h,
4269           .opt_opc = vecop_list,
4270           .write_aofs = true,
4271           .vece = MO_16 },
4272         { .fniv = gen_uqsub_vec,
4273           .fno = gen_helper_gvec_uqsub_s,
4274           .opt_opc = vecop_list,
4275           .write_aofs = true,
4276           .vece = MO_32 },
4277         { .fniv = gen_uqsub_vec,
4278           .fno = gen_helper_gvec_uqsub_d,
4279           .opt_opc = vecop_list,
4280           .write_aofs = true,
4281           .vece = MO_64 },
4282     };
4283     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4284                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4285 }
4286 
4287 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4288                           TCGv_vec a, TCGv_vec b)
4289 {
4290     TCGv_vec x = tcg_temp_new_vec_matching(t);
4291     tcg_gen_sub_vec(vece, x, a, b);
4292     tcg_gen_sssub_vec(vece, t, a, b);
4293     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4294     tcg_gen_or_vec(vece, sat, sat, x);
4295 }
4296 
4297 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4298                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4299 {
4300     static const TCGOpcode vecop_list[] = {
4301         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4302     };
4303     static const GVecGen4 ops[4] = {
4304         { .fniv = gen_sqsub_vec,
4305           .fno = gen_helper_gvec_sqsub_b,
4306           .opt_opc = vecop_list,
4307           .write_aofs = true,
4308           .vece = MO_8 },
4309         { .fniv = gen_sqsub_vec,
4310           .fno = gen_helper_gvec_sqsub_h,
4311           .opt_opc = vecop_list,
4312           .write_aofs = true,
4313           .vece = MO_16 },
4314         { .fniv = gen_sqsub_vec,
4315           .fno = gen_helper_gvec_sqsub_s,
4316           .opt_opc = vecop_list,
4317           .write_aofs = true,
4318           .vece = MO_32 },
4319         { .fniv = gen_sqsub_vec,
4320           .fno = gen_helper_gvec_sqsub_d,
4321           .opt_opc = vecop_list,
4322           .write_aofs = true,
4323           .vece = MO_64 },
4324     };
4325     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4326                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4327 }
4328 
4329 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4330 {
4331     TCGv_i32 t = tcg_temp_new_i32();
4332 
4333     tcg_gen_sub_i32(t, a, b);
4334     tcg_gen_sub_i32(d, b, a);
4335     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4336 }
4337 
4338 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4339 {
4340     TCGv_i64 t = tcg_temp_new_i64();
4341 
4342     tcg_gen_sub_i64(t, a, b);
4343     tcg_gen_sub_i64(d, b, a);
4344     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4345 }
4346 
4347 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4348 {
4349     TCGv_vec t = tcg_temp_new_vec_matching(d);
4350 
4351     tcg_gen_smin_vec(vece, t, a, b);
4352     tcg_gen_smax_vec(vece, d, a, b);
4353     tcg_gen_sub_vec(vece, d, d, t);
4354 }
4355 
4356 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4357                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4358 {
4359     static const TCGOpcode vecop_list[] = {
4360         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4361     };
4362     static const GVecGen3 ops[4] = {
4363         { .fniv = gen_sabd_vec,
4364           .fno = gen_helper_gvec_sabd_b,
4365           .opt_opc = vecop_list,
4366           .vece = MO_8 },
4367         { .fniv = gen_sabd_vec,
4368           .fno = gen_helper_gvec_sabd_h,
4369           .opt_opc = vecop_list,
4370           .vece = MO_16 },
4371         { .fni4 = gen_sabd_i32,
4372           .fniv = gen_sabd_vec,
4373           .fno = gen_helper_gvec_sabd_s,
4374           .opt_opc = vecop_list,
4375           .vece = MO_32 },
4376         { .fni8 = gen_sabd_i64,
4377           .fniv = gen_sabd_vec,
4378           .fno = gen_helper_gvec_sabd_d,
4379           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4380           .opt_opc = vecop_list,
4381           .vece = MO_64 },
4382     };
4383     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4384 }
4385 
4386 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4387 {
4388     TCGv_i32 t = tcg_temp_new_i32();
4389 
4390     tcg_gen_sub_i32(t, a, b);
4391     tcg_gen_sub_i32(d, b, a);
4392     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4393 }
4394 
4395 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4396 {
4397     TCGv_i64 t = tcg_temp_new_i64();
4398 
4399     tcg_gen_sub_i64(t, a, b);
4400     tcg_gen_sub_i64(d, b, a);
4401     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4402 }
4403 
4404 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4405 {
4406     TCGv_vec t = tcg_temp_new_vec_matching(d);
4407 
4408     tcg_gen_umin_vec(vece, t, a, b);
4409     tcg_gen_umax_vec(vece, d, a, b);
4410     tcg_gen_sub_vec(vece, d, d, t);
4411 }
4412 
4413 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4414                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4415 {
4416     static const TCGOpcode vecop_list[] = {
4417         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4418     };
4419     static const GVecGen3 ops[4] = {
4420         { .fniv = gen_uabd_vec,
4421           .fno = gen_helper_gvec_uabd_b,
4422           .opt_opc = vecop_list,
4423           .vece = MO_8 },
4424         { .fniv = gen_uabd_vec,
4425           .fno = gen_helper_gvec_uabd_h,
4426           .opt_opc = vecop_list,
4427           .vece = MO_16 },
4428         { .fni4 = gen_uabd_i32,
4429           .fniv = gen_uabd_vec,
4430           .fno = gen_helper_gvec_uabd_s,
4431           .opt_opc = vecop_list,
4432           .vece = MO_32 },
4433         { .fni8 = gen_uabd_i64,
4434           .fniv = gen_uabd_vec,
4435           .fno = gen_helper_gvec_uabd_d,
4436           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4437           .opt_opc = vecop_list,
4438           .vece = MO_64 },
4439     };
4440     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4441 }
4442 
4443 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4444 {
4445     TCGv_i32 t = tcg_temp_new_i32();
4446     gen_sabd_i32(t, a, b);
4447     tcg_gen_add_i32(d, d, t);
4448 }
4449 
4450 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4451 {
4452     TCGv_i64 t = tcg_temp_new_i64();
4453     gen_sabd_i64(t, a, b);
4454     tcg_gen_add_i64(d, d, t);
4455 }
4456 
4457 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4458 {
4459     TCGv_vec t = tcg_temp_new_vec_matching(d);
4460     gen_sabd_vec(vece, t, a, b);
4461     tcg_gen_add_vec(vece, d, d, t);
4462 }
4463 
4464 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4465                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4466 {
4467     static const TCGOpcode vecop_list[] = {
4468         INDEX_op_sub_vec, INDEX_op_add_vec,
4469         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4470     };
4471     static const GVecGen3 ops[4] = {
4472         { .fniv = gen_saba_vec,
4473           .fno = gen_helper_gvec_saba_b,
4474           .opt_opc = vecop_list,
4475           .load_dest = true,
4476           .vece = MO_8 },
4477         { .fniv = gen_saba_vec,
4478           .fno = gen_helper_gvec_saba_h,
4479           .opt_opc = vecop_list,
4480           .load_dest = true,
4481           .vece = MO_16 },
4482         { .fni4 = gen_saba_i32,
4483           .fniv = gen_saba_vec,
4484           .fno = gen_helper_gvec_saba_s,
4485           .opt_opc = vecop_list,
4486           .load_dest = true,
4487           .vece = MO_32 },
4488         { .fni8 = gen_saba_i64,
4489           .fniv = gen_saba_vec,
4490           .fno = gen_helper_gvec_saba_d,
4491           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4492           .opt_opc = vecop_list,
4493           .load_dest = true,
4494           .vece = MO_64 },
4495     };
4496     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4497 }
4498 
4499 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4500 {
4501     TCGv_i32 t = tcg_temp_new_i32();
4502     gen_uabd_i32(t, a, b);
4503     tcg_gen_add_i32(d, d, t);
4504 }
4505 
4506 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4507 {
4508     TCGv_i64 t = tcg_temp_new_i64();
4509     gen_uabd_i64(t, a, b);
4510     tcg_gen_add_i64(d, d, t);
4511 }
4512 
4513 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4514 {
4515     TCGv_vec t = tcg_temp_new_vec_matching(d);
4516     gen_uabd_vec(vece, t, a, b);
4517     tcg_gen_add_vec(vece, d, d, t);
4518 }
4519 
4520 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4521                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4522 {
4523     static const TCGOpcode vecop_list[] = {
4524         INDEX_op_sub_vec, INDEX_op_add_vec,
4525         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4526     };
4527     static const GVecGen3 ops[4] = {
4528         { .fniv = gen_uaba_vec,
4529           .fno = gen_helper_gvec_uaba_b,
4530           .opt_opc = vecop_list,
4531           .load_dest = true,
4532           .vece = MO_8 },
4533         { .fniv = gen_uaba_vec,
4534           .fno = gen_helper_gvec_uaba_h,
4535           .opt_opc = vecop_list,
4536           .load_dest = true,
4537           .vece = MO_16 },
4538         { .fni4 = gen_uaba_i32,
4539           .fniv = gen_uaba_vec,
4540           .fno = gen_helper_gvec_uaba_s,
4541           .opt_opc = vecop_list,
4542           .load_dest = true,
4543           .vece = MO_32 },
4544         { .fni8 = gen_uaba_i64,
4545           .fniv = gen_uaba_vec,
4546           .fno = gen_helper_gvec_uaba_d,
4547           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4548           .opt_opc = vecop_list,
4549           .load_dest = true,
4550           .vece = MO_64 },
4551     };
4552     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4553 }
4554 
/*
 * Generate code for an AArch32 coprocessor register access.
 *
 * @s: DisasContext
 * @cpnum: coprocessor number
 * @is64: nonzero for a 64-bit access using the @rt/@rt2 register pair
 * @opc1, @crn, @crm, @opc2: register selector fields; together with
 *     @cpnum, @is64 and s->ns they form the register lookup key
 * @isread: true for a read into core registers, false for a write
 * @rt: core register for a 32-bit access, or for the low 32 bits
 *     of a 64-bit access
 * @rt2: core register for the high 32 bits of a 64-bit access
 *
 * An encoding with no matching register definition, or one which
 * cp_access_ok() rejects for the current EL, is handled as an
 * unallocated encoding.
 */
static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
                           int opc1, int crn, int crm, int opc2,
                           bool isread, int rt, int rt2)
{
    uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    /* Stays NULL until a runtime access check or register lookup is emitted */
    TCGv_ptr tcg_ri = NULL;
    bool need_exit_tb;
    uint32_t syndrome;

    /*
     * Note that since we are an implementation which takes an
     * exception on a trapped conditional instruction only if the
     * instruction passes its condition code check, we can take
     * advantage of the clause in the ARM ARM that allows us to set
     * the COND field in the instruction to 0xE in all cases.
     * We could fish the actual condition out of the insn (ARM)
     * or the condexec bits (Thumb) but it isn't necessary.
     */
    switch (cpnum) {
    case 14:
        if (is64) {
            syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
                                         isread, false);
        } else {
            syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
                                        rt, isread, false);
        }
        break;
    case 15:
        if (is64) {
            syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
                                         isread, false);
        } else {
            syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
                                        rt, isread, false);
        }
        break;
    default:
        /*
         * ARMv8 defines that only coprocessors 14 and 15 exist,
         * so this can only happen if this is an ARMv7 or earlier CPU,
         * in which case the syndrome information won't actually be
         * guest visible.
         */
        assert(!arm_dc_feature(s, ARM_FEATURE_V8));
        syndrome = syn_uncategorized();
        break;
    }

    if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
        /*
         * At EL1, check for a HSTR_EL2 trap, which must take precedence
         * over the UNDEF for "no such register" or the UNDEF for "access
         * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
         * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
         * access_check_cp_reg(), after the checks for whether the access
         * configurably trapped to EL1.
         */
        /* The HSTR bit number is CRm for 64-bit accesses, CRn otherwise */
        uint32_t maskbit = is64 ? crm : crn;

        if (maskbit != 4 && maskbit != 14) {
            /* T4 and T14 are RES0 so never cause traps */
            TCGv_i32 t;
            DisasLabel over = gen_disas_label(s);

            /* Skip the exception at runtime when the selected bit is clear */
            t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
            tcg_gen_andi_i32(t, t, 1u << maskbit);
            tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);

            gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
            /*
             * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
             * but since we're conditionally branching over it, we want
             * to assume continue-to-next-instruction.
             */
            s->base.is_jmp = DISAS_NEXT;
            set_disas_label(s, over);
        }
    }

    if (!ri) {
        /*
         * Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        if (is64) {
            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
                          "64 bit system register cp:%d opc1: %d crm:%d "
                          "(%s)\n",
                          isread ? "read" : "write", cpnum, opc1, crm,
                          s->ns ? "non-secure" : "secure");
        } else {
            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
                          "system register cp:%d opc1:%d crn:%d crm:%d "
                          "opc2:%d (%s)\n",
                          isread ? "read" : "write", cpnum, opc1, crn,
                          crm, opc2, s->ns ? "non-secure" : "secure");
        }
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
        (ri->fgt && s->fgt_active) ||
        (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
        /*
         * Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         * Note that on XScale all cp0..cp13 registers do an access check
         * call in order to handle c15_cpar.
         */
        gen_set_condexec(s);
        gen_update_pc(s, 0);
        /* The helper's result is kept in tcg_ri and reused by the accesses below */
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_set_condexec(s);
        gen_update_pc(s, 0);
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        break;
    case ARM_CP_NOP:
        /* Nothing further to emit for this register */
        return;
    case ARM_CP_WFI:
        if (isread) {
            unallocated_encoding(s);
        } else {
            /* Point the PC past this insn and end the TB with DISAS_WFI */
            gen_update_pc(s, curr_insn_len(s));
            s->base.is_jmp = DISAS_WFI;
        }
        return;
    default:
        g_assert_not_reached();
    }

    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    if (isread) {
        /* Read */
        if (is64) {
            TCGv_i64 tmp64;
            TCGv_i32 tmp;
            /*
             * Value source, in priority order: constant resetvalue,
             * readfn via helper, direct load from the CPU state field.
             */
            if (ri->type & ARM_CP_CONST) {
                tmp64 = tcg_constant_i64(ri->resetvalue);
            } else if (ri->readfn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                tmp64 = tcg_temp_new_i64();
                gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
            } else {
                tmp64 = tcg_temp_new_i64();
                tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
            }
            /* Low half goes to rt, high half to rt2 */
            tmp = tcg_temp_new_i32();
            tcg_gen_extrl_i64_i32(tmp, tmp64);
            store_reg(s, rt, tmp);
            tmp = tcg_temp_new_i32();
            tcg_gen_extrh_i64_i32(tmp, tmp64);
            store_reg(s, rt2, tmp);
        } else {
            TCGv_i32 tmp;
            /* Same value-source priority as the 64-bit read above */
            if (ri->type & ARM_CP_CONST) {
                tmp = tcg_constant_i32(ri->resetvalue);
            } else if (ri->readfn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                tmp = tcg_temp_new_i32();
                gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
            } else {
                tmp = load_cpu_offset(ri->fieldoffset);
            }
            if (rt == 15) {
                /*
                 * Destination register of r15 for 32 bit loads sets
                 * the condition codes from the high 4 bits of the value
                 */
                gen_set_nzcv(tmp);
            } else {
                store_reg(s, rt, tmp);
            }
        }
    } else {
        /* Write */
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        }

        if (is64) {
            TCGv_i32 tmplo, tmphi;
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            /* rt supplies the low half and rt2 the high half */
            tmplo = load_reg(s, rt);
            tmphi = load_reg(s, rt2);
            tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
            if (ri->writefn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
            } else {
                tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
            }
        } else {
            TCGv_i32 tmp = load_reg(s, rt);
            if (ri->writefn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
            } else {
                store_cpu_offset(tmp, ri->fieldoffset, 4);
            }
        }
    }

    /* I/O operations must end the TB here (whether read or write) */
    need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
                    (ri->type & ARM_CP_IO));

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        need_exit_tb = true;
    }
    if (need_exit_tb) {
        gen_lookup_tb(s);
    }
}
4811 
4812 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4813 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4814 {
4815     int cpnum = (insn >> 8) & 0xf;
4816 
4817     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4818         unallocated_encoding(s);
4819     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4820         if (disas_iwmmxt_insn(s, insn)) {
4821             unallocated_encoding(s);
4822         }
4823     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4824         if (disas_dsp_insn(s, insn)) {
4825             unallocated_encoding(s);
4826         }
4827     }
4828 }
4829 
4830 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4831 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4832 {
4833     TCGv_i32 tmp;
4834     tmp = tcg_temp_new_i32();
4835     tcg_gen_extrl_i64_i32(tmp, val);
4836     store_reg(s, rlow, tmp);
4837     tmp = tcg_temp_new_i32();
4838     tcg_gen_extrh_i64_i32(tmp, val);
4839     store_reg(s, rhigh, tmp);
4840 }
4841 
4842 /* load and add a 64-bit value from a register pair.  */
4843 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4844 {
4845     TCGv_i64 tmp;
4846     TCGv_i32 tmpl;
4847     TCGv_i32 tmph;
4848 
4849     /* Load 64-bit value rd:rn.  */
4850     tmpl = load_reg(s, rlow);
4851     tmph = load_reg(s, rhigh);
4852     tmp = tcg_temp_new_i64();
4853     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4854     tcg_gen_add_i64(val, val, tmp);
4855 }
4856 
4857 /* Set N and Z flags from hi|lo.  */
4858 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4859 {
4860     tcg_gen_mov_i32(cpu_NF, hi);
4861     tcg_gen_or_i32(cpu_ZF, lo, hi);
4862 }
4863 
/*
 * Load/Store exclusive instructions are implemented by remembering
 * the value/address loaded, and seeing if these are the same
 * when the store is performed.  This should be sufficient to implement
 * the architecturally mandated semantics, and avoids having to monitor
 * regular stores.  The compare vs the remembered value is done during
 * the cmpxchg operation, but we must compare the addresses manually.
 *
 * gen_load_exclusive:
 * @s: DisasContext
 * @rt: destination register (for size 3, the word at the lowest address)
 * @rt2: second destination register, written only when @size is 3
 * @addr: address to load from
 * @size: access size, combined into the MemOp; 3 selects the 64-bit path
 *
 * Records the loaded value in cpu_exclusive_val and the address in
 * cpu_exclusive_addr, and sets s->is_ldex.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i32 addr, int size)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    MemOp opc = size | MO_ALIGN | s->be_data;

    s->is_ldex = true;

    if (size == 3) {
        TCGv_i32 tmp2 = tcg_temp_new_i32();
        TCGv_i64 t64 = tcg_temp_new_i64();

        /*
         * For AArch32, architecturally the 32-bit word at the lowest
         * address is always Rt and the one at addr+4 is Rt2, even if
         * the CPU is big-endian. That means we don't want to do a
         * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
         * architecturally 64-bit access, but instead do a 64-bit access
         * using MO_BE if appropriate and then split the two halves.
         */
        TCGv taddr = gen_aa32_addr(s, addr, opc);

        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
        tcg_gen_mov_i64(cpu_exclusive_val, t64);
        /* tmp receives the value for Rt and tmp2 the value for Rt2 */
        if (s->be_data == MO_BE) {
            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
        } else {
            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
        }
        store_reg(s, rt2, tmp2);
    } else {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
        /* Remember the loaded value zero-extended to 64 bits */
        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
    }

    store_reg(s, rt, tmp);
    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
}
4908 
4909 static void gen_clrex(DisasContext *s)
4910 {
4911     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4912 }
4913 
/*
 * Generate code for a store-exclusive.
 *
 * @s: DisasContext
 * @rd: status register; receives 0 if the store happened, 1 otherwise
 * @rt: register holding the value to store (for size 3, the word for
 *      the lowest address)
 * @rt2: register holding the second word, read only when @size is 3
 * @addr: address to store to
 * @size: access size, combined into the MemOp; 3 selects the 64-bit path
 *
 * cpu_exclusive_addr is set to -1 on both the success and failure paths.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i32 addr, int size)
{
    TCGv_i32 t0, t1, t2;
    TCGv_i64 extaddr;
    TCGv taddr;
    TCGLabel *done_label;
    TCGLabel *fail_label;
    MemOp opc = size | MO_ALIGN | s->be_data;

    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
         [addr] = {Rt};
         {Rd} = 0;
       } else {
         {Rd} = 1;
       } */
    fail_label = gen_new_label();
    done_label = gen_new_label();
    /* Address mismatch against the remembered address fails immediately */
    extaddr = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(extaddr, addr);
    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);

    taddr = gen_aa32_addr(s, addr, opc);
    t0 = tcg_temp_new_i32();
    t1 = load_reg(s, rt);
    if (size == 3) {
        TCGv_i64 o64 = tcg_temp_new_i64();
        TCGv_i64 n64 = tcg_temp_new_i64();

        t2 = load_reg(s, rt2);

        /*
         * For AArch32, architecturally the 32-bit word at the lowest
         * address is always Rt and the one at addr+4 is Rt2, even if
         * the CPU is big-endian. Since we're going to treat this as a
         * single 64-bit BE store, we need to put the two halves in the
         * opposite order for BE to LE, so that they end up in the right
         * places.  We don't want gen_aa32_st_i64, because that checks
         * SCTLR_B as if for an architectural 64-bit access.
         */
        if (s->be_data == MO_BE) {
            tcg_gen_concat_i32_i64(n64, t2, t1);
        } else {
            tcg_gen_concat_i32_i64(n64, t1, t2);
        }

        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
                                   get_mem_index(s), opc);

        /* t0 = 1 if the old memory value differed from the remembered one */
        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
        tcg_gen_extrl_i64_i32(t0, o64);
    } else {
        /* Compare against the low 32 bits of the remembered value */
        t2 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
        /* t0 = 1 if the old memory value differed from the remembered one */
        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
    }
    tcg_gen_mov_i32(cpu_R[rd], t0);
    tcg_gen_br(done_label);

    gen_set_label(fail_label);
    tcg_gen_movi_i32(cpu_R[rd], 1);
    gen_set_label(done_label);
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
4979 
/*
 * gen_srs:
 * @s: DisasContext
 * @mode: mode field from insn (which stack to store to)
 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
 * @writeback: true if writeback bit set
 *
 * Generate code for the SRS (Store Return State) insn: r14 and the SPSR
 * are stored to two consecutive words relative to the banked r13 of
 * @mode, with optional writeback of the updated stack pointer.
 */
static void gen_srs(DisasContext *s,
                    uint32_t mode, uint32_t amode, bool writeback)
{
    int32_t offset;
    TCGv_i32 addr, tmp;
    bool undef = false;

    /* SRS is:
     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
     *   and specified mode is monitor mode
     * - UNDEFINED in Hyp mode
     * - UNPREDICTABLE in User or System mode
     * - UNPREDICTABLE if the specified mode is:
     * -- not implemented
     * -- not a valid mode number
     * -- a mode that's at a higher exception level
     * -- Monitor, if we are Non-secure
     * For the UNPREDICTABLE cases we choose to UNDEF.
     */
    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
        return;
    }

    if (s->current_el == 0 || s->current_el == 2) {
        undef = true;
    }

    switch (mode) {
    case ARM_CPU_MODE_USR:
    case ARM_CPU_MODE_FIQ:
    case ARM_CPU_MODE_IRQ:
    case ARM_CPU_MODE_SVC:
    case ARM_CPU_MODE_ABT:
    case ARM_CPU_MODE_UND:
    case ARM_CPU_MODE_SYS:
        break;
    case ARM_CPU_MODE_HYP:
        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
            undef = true;
        }
        break;
    case ARM_CPU_MODE_MON:
        /* No need to check specifically for "are we non-secure" because
         * we've already made EL0 UNDEF and handled the trap for S-EL1;
         * so if this isn't EL3 then we must be non-secure.
         */
        if (s->current_el != 3) {
            undef = true;
        }
        break;
    default:
        /* Any other mode number is treated as UNDEF */
        undef = true;
    }

    if (undef) {
        unallocated_encoding(s);
        return;
    }

    addr = tcg_temp_new_i32();
    /* get_r13_banked() will raise an exception if called from System mode */
    gen_set_condexec(s);
    gen_update_pc(s, 0);
    gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
    /* Offset from the banked r13 to the lower-addressed of the two words */
    switch (amode) {
    case 0: /* DA */
        offset = -4;
        break;
    case 1: /* IA */
        offset = 0;
        break;
    case 2: /* DB */
        offset = -8;
        break;
    case 3: /* IB */
        offset = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_gen_addi_i32(addr, addr, offset);
    /* r14 is stored to the lower word, the SPSR to the word 4 bytes above */
    tmp = load_reg(s, 14);
    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    tmp = load_cpu_field(spsr);
    tcg_gen_addi_i32(addr, addr, 4);
    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    if (writeback) {
        /*
         * addr currently points at the second stored word; these
         * adjustments bring it to the original r13 - 8 for the
         * decrementing modes (DA, DB) and r13 + 8 for the incrementing
         * modes (IA, IB).
         */
        switch (amode) {
        case 0:
            offset = -8;
            break;
        case 1:
            offset = 4;
            break;
        case 2:
            offset = -4;
            break;
        case 3:
            offset = 0;
            break;
        default:
            g_assert_not_reached();
        }
        tcg_gen_addi_i32(addr, addr, offset);
        gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
    }
    s->base.is_jmp = DISAS_UPDATE_EXIT;
}
5098 
5099 /* Skip this instruction if the ARM condition is false */
5100 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5101 {
5102     arm_gen_condlabel(s);
5103     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5104 }
5105 
5106 
5107 /*
5108  * Constant expanders used by T16/T32 decode
5109  */
5110 
5111 /* Return only the rotation part of T32ExpandImm.  */
5112 static int t32_expandimm_rot(DisasContext *s, int x)
5113 {
5114     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5115 }
5116 
5117 /* Return the unrotated immediate from T32ExpandImm.  */
5118 static int t32_expandimm_imm(DisasContext *s, int x)
5119 {
5120     int imm = extract32(x, 0, 8);
5121 
5122     switch (extract32(x, 8, 4)) {
5123     case 0: /* XY */
5124         /* Nothing to do.  */
5125         break;
5126     case 1: /* 00XY00XY */
5127         imm *= 0x00010001;
5128         break;
5129     case 2: /* XY00XY00 */
5130         imm *= 0x01000100;
5131         break;
5132     case 3: /* XYXYXYXY */
5133         imm *= 0x01010101;
5134         break;
5135     default:
5136         /* Rotated constant.  */
5137         imm |= 0x80;
5138         break;
5139     }
5140     return imm;
5141 }
5142 
5143 static int t32_branch24(DisasContext *s, int x)
5144 {
5145     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5146     x ^= !(x < 0) * (3 << 21);
5147     /* Append the final zero.  */
5148     return x << 1;
5149 }
5150 
5151 static int t16_setflags(DisasContext *s)
5152 {
5153     return s->condexec_mask == 0;
5154 }
5155 
5156 static int t16_push_list(DisasContext *s, int x)
5157 {
5158     return (x & 0xff) | (x & 0x100) << (14 - 8);
5159 }
5160 
5161 static int t16_pop_list(DisasContext *s, int x)
5162 {
5163     return (x & 0xff) | (x & 0x100) << (15 - 8);
5164 }
5165 
5166 /*
5167  * Include the generated decoders.
5168  */
5169 
5170 #include "decode-a32.c.inc"
5171 #include "decode-a32-uncond.c.inc"
5172 #include "decode-t32.c.inc"
5173 #include "decode-t16.c.inc"
5174 
5175 static bool valid_cp(DisasContext *s, int cp)
5176 {
5177     /*
5178      * Return true if this coprocessor field indicates something
5179      * that's really a possible coprocessor.
5180      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5181      * and of those only cp14 and cp15 were used for registers.
5182      * cp10 and cp11 were used for VFP and Neon, whose decode is
5183      * dealt with elsewhere. With the advent of fp16, cp9 is also
5184      * now part of VFP.
5185      * For v8A and later, the encoding has been tightened so that
5186      * only cp14 and cp15 are valid, and other values aren't considered
5187      * to be in the coprocessor-instruction space at all. v8M still
5188      * permits coprocessors 0..7.
5189      * For XScale, we must not decode the XScale cp0, cp1 space as
5190      * a standard coprocessor insn, because we want to fall through to
5191      * the legacy disas_xscale_insn() decoder after decodetree is done.
5192      */
5193     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5194         return false;
5195     }
5196 
5197     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5198         !arm_dc_feature(s, ARM_FEATURE_M)) {
5199         return cp >= 14;
5200     }
5201     return cp < 8 || cp >= 14;
5202 }
5203 
5204 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5205 {
5206     if (!valid_cp(s, a->cp)) {
5207         return false;
5208     }
5209     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5210                    false, a->rt, 0);
5211     return true;
5212 }
5213 
5214 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5215 {
5216     if (!valid_cp(s, a->cp)) {
5217         return false;
5218     }
5219     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5220                    true, a->rt, 0);
5221     return true;
5222 }
5223 
5224 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5225 {
5226     if (!valid_cp(s, a->cp)) {
5227         return false;
5228     }
5229     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5230                    false, a->rt, a->rt2);
5231     return true;
5232 }
5233 
5234 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5235 {
5236     if (!valid_cp(s, a->cp)) {
5237         return false;
5238     }
5239     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5240                    true, a->rt, a->rt2);
5241     return true;
5242 }
5243 
5244 /* Helpers to swap operands for reverse-subtract.  */
5245 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5246 {
5247     tcg_gen_sub_i32(dst, b, a);
5248 }
5249 
5250 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5251 {
5252     gen_sub_CC(dst, b, a);
5253 }
5254 
5255 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5256 {
5257     gen_sub_carry(dest, b, a);
5258 }
5259 
5260 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5261 {
5262     gen_sbc_CC(dest, b, a);
5263 }
5264 
5265 /*
5266  * Helpers for the data processing routines.
5267  *
5268  * After the computation store the results back.
5269  * This may be suppressed altogether (STREG_NONE), require a runtime
5270  * check against the stack limits (STREG_SP_CHECK), or generate an
5271  * exception return.  Oh, or store into a register.
5272  *
5273  * Always return true, indicating success for a trans_* function.
5274  */
5275 typedef enum {
5276    STREG_NONE,
5277    STREG_NORMAL,
5278    STREG_SP_CHECK,
5279    STREG_EXC_RET,
5280 } StoreRegKind;
5281 
5282 static bool store_reg_kind(DisasContext *s, int rd,
5283                             TCGv_i32 val, StoreRegKind kind)
5284 {
5285     switch (kind) {
5286     case STREG_NONE:
5287         return true;
5288     case STREG_NORMAL:
5289         /* See ALUWritePC: Interworking only from a32 mode. */
5290         if (s->thumb) {
5291             store_reg(s, rd, val);
5292         } else {
5293             store_reg_bx(s, rd, val);
5294         }
5295         return true;
5296     case STREG_SP_CHECK:
5297         store_sp_checked(s, val);
5298         return true;
5299     case STREG_EXC_RET:
5300         gen_exception_return(s, val);
5301         return true;
5302     }
5303     g_assert_not_reached();
5304 }
5305 
5306 /*
5307  * Data Processing (register)
5308  *
5309  * Operate, with set flags, one register source,
5310  * one immediate shifted register source, and a destination.
5311  */
5312 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5313                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5314                          int logic_cc, StoreRegKind kind)
5315 {
5316     TCGv_i32 tmp1, tmp2;
5317 
5318     tmp2 = load_reg(s, a->rm);
5319     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5320     tmp1 = load_reg(s, a->rn);
5321 
5322     gen(tmp1, tmp1, tmp2);
5323 
5324     if (logic_cc) {
5325         gen_logic_CC(tmp1);
5326     }
5327     return store_reg_kind(s, a->rd, tmp1, kind);
5328 }
5329 
5330 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5331                          void (*gen)(TCGv_i32, TCGv_i32),
5332                          int logic_cc, StoreRegKind kind)
5333 {
5334     TCGv_i32 tmp;
5335 
5336     tmp = load_reg(s, a->rm);
5337     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5338 
5339     gen(tmp, tmp);
5340     if (logic_cc) {
5341         gen_logic_CC(tmp);
5342     }
5343     return store_reg_kind(s, a->rd, tmp, kind);
5344 }
5345 
5346 /*
5347  * Data-processing (register-shifted register)
5348  *
5349  * Operate, with set flags, one register source,
5350  * one register shifted register source, and a destination.
5351  */
5352 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5353                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5354                          int logic_cc, StoreRegKind kind)
5355 {
5356     TCGv_i32 tmp1, tmp2;
5357 
5358     tmp1 = load_reg(s, a->rs);
5359     tmp2 = load_reg(s, a->rm);
5360     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5361     tmp1 = load_reg(s, a->rn);
5362 
5363     gen(tmp1, tmp1, tmp2);
5364 
5365     if (logic_cc) {
5366         gen_logic_CC(tmp1);
5367     }
5368     return store_reg_kind(s, a->rd, tmp1, kind);
5369 }
5370 
5371 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5372                          void (*gen)(TCGv_i32, TCGv_i32),
5373                          int logic_cc, StoreRegKind kind)
5374 {
5375     TCGv_i32 tmp1, tmp2;
5376 
5377     tmp1 = load_reg(s, a->rs);
5378     tmp2 = load_reg(s, a->rm);
5379     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5380 
5381     gen(tmp2, tmp2);
5382     if (logic_cc) {
5383         gen_logic_CC(tmp2);
5384     }
5385     return store_reg_kind(s, a->rd, tmp2, kind);
5386 }
5387 
5388 /*
5389  * Data-processing (immediate)
5390  *
5391  * Operate, with set flags, one register source,
5392  * one rotated immediate, and a destination.
5393  *
5394  * Note that logic_cc && a->rot setting CF based on the msb of the
5395  * immediate is the reason why we must pass in the unrotated form
5396  * of the immediate.
5397  */
5398 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5399                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5400                          int logic_cc, StoreRegKind kind)
5401 {
5402     TCGv_i32 tmp1;
5403     uint32_t imm;
5404 
5405     imm = ror32(a->imm, a->rot);
5406     if (logic_cc && a->rot) {
5407         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5408     }
5409     tmp1 = load_reg(s, a->rn);
5410 
5411     gen(tmp1, tmp1, tcg_constant_i32(imm));
5412 
5413     if (logic_cc) {
5414         gen_logic_CC(tmp1);
5415     }
5416     return store_reg_kind(s, a->rd, tmp1, kind);
5417 }
5418 
5419 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5420                          void (*gen)(TCGv_i32, TCGv_i32),
5421                          int logic_cc, StoreRegKind kind)
5422 {
5423     TCGv_i32 tmp;
5424     uint32_t imm;
5425 
5426     imm = ror32(a->imm, a->rot);
5427     if (logic_cc && a->rot) {
5428         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5429     }
5430 
5431     tmp = tcg_temp_new_i32();
5432     gen(tmp, tcg_constant_i32(imm));
5433 
5434     if (logic_cc) {
5435         gen_logic_CC(tmp);
5436     }
5437     return store_reg_kind(s, a->rd, tmp, kind);
5438 }
5439 
/*
 * Generators for the trans_* entry points of the data-processing insns.
 *
 * Every expansion defines functions with parameters named 's' and 'a',
 * which the OP, LOGIC_CC and KIND arguments may reference.
 *
 * In DO_ANY3 and DO_ANY2, KIND is evaluated first, into a local, before
 * OP and LOGIC_CC are evaluated in the call.  KIND may therefore modify
 * *a, or return from the generated function, ahead of the operation.
 *
 *   DO_ANY3: three-operand ops   -> _rrri, _rrrr, _rri
 *   DO_ANY2: two-operand ops     -> _rxri, _rxrr, _rxi
 *   DO_CMP2: compare-style ops, result discarded (STREG_NONE)
 *                                -> _xrri, _xrrr, _xri
 */
#define DO_ANY3(NAME, OP, LOGIC_CC, KIND)                               \
    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind store_kind = (KIND);                               \
        return op_s_rrr_shi(s, a, OP, LOGIC_CC, store_kind);            \
    }                                                                   \
    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind store_kind = (KIND);                               \
        return op_s_rrr_shr(s, a, OP, LOGIC_CC, store_kind);            \
    }                                                                   \
    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind store_kind = (KIND);                               \
        return op_s_rri_rot(s, a, OP, LOGIC_CC, store_kind);            \
    }

#define DO_ANY2(NAME, OP, LOGIC_CC, KIND)                               \
    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind store_kind = (KIND);                               \
        return op_s_rxr_shi(s, a, OP, LOGIC_CC, store_kind);            \
    }                                                                   \
    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind store_kind = (KIND);                               \
        return op_s_rxr_shr(s, a, OP, LOGIC_CC, store_kind);            \
    }                                                                   \
    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind store_kind = (KIND);                               \
        return op_s_rxi_rot(s, a, OP, LOGIC_CC, store_kind);            \
    }

#define DO_CMP2(NAME, OP, LOGIC_CC)                                     \
    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        return op_s_rrr_shi(s, a, OP, LOGIC_CC, STREG_NONE);            \
    }                                                                   \
    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        return op_s_rrr_shr(s, a, OP, LOGIC_CC, STREG_NONE);            \
    }                                                                   \
    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        return op_s_rri_rot(s, a, OP, LOGIC_CC, STREG_NONE);            \
    }
5463 
5464 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5465 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5466 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5467 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5468 
5469 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5470 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5471 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5472 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5473 
5474 DO_CMP2(TST, tcg_gen_and_i32, true)
5475 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5476 DO_CMP2(CMN, gen_add_CC, false)
5477 DO_CMP2(CMP, gen_sub_CC, false)
5478 
5479 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5480         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5481 
5482 /*
5483  * Note for the computation of StoreRegKind we return out of the
5484  * middle of the functions that are expanded by DO_ANY3, and that
5485  * we modify a->s via that parameter before it is used by OP.
5486  */
5487 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5488         ({
5489             StoreRegKind ret = STREG_NORMAL;
5490             if (a->rd == 15 && a->s) {
5491                 /*
5492                  * See ALUExceptionReturn:
5493                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5494                  * In Hyp mode, UNDEFINED.
5495                  */
5496                 if (IS_USER(s) || s->current_el == 2) {
5497                     unallocated_encoding(s);
5498                     return true;
5499                 }
5500                 /* There is no writeback of nzcv to PSTATE.  */
5501                 a->s = 0;
5502                 ret = STREG_EXC_RET;
5503             } else if (a->rd == 13 && a->rn == 13) {
5504                 ret = STREG_SP_CHECK;
5505             }
5506             ret;
5507         }))
5508 
5509 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5510         ({
5511             StoreRegKind ret = STREG_NORMAL;
5512             if (a->rd == 15 && a->s) {
5513                 /*
5514                  * See ALUExceptionReturn:
5515                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5516                  * In Hyp mode, UNDEFINED.
5517                  */
5518                 if (IS_USER(s) || s->current_el == 2) {
5519                     unallocated_encoding(s);
5520                     return true;
5521                 }
5522                 /* There is no writeback of nzcv to PSTATE.  */
5523                 a->s = 0;
5524                 ret = STREG_EXC_RET;
5525             } else if (a->rd == 13) {
5526                 ret = STREG_SP_CHECK;
5527             }
5528             ret;
5529         }))
5530 
5531 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5532 
5533 /*
5534  * ORN is only available with T32, so there is no register-shifted-register
5535  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5536  */
5537 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5538 {
5539     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5540 }
5541 
5542 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5543 {
5544     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5545 }
5546 
5547 #undef DO_ANY3
5548 #undef DO_ANY2
5549 #undef DO_CMP2
5550 
5551 static bool trans_ADR(DisasContext *s, arg_ri *a)
5552 {
5553     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5554     return true;
5555 }
5556 
5557 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5558 {
5559     if (!ENABLE_ARCH_6T2) {
5560         return false;
5561     }
5562 
5563     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5564     return true;
5565 }
5566 
5567 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5568 {
5569     TCGv_i32 tmp;
5570 
5571     if (!ENABLE_ARCH_6T2) {
5572         return false;
5573     }
5574 
5575     tmp = load_reg(s, a->rd);
5576     tcg_gen_ext16u_i32(tmp, tmp);
5577     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5578     store_reg(s, a->rd, tmp);
5579     return true;
5580 }
5581 
5582 /*
5583  * v8.1M MVE wide-shifts
5584  */
5585 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5586                           WideShiftImmFn *fn)
5587 {
5588     TCGv_i64 rda;
5589     TCGv_i32 rdalo, rdahi;
5590 
5591     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5592         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5593         return false;
5594     }
5595     if (a->rdahi == 15) {
5596         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5597         return false;
5598     }
5599     if (!dc_isar_feature(aa32_mve, s) ||
5600         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5601         a->rdahi == 13) {
5602         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5603         unallocated_encoding(s);
5604         return true;
5605     }
5606 
5607     if (a->shim == 0) {
5608         a->shim = 32;
5609     }
5610 
5611     rda = tcg_temp_new_i64();
5612     rdalo = load_reg(s, a->rdalo);
5613     rdahi = load_reg(s, a->rdahi);
5614     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5615 
5616     fn(rda, rda, a->shim);
5617 
5618     tcg_gen_extrl_i64_i32(rdalo, rda);
5619     tcg_gen_extrh_i64_i32(rdahi, rda);
5620     store_reg(s, a->rdalo, rdalo);
5621     store_reg(s, a->rdahi, rdahi);
5622 
5623     return true;
5624 }
5625 
5626 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5627 {
5628     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5629 }
5630 
5631 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5632 {
5633     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5634 }
5635 
5636 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5637 {
5638     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5639 }
5640 
5641 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5642 {
5643     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5644 }
5645 
5646 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5647 {
5648     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5649 }
5650 
5651 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5652 {
5653     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5654 }
5655 
5656 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5657 {
5658     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5659 }
5660 
5661 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5662 {
5663     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5664 }
5665 
5666 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5667 {
5668     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5669 }
5670 
5671 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5672 {
5673     TCGv_i64 rda;
5674     TCGv_i32 rdalo, rdahi;
5675 
5676     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5677         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5678         return false;
5679     }
5680     if (a->rdahi == 15) {
5681         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5682         return false;
5683     }
5684     if (!dc_isar_feature(aa32_mve, s) ||
5685         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5686         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5687         a->rm == a->rdahi || a->rm == a->rdalo) {
5688         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5689         unallocated_encoding(s);
5690         return true;
5691     }
5692 
5693     rda = tcg_temp_new_i64();
5694     rdalo = load_reg(s, a->rdalo);
5695     rdahi = load_reg(s, a->rdahi);
5696     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5697 
5698     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5699     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5700 
5701     tcg_gen_extrl_i64_i32(rdalo, rda);
5702     tcg_gen_extrh_i64_i32(rdahi, rda);
5703     store_reg(s, a->rdalo, rdalo);
5704     store_reg(s, a->rdahi, rdahi);
5705 
5706     return true;
5707 }
5708 
5709 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5710 {
5711     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5712 }
5713 
5714 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5715 {
5716     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5717 }
5718 
5719 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5720 {
5721     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5722 }
5723 
5724 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5725 {
5726     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5727 }
5728 
5729 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5730 {
5731     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5732 }
5733 
5734 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5735 {
5736     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5737 }
5738 
5739 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5740 {
5741     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5742         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5743         return false;
5744     }
5745     if (!dc_isar_feature(aa32_mve, s) ||
5746         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5747         a->rda == 13 || a->rda == 15) {
5748         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5749         unallocated_encoding(s);
5750         return true;
5751     }
5752 
5753     if (a->shim == 0) {
5754         a->shim = 32;
5755     }
5756     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5757 
5758     return true;
5759 }
5760 
5761 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5762 {
5763     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5764 }
5765 
5766 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5767 {
5768     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5769 }
5770 
5771 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5772 {
5773     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5774 }
5775 
5776 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5777 {
5778     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5779 }
5780 
5781 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5782 {
5783     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5784 }
5785 
5786 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5787 {
5788     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5789 }
5790 
5791 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5792 {
5793     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5794         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5795         return false;
5796     }
5797     if (!dc_isar_feature(aa32_mve, s) ||
5798         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5799         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5800         a->rm == a->rda) {
5801         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5802         unallocated_encoding(s);
5803         return true;
5804     }
5805 
5806     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5807     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5808     return true;
5809 }
5810 
5811 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5812 {
5813     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5814 }
5815 
5816 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5817 {
5818     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5819 }
5820 
5821 /*
5822  * Multiply and multiply accumulate
5823  */
5824 
5825 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5826 {
5827     TCGv_i32 t1, t2;
5828 
5829     t1 = load_reg(s, a->rn);
5830     t2 = load_reg(s, a->rm);
5831     tcg_gen_mul_i32(t1, t1, t2);
5832     if (add) {
5833         t2 = load_reg(s, a->ra);
5834         tcg_gen_add_i32(t1, t1, t2);
5835     }
5836     if (a->s) {
5837         gen_logic_CC(t1);
5838     }
5839     store_reg(s, a->rd, t1);
5840     return true;
5841 }
5842 
5843 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5844 {
5845     return op_mla(s, a, false);
5846 }
5847 
5848 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5849 {
5850     return op_mla(s, a, true);
5851 }
5852 
5853 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5854 {
5855     TCGv_i32 t1, t2;
5856 
5857     if (!ENABLE_ARCH_6T2) {
5858         return false;
5859     }
5860     t1 = load_reg(s, a->rn);
5861     t2 = load_reg(s, a->rm);
5862     tcg_gen_mul_i32(t1, t1, t2);
5863     t2 = load_reg(s, a->ra);
5864     tcg_gen_sub_i32(t1, t2, t1);
5865     store_reg(s, a->rd, t1);
5866     return true;
5867 }
5868 
5869 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5870 {
5871     TCGv_i32 t0, t1, t2, t3;
5872 
5873     t0 = load_reg(s, a->rm);
5874     t1 = load_reg(s, a->rn);
5875     if (uns) {
5876         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5877     } else {
5878         tcg_gen_muls2_i32(t0, t1, t0, t1);
5879     }
5880     if (add) {
5881         t2 = load_reg(s, a->ra);
5882         t3 = load_reg(s, a->rd);
5883         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5884     }
5885     if (a->s) {
5886         gen_logicq_cc(t0, t1);
5887     }
5888     store_reg(s, a->ra, t0);
5889     store_reg(s, a->rd, t1);
5890     return true;
5891 }
5892 
5893 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5894 {
5895     return op_mlal(s, a, true, false);
5896 }
5897 
5898 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5899 {
5900     return op_mlal(s, a, false, false);
5901 }
5902 
5903 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5904 {
5905     return op_mlal(s, a, true, true);
5906 }
5907 
5908 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5909 {
5910     return op_mlal(s, a, false, true);
5911 }
5912 
5913 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5914 {
5915     TCGv_i32 t0, t1, t2, zero;
5916 
5917     if (s->thumb
5918         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5919         : !ENABLE_ARCH_6) {
5920         return false;
5921     }
5922 
5923     t0 = load_reg(s, a->rm);
5924     t1 = load_reg(s, a->rn);
5925     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5926     zero = tcg_constant_i32(0);
5927     t2 = load_reg(s, a->ra);
5928     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5929     t2 = load_reg(s, a->rd);
5930     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5931     store_reg(s, a->ra, t0);
5932     store_reg(s, a->rd, t1);
5933     return true;
5934 }
5935 
5936 /*
5937  * Saturating addition and subtraction
5938  */
5939 
5940 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5941 {
5942     TCGv_i32 t0, t1;
5943 
5944     if (s->thumb
5945         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5946         : !ENABLE_ARCH_5TE) {
5947         return false;
5948     }
5949 
5950     t0 = load_reg(s, a->rm);
5951     t1 = load_reg(s, a->rn);
5952     if (doub) {
5953         gen_helper_add_saturate(t1, cpu_env, t1, t1);
5954     }
5955     if (add) {
5956         gen_helper_add_saturate(t0, cpu_env, t0, t1);
5957     } else {
5958         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5959     }
5960     store_reg(s, a->rd, t0);
5961     return true;
5962 }
5963 
5964 #define DO_QADDSUB(NAME, ADD, DOUB) \
5965 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5966 {                                                        \
5967     return op_qaddsub(s, a, ADD, DOUB);                  \
5968 }
5969 
5970 DO_QADDSUB(QADD, true, false)
5971 DO_QADDSUB(QSUB, false, false)
5972 DO_QADDSUB(QDADD, true, true)
5973 DO_QADDSUB(QDSUB, false, true)
5974 
5975 #undef DO_QADDSUB
5976 
5977 /*
5978  * Halfword multiply and multiply accumulate
5979  */
5980 
5981 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5982                        int add_long, bool nt, bool mt)
5983 {
5984     TCGv_i32 t0, t1, tl, th;
5985 
5986     if (s->thumb
5987         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5988         : !ENABLE_ARCH_5TE) {
5989         return false;
5990     }
5991 
5992     t0 = load_reg(s, a->rn);
5993     t1 = load_reg(s, a->rm);
5994     gen_mulxy(t0, t1, nt, mt);
5995 
5996     switch (add_long) {
5997     case 0:
5998         store_reg(s, a->rd, t0);
5999         break;
6000     case 1:
6001         t1 = load_reg(s, a->ra);
6002         gen_helper_add_setq(t0, cpu_env, t0, t1);
6003         store_reg(s, a->rd, t0);
6004         break;
6005     case 2:
6006         tl = load_reg(s, a->ra);
6007         th = load_reg(s, a->rd);
6008         /* Sign-extend the 32-bit product to 64 bits.  */
6009         t1 = tcg_temp_new_i32();
6010         tcg_gen_sari_i32(t1, t0, 31);
6011         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6012         store_reg(s, a->ra, tl);
6013         store_reg(s, a->rd, th);
6014         break;
6015     default:
6016         g_assert_not_reached();
6017     }
6018     return true;
6019 }
6020 
6021 #define DO_SMLAX(NAME, add, nt, mt) \
6022 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6023 {                                                          \
6024     return op_smlaxxx(s, a, add, nt, mt);                  \
6025 }
6026 
6027 DO_SMLAX(SMULBB, 0, 0, 0)
6028 DO_SMLAX(SMULBT, 0, 0, 1)
6029 DO_SMLAX(SMULTB, 0, 1, 0)
6030 DO_SMLAX(SMULTT, 0, 1, 1)
6031 
6032 DO_SMLAX(SMLABB, 1, 0, 0)
6033 DO_SMLAX(SMLABT, 1, 0, 1)
6034 DO_SMLAX(SMLATB, 1, 1, 0)
6035 DO_SMLAX(SMLATT, 1, 1, 1)
6036 
6037 DO_SMLAX(SMLALBB, 2, 0, 0)
6038 DO_SMLAX(SMLALBT, 2, 0, 1)
6039 DO_SMLAX(SMLALTB, 2, 1, 0)
6040 DO_SMLAX(SMLALTT, 2, 1, 1)
6041 
6042 #undef DO_SMLAX
6043 
6044 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6045 {
6046     TCGv_i32 t0, t1;
6047 
6048     if (!ENABLE_ARCH_5TE) {
6049         return false;
6050     }
6051 
6052     t0 = load_reg(s, a->rn);
6053     t1 = load_reg(s, a->rm);
6054     /*
6055      * Since the nominal result is product<47:16>, shift the 16-bit
6056      * input up by 16 bits, so that the result is at product<63:32>.
6057      */
6058     if (mt) {
6059         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6060     } else {
6061         tcg_gen_shli_i32(t1, t1, 16);
6062     }
6063     tcg_gen_muls2_i32(t0, t1, t0, t1);
6064     if (add) {
6065         t0 = load_reg(s, a->ra);
6066         gen_helper_add_setq(t1, cpu_env, t1, t0);
6067     }
6068     store_reg(s, a->rd, t1);
6069     return true;
6070 }
6071 
6072 #define DO_SMLAWX(NAME, add, mt) \
6073 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6074 {                                                          \
6075     return op_smlawx(s, a, add, mt);                       \
6076 }
6077 
6078 DO_SMLAWX(SMULWB, 0, 0)
6079 DO_SMLAWX(SMULWT, 0, 1)
6080 DO_SMLAWX(SMLAWB, 1, 0)
6081 DO_SMLAWX(SMLAWT, 1, 1)
6082 
6083 #undef DO_SMLAWX
6084 
6085 /*
6086  * MSR (immediate) and hints
6087  */
6088 
6089 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6090 {
6091     /*
6092      * When running single-threaded TCG code, use the helper to ensure that
6093      * the next round-robin scheduled vCPU gets a crack.  When running in
6094      * MTTCG we don't generate jumps to the helper as it won't affect the
6095      * scheduling of other vCPUs.
6096      */
6097     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6098         gen_update_pc(s, curr_insn_len(s));
6099         s->base.is_jmp = DISAS_YIELD;
6100     }
6101     return true;
6102 }
6103 
6104 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6105 {
6106     /*
6107      * When running single-threaded TCG code, use the helper to ensure that
6108      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6109      * just skip this instruction.  Currently the SEV/SEVL instructions,
6110      * which are *one* of many ways to wake the CPU from WFE, are not
6111      * implemented so we can't sleep like WFI does.
6112      */
6113     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6114         gen_update_pc(s, curr_insn_len(s));
6115         s->base.is_jmp = DISAS_WFE;
6116     }
6117     return true;
6118 }
6119 
6120 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6121 {
6122     /* For WFI, halt the vCPU until an IRQ. */
6123     gen_update_pc(s, curr_insn_len(s));
6124     s->base.is_jmp = DISAS_WFI;
6125     return true;
6126 }
6127 
6128 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6129 {
6130     /*
6131      * For M-profile, minimal-RAS ESB can be a NOP.
6132      * Without RAS, we must implement this as NOP.
6133      */
6134     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6135         /*
6136          * QEMU does not have a source of physical SErrors,
6137          * so we are only concerned with virtual SErrors.
6138          * The pseudocode in the ARM for this case is
6139          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6140          *      AArch32.vESBOperation();
6141          * Most of the condition can be evaluated at translation time.
6142          * Test for EL2 present, and defer test for SEL2 to runtime.
6143          */
6144         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6145             gen_helper_vesb(cpu_env);
6146         }
6147     }
6148     return true;
6149 }
6150 
6151 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6152 {
6153     return true;
6154 }
6155 
6156 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6157 {
6158     uint32_t val = ror32(a->imm, a->rot * 2);
6159     uint32_t mask = msr_mask(s, a->mask, a->r);
6160 
6161     if (gen_set_psr_im(s, mask, a->r, val)) {
6162         unallocated_encoding(s);
6163     }
6164     return true;
6165 }
6166 
6167 /*
6168  * Cyclic Redundancy Check
6169  */
6170 
6171 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6172 {
6173     TCGv_i32 t1, t2, t3;
6174 
6175     if (!dc_isar_feature(aa32_crc32, s)) {
6176         return false;
6177     }
6178 
6179     t1 = load_reg(s, a->rn);
6180     t2 = load_reg(s, a->rm);
6181     switch (sz) {
6182     case MO_8:
6183         gen_uxtb(t2);
6184         break;
6185     case MO_16:
6186         gen_uxth(t2);
6187         break;
6188     case MO_32:
6189         break;
6190     default:
6191         g_assert_not_reached();
6192     }
6193     t3 = tcg_constant_i32(1 << sz);
6194     if (c) {
6195         gen_helper_crc32c(t1, t1, t2, t3);
6196     } else {
6197         gen_helper_crc32(t1, t1, t2, t3);
6198     }
6199     store_reg(s, a->rd, t1);
6200     return true;
6201 }
6202 
6203 #define DO_CRC32(NAME, c, sz) \
6204 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6205     { return op_crc32(s, a, c, sz); }
6206 
6207 DO_CRC32(CRC32B, false, MO_8)
6208 DO_CRC32(CRC32H, false, MO_16)
6209 DO_CRC32(CRC32W, false, MO_32)
6210 DO_CRC32(CRC32CB, true, MO_8)
6211 DO_CRC32(CRC32CH, true, MO_16)
6212 DO_CRC32(CRC32CW, true, MO_32)
6213 
6214 #undef DO_CRC32
6215 
6216 /*
6217  * Miscellaneous instructions
6218  */
6219 
6220 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6221 {
6222     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6223         return false;
6224     }
6225     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6226     return true;
6227 }
6228 
6229 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6230 {
6231     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6232         return false;
6233     }
6234     gen_msr_banked(s, a->r, a->sysm, a->rn);
6235     return true;
6236 }
6237 
6238 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6239 {
6240     TCGv_i32 tmp;
6241 
6242     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6243         return false;
6244     }
6245     if (a->r) {
6246         if (IS_USER(s)) {
6247             unallocated_encoding(s);
6248             return true;
6249         }
6250         tmp = load_cpu_field(spsr);
6251     } else {
6252         tmp = tcg_temp_new_i32();
6253         gen_helper_cpsr_read(tmp, cpu_env);
6254     }
6255     store_reg(s, a->rd, tmp);
6256     return true;
6257 }
6258 
6259 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6260 {
6261     TCGv_i32 tmp;
6262     uint32_t mask = msr_mask(s, a->mask, a->r);
6263 
6264     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6265         return false;
6266     }
6267     tmp = load_reg(s, a->rn);
6268     if (gen_set_psr(s, mask, a->r, tmp)) {
6269         unallocated_encoding(s);
6270     }
6271     return true;
6272 }
6273 
6274 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6275 {
6276     TCGv_i32 tmp;
6277 
6278     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6279         return false;
6280     }
6281     tmp = tcg_temp_new_i32();
6282     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6283     store_reg(s, a->rd, tmp);
6284     return true;
6285 }
6286 
6287 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6288 {
6289     TCGv_i32 addr, reg;
6290 
6291     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6292         return false;
6293     }
6294     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6295     reg = load_reg(s, a->rn);
6296     gen_helper_v7m_msr(cpu_env, addr, reg);
6297     /* If we wrote to CONTROL, the EL might have changed */
6298     gen_rebuild_hflags(s, true);
6299     gen_lookup_tb(s);
6300     return true;
6301 }
6302 
6303 static bool trans_BX(DisasContext *s, arg_BX *a)
6304 {
6305     if (!ENABLE_ARCH_4T) {
6306         return false;
6307     }
6308     gen_bx_excret(s, load_reg(s, a->rm));
6309     return true;
6310 }
6311 
6312 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6313 {
6314     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6315         return false;
6316     }
6317     /*
6318      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6319      * TBFLAGS bit on a basically-never-happens case, so call a helper
6320      * function to check for the trap and raise the exception if needed
6321      * (passing it the register number for the syndrome value).
6322      * v8A doesn't have this HSTR bit.
6323      */
6324     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6325         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6326         s->current_el < 2 && s->ns) {
6327         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6328     }
6329     /* Trivial implementation equivalent to bx.  */
6330     gen_bx(s, load_reg(s, a->rm));
6331     return true;
6332 }
6333 
6334 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6335 {
6336     TCGv_i32 tmp;
6337 
6338     if (!ENABLE_ARCH_5) {
6339         return false;
6340     }
6341     tmp = load_reg(s, a->rm);
6342     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6343     gen_bx(s, tmp);
6344     return true;
6345 }
6346 
6347 /*
6348  * BXNS/BLXNS: only exist for v8M with the security extensions,
6349  * and always UNDEF if NonSecure.  We don't implement these in
6350  * the user-only mode either (in theory you can use them from
6351  * Secure User mode but they are too tied in to system emulation).
6352  */
6353 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6354 {
6355     if (!s->v8m_secure || IS_USER_ONLY) {
6356         unallocated_encoding(s);
6357     } else {
6358         gen_bxns(s, a->rm);
6359     }
6360     return true;
6361 }
6362 
6363 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6364 {
6365     if (!s->v8m_secure || IS_USER_ONLY) {
6366         unallocated_encoding(s);
6367     } else {
6368         gen_blxns(s, a->rm);
6369     }
6370     return true;
6371 }
6372 
6373 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6374 {
6375     TCGv_i32 tmp;
6376 
6377     if (!ENABLE_ARCH_5) {
6378         return false;
6379     }
6380     tmp = load_reg(s, a->rm);
6381     tcg_gen_clzi_i32(tmp, tmp, 32);
6382     store_reg(s, a->rd, tmp);
6383     return true;
6384 }
6385 
6386 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6387 {
6388     TCGv_i32 tmp;
6389 
6390     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6391         return false;
6392     }
6393     if (IS_USER(s)) {
6394         unallocated_encoding(s);
6395         return true;
6396     }
6397     if (s->current_el == 2) {
6398         /* ERET from Hyp uses ELR_Hyp, not LR */
6399         tmp = load_cpu_field_low32(elr_el[2]);
6400     } else {
6401         tmp = load_reg(s, 14);
6402     }
6403     gen_exception_return(s, tmp);
6404     return true;
6405 }
6406 
6407 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6408 {
6409     gen_hlt(s, a->imm);
6410     return true;
6411 }
6412 
6413 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6414 {
6415     if (!ENABLE_ARCH_5) {
6416         return false;
6417     }
6418     /* BKPT is OK with ECI set and leaves it untouched */
6419     s->eci_handled = true;
6420     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6421         semihosting_enabled(s->current_el == 0) &&
6422         (a->imm == 0xab)) {
6423         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6424     } else {
6425         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6426     }
6427     return true;
6428 }
6429 
6430 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6431 {
6432     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6433         return false;
6434     }
6435     if (IS_USER(s)) {
6436         unallocated_encoding(s);
6437     } else {
6438         gen_hvc(s, a->imm);
6439     }
6440     return true;
6441 }
6442 
6443 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6444 {
6445     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6446         return false;
6447     }
6448     if (IS_USER(s)) {
6449         unallocated_encoding(s);
6450     } else {
6451         gen_smc(s);
6452     }
6453     return true;
6454 }
6455 
6456 static bool trans_SG(DisasContext *s, arg_SG *a)
6457 {
6458     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6459         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6460         return false;
6461     }
6462     /*
6463      * SG (v8M only)
6464      * The bulk of the behaviour for this instruction is implemented
6465      * in v7m_handle_execute_nsc(), which deals with the insn when
6466      * it is executed by a CPU in non-secure state from memory
6467      * which is Secure & NonSecure-Callable.
6468      * Here we only need to handle the remaining cases:
6469      *  * in NS memory (including the "security extension not
6470      *    implemented" case) : NOP
6471      *  * in S memory but CPU already secure (clear IT bits)
6472      * We know that the attribute for the memory this insn is
6473      * in must match the current CPU state, because otherwise
6474      * get_phys_addr_pmsav8 would have generated an exception.
6475      */
6476     if (s->v8m_secure) {
6477         /* Like the IT insn, we don't need to generate any code */
6478         s->condexec_cond = 0;
6479         s->condexec_mask = 0;
6480     }
6481     return true;
6482 }
6483 
6484 static bool trans_TT(DisasContext *s, arg_TT *a)
6485 {
6486     TCGv_i32 addr, tmp;
6487 
6488     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6489         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6490         return false;
6491     }
6492     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6493         /* We UNDEF for these UNPREDICTABLE cases */
6494         unallocated_encoding(s);
6495         return true;
6496     }
6497     if (a->A && !s->v8m_secure) {
6498         /* This case is UNDEFINED.  */
6499         unallocated_encoding(s);
6500         return true;
6501     }
6502 
6503     addr = load_reg(s, a->rn);
6504     tmp = tcg_temp_new_i32();
6505     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6506     store_reg(s, a->rd, tmp);
6507     return true;
6508 }
6509 
6510 /*
6511  * Load/store register index
6512  */
6513 
6514 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6515 {
6516     ISSInfo ret;
6517 
6518     /* ISS not valid if writeback */
6519     if (p && !w) {
6520         ret = rd;
6521         if (curr_insn_len(s) == 2) {
6522             ret |= ISSIs16Bit;
6523         }
6524     } else {
6525         ret = ISSInvalid;
6526     }
6527     return ret;
6528 }
6529 
6530 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6531 {
6532     TCGv_i32 addr = load_reg(s, a->rn);
6533 
6534     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6535         gen_helper_v8m_stackcheck(cpu_env, addr);
6536     }
6537 
6538     if (a->p) {
6539         TCGv_i32 ofs = load_reg(s, a->rm);
6540         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6541         if (a->u) {
6542             tcg_gen_add_i32(addr, addr, ofs);
6543         } else {
6544             tcg_gen_sub_i32(addr, addr, ofs);
6545         }
6546     }
6547     return addr;
6548 }
6549 
6550 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6551                             TCGv_i32 addr, int address_offset)
6552 {
6553     if (!a->p) {
6554         TCGv_i32 ofs = load_reg(s, a->rm);
6555         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6556         if (a->u) {
6557             tcg_gen_add_i32(addr, addr, ofs);
6558         } else {
6559             tcg_gen_sub_i32(addr, addr, ofs);
6560         }
6561     } else if (!a->w) {
6562         return;
6563     }
6564     tcg_gen_addi_i32(addr, addr, address_offset);
6565     store_reg(s, a->rn, addr);
6566 }
6567 
6568 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6569                        MemOp mop, int mem_idx)
6570 {
6571     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6572     TCGv_i32 addr, tmp;
6573 
6574     addr = op_addr_rr_pre(s, a);
6575 
6576     tmp = tcg_temp_new_i32();
6577     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6578     disas_set_da_iss(s, mop, issinfo);
6579 
6580     /*
6581      * Perform base writeback before the loaded value to
6582      * ensure correct behavior with overlapping index registers.
6583      */
6584     op_addr_rr_post(s, a, addr, 0);
6585     store_reg_from_load(s, a->rt, tmp);
6586     return true;
6587 }
6588 
6589 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6590                         MemOp mop, int mem_idx)
6591 {
6592     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6593     TCGv_i32 addr, tmp;
6594 
6595     /*
6596      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6597      * is either UNPREDICTABLE or has defined behaviour
6598      */
6599     if (s->thumb && a->rn == 15) {
6600         return false;
6601     }
6602 
6603     addr = op_addr_rr_pre(s, a);
6604 
6605     tmp = load_reg(s, a->rt);
6606     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6607     disas_set_da_iss(s, mop, issinfo);
6608 
6609     op_addr_rr_post(s, a, addr, 0);
6610     return true;
6611 }
6612 
6613 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6614 {
6615     int mem_idx = get_mem_index(s);
6616     TCGv_i32 addr, tmp;
6617 
6618     if (!ENABLE_ARCH_5TE) {
6619         return false;
6620     }
6621     if (a->rt & 1) {
6622         unallocated_encoding(s);
6623         return true;
6624     }
6625     addr = op_addr_rr_pre(s, a);
6626 
6627     tmp = tcg_temp_new_i32();
6628     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6629     store_reg(s, a->rt, tmp);
6630 
6631     tcg_gen_addi_i32(addr, addr, 4);
6632 
6633     tmp = tcg_temp_new_i32();
6634     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6635     store_reg(s, a->rt + 1, tmp);
6636 
6637     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6638     op_addr_rr_post(s, a, addr, -4);
6639     return true;
6640 }
6641 
6642 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6643 {
6644     int mem_idx = get_mem_index(s);
6645     TCGv_i32 addr, tmp;
6646 
6647     if (!ENABLE_ARCH_5TE) {
6648         return false;
6649     }
6650     if (a->rt & 1) {
6651         unallocated_encoding(s);
6652         return true;
6653     }
6654     addr = op_addr_rr_pre(s, a);
6655 
6656     tmp = load_reg(s, a->rt);
6657     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6658 
6659     tcg_gen_addi_i32(addr, addr, 4);
6660 
6661     tmp = load_reg(s, a->rt + 1);
6662     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6663 
6664     op_addr_rr_post(s, a, addr, -4);
6665     return true;
6666 }
6667 
6668 /*
6669  * Load/store immediate index
6670  */
6671 
6672 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6673 {
6674     int ofs = a->imm;
6675 
6676     if (!a->u) {
6677         ofs = -ofs;
6678     }
6679 
6680     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6681         /*
6682          * Stackcheck. Here we know 'addr' is the current SP;
6683          * U is set if we're moving SP up, else down. It is
6684          * UNKNOWN whether the limit check triggers when SP starts
6685          * below the limit and ends up above it; we chose to do so.
6686          */
6687         if (!a->u) {
6688             TCGv_i32 newsp = tcg_temp_new_i32();
6689             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6690             gen_helper_v8m_stackcheck(cpu_env, newsp);
6691         } else {
6692             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6693         }
6694     }
6695 
6696     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6697 }
6698 
6699 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6700                             TCGv_i32 addr, int address_offset)
6701 {
6702     if (!a->p) {
6703         if (a->u) {
6704             address_offset += a->imm;
6705         } else {
6706             address_offset -= a->imm;
6707         }
6708     } else if (!a->w) {
6709         return;
6710     }
6711     tcg_gen_addi_i32(addr, addr, address_offset);
6712     store_reg(s, a->rn, addr);
6713 }
6714 
6715 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6716                        MemOp mop, int mem_idx)
6717 {
6718     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6719     TCGv_i32 addr, tmp;
6720 
6721     addr = op_addr_ri_pre(s, a);
6722 
6723     tmp = tcg_temp_new_i32();
6724     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6725     disas_set_da_iss(s, mop, issinfo);
6726 
6727     /*
6728      * Perform base writeback before the loaded value to
6729      * ensure correct behavior with overlapping index registers.
6730      */
6731     op_addr_ri_post(s, a, addr, 0);
6732     store_reg_from_load(s, a->rt, tmp);
6733     return true;
6734 }
6735 
6736 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6737                         MemOp mop, int mem_idx)
6738 {
6739     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6740     TCGv_i32 addr, tmp;
6741 
6742     /*
6743      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6744      * is either UNPREDICTABLE or has defined behaviour
6745      */
6746     if (s->thumb && a->rn == 15) {
6747         return false;
6748     }
6749 
6750     addr = op_addr_ri_pre(s, a);
6751 
6752     tmp = load_reg(s, a->rt);
6753     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6754     disas_set_da_iss(s, mop, issinfo);
6755 
6756     op_addr_ri_post(s, a, addr, 0);
6757     return true;
6758 }
6759 
6760 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6761 {
6762     int mem_idx = get_mem_index(s);
6763     TCGv_i32 addr, tmp;
6764 
6765     addr = op_addr_ri_pre(s, a);
6766 
6767     tmp = tcg_temp_new_i32();
6768     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6769     store_reg(s, a->rt, tmp);
6770 
6771     tcg_gen_addi_i32(addr, addr, 4);
6772 
6773     tmp = tcg_temp_new_i32();
6774     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6775     store_reg(s, rt2, tmp);
6776 
6777     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6778     op_addr_ri_post(s, a, addr, -4);
6779     return true;
6780 }
6781 
6782 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6783 {
6784     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6785         return false;
6786     }
6787     return op_ldrd_ri(s, a, a->rt + 1);
6788 }
6789 
6790 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6791 {
6792     arg_ldst_ri b = {
6793         .u = a->u, .w = a->w, .p = a->p,
6794         .rn = a->rn, .rt = a->rt, .imm = a->imm
6795     };
6796     return op_ldrd_ri(s, &b, a->rt2);
6797 }
6798 
6799 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6800 {
6801     int mem_idx = get_mem_index(s);
6802     TCGv_i32 addr, tmp;
6803 
6804     addr = op_addr_ri_pre(s, a);
6805 
6806     tmp = load_reg(s, a->rt);
6807     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6808 
6809     tcg_gen_addi_i32(addr, addr, 4);
6810 
6811     tmp = load_reg(s, rt2);
6812     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6813 
6814     op_addr_ri_post(s, a, addr, -4);
6815     return true;
6816 }
6817 
6818 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6819 {
6820     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6821         return false;
6822     }
6823     return op_strd_ri(s, a, a->rt + 1);
6824 }
6825 
6826 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6827 {
6828     arg_ldst_ri b = {
6829         .u = a->u, .w = a->w, .p = a->p,
6830         .rn = a->rn, .rt = a->rt, .imm = a->imm
6831     };
6832     return op_strd_ri(s, &b, a->rt2);
6833 }
6834 
6835 #define DO_LDST(NAME, WHICH, MEMOP) \
6836 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6837 {                                                                     \
6838     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6839 }                                                                     \
6840 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6841 {                                                                     \
6842     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6843 }                                                                     \
6844 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6845 {                                                                     \
6846     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6847 }                                                                     \
6848 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6849 {                                                                     \
6850     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6851 }
6852 
6853 DO_LDST(LDR, load, MO_UL)
6854 DO_LDST(LDRB, load, MO_UB)
6855 DO_LDST(LDRH, load, MO_UW)
6856 DO_LDST(LDRSB, load, MO_SB)
6857 DO_LDST(LDRSH, load, MO_SW)
6858 
6859 DO_LDST(STR, store, MO_UL)
6860 DO_LDST(STRB, store, MO_UB)
6861 DO_LDST(STRH, store, MO_UW)
6862 
6863 #undef DO_LDST
6864 
6865 /*
6866  * Synchronization primitives
6867  */
6868 
6869 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6870 {
6871     TCGv_i32 addr, tmp;
6872     TCGv taddr;
6873 
6874     opc |= s->be_data;
6875     addr = load_reg(s, a->rn);
6876     taddr = gen_aa32_addr(s, addr, opc);
6877 
6878     tmp = load_reg(s, a->rt2);
6879     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6880 
6881     store_reg(s, a->rt, tmp);
6882     return true;
6883 }
6884 
6885 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6886 {
6887     return op_swp(s, a, MO_UL | MO_ALIGN);
6888 }
6889 
6890 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6891 {
6892     return op_swp(s, a, MO_UB);
6893 }
6894 
6895 /*
6896  * Load/Store Exclusive and Load-Acquire/Store-Release
6897  */
6898 
6899 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6900 {
6901     TCGv_i32 addr;
6902     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6903     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6904 
6905     /* We UNDEF for these UNPREDICTABLE cases.  */
6906     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6907         || a->rd == a->rn || a->rd == a->rt
6908         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6909         || (mop == MO_64
6910             && (a->rt2 == 15
6911                 || a->rd == a->rt2
6912                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6913         unallocated_encoding(s);
6914         return true;
6915     }
6916 
6917     if (rel) {
6918         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6919     }
6920 
6921     addr = tcg_temp_new_i32();
6922     load_reg_var(s, addr, a->rn);
6923     tcg_gen_addi_i32(addr, addr, a->imm);
6924 
6925     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6926     return true;
6927 }
6928 
6929 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6930 {
6931     if (!ENABLE_ARCH_6) {
6932         return false;
6933     }
6934     return op_strex(s, a, MO_32, false);
6935 }
6936 
6937 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6938 {
6939     if (!ENABLE_ARCH_6K) {
6940         return false;
6941     }
6942     /* We UNDEF for these UNPREDICTABLE cases.  */
6943     if (a->rt & 1) {
6944         unallocated_encoding(s);
6945         return true;
6946     }
6947     a->rt2 = a->rt + 1;
6948     return op_strex(s, a, MO_64, false);
6949 }
6950 
6951 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6952 {
6953     return op_strex(s, a, MO_64, false);
6954 }
6955 
6956 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6957 {
6958     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6959         return false;
6960     }
6961     return op_strex(s, a, MO_8, false);
6962 }
6963 
6964 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6965 {
6966     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6967         return false;
6968     }
6969     return op_strex(s, a, MO_16, false);
6970 }
6971 
6972 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6973 {
6974     if (!ENABLE_ARCH_8) {
6975         return false;
6976     }
6977     return op_strex(s, a, MO_32, true);
6978 }
6979 
6980 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6981 {
6982     if (!ENABLE_ARCH_8) {
6983         return false;
6984     }
6985     /* We UNDEF for these UNPREDICTABLE cases.  */
6986     if (a->rt & 1) {
6987         unallocated_encoding(s);
6988         return true;
6989     }
6990     a->rt2 = a->rt + 1;
6991     return op_strex(s, a, MO_64, true);
6992 }
6993 
6994 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6995 {
6996     if (!ENABLE_ARCH_8) {
6997         return false;
6998     }
6999     return op_strex(s, a, MO_64, true);
7000 }
7001 
7002 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7003 {
7004     if (!ENABLE_ARCH_8) {
7005         return false;
7006     }
7007     return op_strex(s, a, MO_8, true);
7008 }
7009 
7010 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7011 {
7012     if (!ENABLE_ARCH_8) {
7013         return false;
7014     }
7015     return op_strex(s, a, MO_16, true);
7016 }
7017 
7018 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7019 {
7020     TCGv_i32 addr, tmp;
7021 
7022     if (!ENABLE_ARCH_8) {
7023         return false;
7024     }
7025     /* We UNDEF for these UNPREDICTABLE cases.  */
7026     if (a->rn == 15 || a->rt == 15) {
7027         unallocated_encoding(s);
7028         return true;
7029     }
7030 
7031     addr = load_reg(s, a->rn);
7032     tmp = load_reg(s, a->rt);
7033     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7034     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7035     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7036 
7037     return true;
7038 }
7039 
7040 static bool trans_STL(DisasContext *s, arg_STL *a)
7041 {
7042     return op_stl(s, a, MO_UL);
7043 }
7044 
7045 static bool trans_STLB(DisasContext *s, arg_STL *a)
7046 {
7047     return op_stl(s, a, MO_UB);
7048 }
7049 
7050 static bool trans_STLH(DisasContext *s, arg_STL *a)
7051 {
7052     return op_stl(s, a, MO_UW);
7053 }
7054 
7055 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7056 {
7057     TCGv_i32 addr;
7058     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7059     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7060 
7061     /* We UNDEF for these UNPREDICTABLE cases.  */
7062     if (a->rn == 15 || a->rt == 15
7063         || (!v8a && s->thumb && a->rt == 13)
7064         || (mop == MO_64
7065             && (a->rt2 == 15 || a->rt == a->rt2
7066                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7067         unallocated_encoding(s);
7068         return true;
7069     }
7070 
7071     addr = tcg_temp_new_i32();
7072     load_reg_var(s, addr, a->rn);
7073     tcg_gen_addi_i32(addr, addr, a->imm);
7074 
7075     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7076 
7077     if (acq) {
7078         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7079     }
7080     return true;
7081 }
7082 
7083 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7084 {
7085     if (!ENABLE_ARCH_6) {
7086         return false;
7087     }
7088     return op_ldrex(s, a, MO_32, false);
7089 }
7090 
7091 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7092 {
7093     if (!ENABLE_ARCH_6K) {
7094         return false;
7095     }
7096     /* We UNDEF for these UNPREDICTABLE cases.  */
7097     if (a->rt & 1) {
7098         unallocated_encoding(s);
7099         return true;
7100     }
7101     a->rt2 = a->rt + 1;
7102     return op_ldrex(s, a, MO_64, false);
7103 }
7104 
7105 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7106 {
7107     return op_ldrex(s, a, MO_64, false);
7108 }
7109 
7110 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7111 {
7112     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7113         return false;
7114     }
7115     return op_ldrex(s, a, MO_8, false);
7116 }
7117 
7118 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7119 {
7120     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7121         return false;
7122     }
7123     return op_ldrex(s, a, MO_16, false);
7124 }
7125 
7126 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7127 {
7128     if (!ENABLE_ARCH_8) {
7129         return false;
7130     }
7131     return op_ldrex(s, a, MO_32, true);
7132 }
7133 
7134 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7135 {
7136     if (!ENABLE_ARCH_8) {
7137         return false;
7138     }
7139     /* We UNDEF for these UNPREDICTABLE cases.  */
7140     if (a->rt & 1) {
7141         unallocated_encoding(s);
7142         return true;
7143     }
7144     a->rt2 = a->rt + 1;
7145     return op_ldrex(s, a, MO_64, true);
7146 }
7147 
7148 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7149 {
7150     if (!ENABLE_ARCH_8) {
7151         return false;
7152     }
7153     return op_ldrex(s, a, MO_64, true);
7154 }
7155 
7156 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7157 {
7158     if (!ENABLE_ARCH_8) {
7159         return false;
7160     }
7161     return op_ldrex(s, a, MO_8, true);
7162 }
7163 
7164 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7165 {
7166     if (!ENABLE_ARCH_8) {
7167         return false;
7168     }
7169     return op_ldrex(s, a, MO_16, true);
7170 }
7171 
7172 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7173 {
7174     TCGv_i32 addr, tmp;
7175 
7176     if (!ENABLE_ARCH_8) {
7177         return false;
7178     }
7179     /* We UNDEF for these UNPREDICTABLE cases.  */
7180     if (a->rn == 15 || a->rt == 15) {
7181         unallocated_encoding(s);
7182         return true;
7183     }
7184 
7185     addr = load_reg(s, a->rn);
7186     tmp = tcg_temp_new_i32();
7187     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7188     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7189 
7190     store_reg(s, a->rt, tmp);
7191     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7192     return true;
7193 }
7194 
7195 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7196 {
7197     return op_lda(s, a, MO_UL);
7198 }
7199 
7200 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7201 {
7202     return op_lda(s, a, MO_UB);
7203 }
7204 
7205 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7206 {
7207     return op_lda(s, a, MO_UW);
7208 }
7209 
7210 /*
7211  * Media instructions
7212  */
7213 
7214 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7215 {
7216     TCGv_i32 t1, t2;
7217 
7218     if (!ENABLE_ARCH_6) {
7219         return false;
7220     }
7221 
7222     t1 = load_reg(s, a->rn);
7223     t2 = load_reg(s, a->rm);
7224     gen_helper_usad8(t1, t1, t2);
7225     if (a->ra != 15) {
7226         t2 = load_reg(s, a->ra);
7227         tcg_gen_add_i32(t1, t1, t2);
7228     }
7229     store_reg(s, a->rd, t1);
7230     return true;
7231 }
7232 
7233 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7234 {
7235     TCGv_i32 tmp;
7236     int width = a->widthm1 + 1;
7237     int shift = a->lsb;
7238 
7239     if (!ENABLE_ARCH_6T2) {
7240         return false;
7241     }
7242     if (shift + width > 32) {
7243         /* UNPREDICTABLE; we choose to UNDEF */
7244         unallocated_encoding(s);
7245         return true;
7246     }
7247 
7248     tmp = load_reg(s, a->rn);
7249     if (u) {
7250         tcg_gen_extract_i32(tmp, tmp, shift, width);
7251     } else {
7252         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7253     }
7254     store_reg(s, a->rd, tmp);
7255     return true;
7256 }
7257 
7258 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7259 {
7260     return op_bfx(s, a, false);
7261 }
7262 
7263 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7264 {
7265     return op_bfx(s, a, true);
7266 }
7267 
7268 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7269 {
7270     int msb = a->msb, lsb = a->lsb;
7271     TCGv_i32 t_in, t_rd;
7272     int width;
7273 
7274     if (!ENABLE_ARCH_6T2) {
7275         return false;
7276     }
7277     if (msb < lsb) {
7278         /* UNPREDICTABLE; we choose to UNDEF */
7279         unallocated_encoding(s);
7280         return true;
7281     }
7282 
7283     width = msb + 1 - lsb;
7284     if (a->rn == 15) {
7285         /* BFC */
7286         t_in = tcg_constant_i32(0);
7287     } else {
7288         /* BFI */
7289         t_in = load_reg(s, a->rn);
7290     }
7291     t_rd = load_reg(s, a->rd);
7292     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7293     store_reg(s, a->rd, t_rd);
7294     return true;
7295 }
7296 
7297 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7298 {
7299     unallocated_encoding(s);
7300     return true;
7301 }
7302 
7303 /*
7304  * Parallel addition and subtraction
7305  */
7306 
7307 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7308                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7309 {
7310     TCGv_i32 t0, t1;
7311 
7312     if (s->thumb
7313         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7314         : !ENABLE_ARCH_6) {
7315         return false;
7316     }
7317 
7318     t0 = load_reg(s, a->rn);
7319     t1 = load_reg(s, a->rm);
7320 
7321     gen(t0, t0, t1);
7322 
7323     store_reg(s, a->rd, t0);
7324     return true;
7325 }
7326 
7327 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7328                              void (*gen)(TCGv_i32, TCGv_i32,
7329                                          TCGv_i32, TCGv_ptr))
7330 {
7331     TCGv_i32 t0, t1;
7332     TCGv_ptr ge;
7333 
7334     if (s->thumb
7335         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7336         : !ENABLE_ARCH_6) {
7337         return false;
7338     }
7339 
7340     t0 = load_reg(s, a->rn);
7341     t1 = load_reg(s, a->rm);
7342 
7343     ge = tcg_temp_new_ptr();
7344     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7345     gen(t0, t0, t1, ge);
7346 
7347     store_reg(s, a->rd, t0);
7348     return true;
7349 }
7350 
7351 #define DO_PAR_ADDSUB(NAME, helper) \
7352 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7353 {                                                       \
7354     return op_par_addsub(s, a, helper);                 \
7355 }
7356 
7357 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7358 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7359 {                                                       \
7360     return op_par_addsub_ge(s, a, helper);              \
7361 }
7362 
7363 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7364 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7365 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7366 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7367 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7368 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7369 
7370 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7371 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7372 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7373 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7374 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7375 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7376 
7377 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7378 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7379 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7380 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7381 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7382 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7383 
7384 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7385 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7386 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7387 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7388 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7389 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7390 
7391 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7392 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7393 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7394 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7395 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7396 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7397 
7398 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7399 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7400 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7401 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7402 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7403 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7404 
7405 #undef DO_PAR_ADDSUB
7406 #undef DO_PAR_ADDSUB_GE
7407 
7408 /*
7409  * Packing, unpacking, saturation, and reversal
7410  */
7411 
7412 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7413 {
7414     TCGv_i32 tn, tm;
7415     int shift = a->imm;
7416 
7417     if (s->thumb
7418         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7419         : !ENABLE_ARCH_6) {
7420         return false;
7421     }
7422 
7423     tn = load_reg(s, a->rn);
7424     tm = load_reg(s, a->rm);
7425     if (a->tb) {
7426         /* PKHTB */
7427         if (shift == 0) {
7428             shift = 31;
7429         }
7430         tcg_gen_sari_i32(tm, tm, shift);
7431         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7432     } else {
7433         /* PKHBT */
7434         tcg_gen_shli_i32(tm, tm, shift);
7435         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7436     }
7437     store_reg(s, a->rd, tn);
7438     return true;
7439 }
7440 
7441 static bool op_sat(DisasContext *s, arg_sat *a,
7442                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7443 {
7444     TCGv_i32 tmp;
7445     int shift = a->imm;
7446 
7447     if (!ENABLE_ARCH_6) {
7448         return false;
7449     }
7450 
7451     tmp = load_reg(s, a->rn);
7452     if (a->sh) {
7453         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7454     } else {
7455         tcg_gen_shli_i32(tmp, tmp, shift);
7456     }
7457 
7458     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7459 
7460     store_reg(s, a->rd, tmp);
7461     return true;
7462 }
7463 
7464 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7465 {
7466     return op_sat(s, a, gen_helper_ssat);
7467 }
7468 
7469 static bool trans_USAT(DisasContext *s, arg_sat *a)
7470 {
7471     return op_sat(s, a, gen_helper_usat);
7472 }
7473 
7474 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7475 {
7476     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7477         return false;
7478     }
7479     return op_sat(s, a, gen_helper_ssat16);
7480 }
7481 
7482 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7483 {
7484     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7485         return false;
7486     }
7487     return op_sat(s, a, gen_helper_usat16);
7488 }
7489 
7490 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7491                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7492                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7493 {
7494     TCGv_i32 tmp;
7495 
7496     if (!ENABLE_ARCH_6) {
7497         return false;
7498     }
7499 
7500     tmp = load_reg(s, a->rm);
7501     /*
7502      * TODO: In many cases we could do a shift instead of a rotate.
7503      * Combined with a simple extend, that becomes an extract.
7504      */
7505     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7506     gen_extract(tmp, tmp);
7507 
7508     if (a->rn != 15) {
7509         TCGv_i32 tmp2 = load_reg(s, a->rn);
7510         gen_add(tmp, tmp, tmp2);
7511     }
7512     store_reg(s, a->rd, tmp);
7513     return true;
7514 }
7515 
7516 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7517 {
7518     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7519 }
7520 
7521 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7522 {
7523     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7524 }
7525 
7526 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7527 {
7528     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7529         return false;
7530     }
7531     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7532 }
7533 
7534 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7535 {
7536     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7537 }
7538 
7539 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7540 {
7541     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7542 }
7543 
7544 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7545 {
7546     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7547         return false;
7548     }
7549     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7550 }
7551 
7552 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7553 {
7554     TCGv_i32 t1, t2, t3;
7555 
7556     if (s->thumb
7557         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7558         : !ENABLE_ARCH_6) {
7559         return false;
7560     }
7561 
7562     t1 = load_reg(s, a->rn);
7563     t2 = load_reg(s, a->rm);
7564     t3 = tcg_temp_new_i32();
7565     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7566     gen_helper_sel_flags(t1, t3, t1, t2);
7567     store_reg(s, a->rd, t1);
7568     return true;
7569 }
7570 
7571 static bool op_rr(DisasContext *s, arg_rr *a,
7572                   void (*gen)(TCGv_i32, TCGv_i32))
7573 {
7574     TCGv_i32 tmp;
7575 
7576     tmp = load_reg(s, a->rm);
7577     gen(tmp, tmp);
7578     store_reg(s, a->rd, tmp);
7579     return true;
7580 }
7581 
7582 static bool trans_REV(DisasContext *s, arg_rr *a)
7583 {
7584     if (!ENABLE_ARCH_6) {
7585         return false;
7586     }
7587     return op_rr(s, a, tcg_gen_bswap32_i32);
7588 }
7589 
7590 static bool trans_REV16(DisasContext *s, arg_rr *a)
7591 {
7592     if (!ENABLE_ARCH_6) {
7593         return false;
7594     }
7595     return op_rr(s, a, gen_rev16);
7596 }
7597 
7598 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7599 {
7600     if (!ENABLE_ARCH_6) {
7601         return false;
7602     }
7603     return op_rr(s, a, gen_revsh);
7604 }
7605 
7606 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7607 {
7608     if (!ENABLE_ARCH_6T2) {
7609         return false;
7610     }
7611     return op_rr(s, a, gen_helper_rbit);
7612 }
7613 
7614 /*
7615  * Signed multiply, signed and unsigned divide
7616  */
7617 
7618 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7619 {
7620     TCGv_i32 t1, t2;
7621 
7622     if (!ENABLE_ARCH_6) {
7623         return false;
7624     }
7625 
7626     t1 = load_reg(s, a->rn);
7627     t2 = load_reg(s, a->rm);
7628     if (m_swap) {
7629         gen_swap_half(t2, t2);
7630     }
7631     gen_smul_dual(t1, t2);
7632 
7633     if (sub) {
7634         /*
7635          * This subtraction cannot overflow, so we can do a simple
7636          * 32-bit subtraction and then a possible 32-bit saturating
7637          * addition of Ra.
7638          */
7639         tcg_gen_sub_i32(t1, t1, t2);
7640 
7641         if (a->ra != 15) {
7642             t2 = load_reg(s, a->ra);
7643             gen_helper_add_setq(t1, cpu_env, t1, t2);
7644         }
7645     } else if (a->ra == 15) {
7646         /* Single saturation-checking addition */
7647         gen_helper_add_setq(t1, cpu_env, t1, t2);
7648     } else {
7649         /*
7650          * We need to add the products and Ra together and then
7651          * determine whether the final result overflowed. Doing
7652          * this as two separate add-and-check-overflow steps incorrectly
7653          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7654          * Do all the arithmetic at 64-bits and then check for overflow.
7655          */
7656         TCGv_i64 p64, q64;
7657         TCGv_i32 t3, qf, one;
7658 
7659         p64 = tcg_temp_new_i64();
7660         q64 = tcg_temp_new_i64();
7661         tcg_gen_ext_i32_i64(p64, t1);
7662         tcg_gen_ext_i32_i64(q64, t2);
7663         tcg_gen_add_i64(p64, p64, q64);
7664         load_reg_var(s, t2, a->ra);
7665         tcg_gen_ext_i32_i64(q64, t2);
7666         tcg_gen_add_i64(p64, p64, q64);
7667 
7668         tcg_gen_extr_i64_i32(t1, t2, p64);
7669         /*
7670          * t1 is the low half of the result which goes into Rd.
7671          * We have overflow and must set Q if the high half (t2)
7672          * is different from the sign-extension of t1.
7673          */
7674         t3 = tcg_temp_new_i32();
7675         tcg_gen_sari_i32(t3, t1, 31);
7676         qf = load_cpu_field(QF);
7677         one = tcg_constant_i32(1);
7678         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7679         store_cpu_field(qf, QF);
7680     }
7681     store_reg(s, a->rd, t1);
7682     return true;
7683 }
7684 
7685 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7686 {
7687     return op_smlad(s, a, false, false);
7688 }
7689 
7690 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7691 {
7692     return op_smlad(s, a, true, false);
7693 }
7694 
7695 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7696 {
7697     return op_smlad(s, a, false, true);
7698 }
7699 
7700 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7701 {
7702     return op_smlad(s, a, true, true);
7703 }
7704 
7705 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7706 {
7707     TCGv_i32 t1, t2;
7708     TCGv_i64 l1, l2;
7709 
7710     if (!ENABLE_ARCH_6) {
7711         return false;
7712     }
7713 
7714     t1 = load_reg(s, a->rn);
7715     t2 = load_reg(s, a->rm);
7716     if (m_swap) {
7717         gen_swap_half(t2, t2);
7718     }
7719     gen_smul_dual(t1, t2);
7720 
7721     l1 = tcg_temp_new_i64();
7722     l2 = tcg_temp_new_i64();
7723     tcg_gen_ext_i32_i64(l1, t1);
7724     tcg_gen_ext_i32_i64(l2, t2);
7725 
7726     if (sub) {
7727         tcg_gen_sub_i64(l1, l1, l2);
7728     } else {
7729         tcg_gen_add_i64(l1, l1, l2);
7730     }
7731 
7732     gen_addq(s, l1, a->ra, a->rd);
7733     gen_storeq_reg(s, a->ra, a->rd, l1);
7734     return true;
7735 }
7736 
7737 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7738 {
7739     return op_smlald(s, a, false, false);
7740 }
7741 
7742 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7743 {
7744     return op_smlald(s, a, true, false);
7745 }
7746 
7747 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7748 {
7749     return op_smlald(s, a, false, true);
7750 }
7751 
7752 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7753 {
7754     return op_smlald(s, a, true, true);
7755 }
7756 
7757 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7758 {
7759     TCGv_i32 t1, t2;
7760 
7761     if (s->thumb
7762         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7763         : !ENABLE_ARCH_6) {
7764         return false;
7765     }
7766 
7767     t1 = load_reg(s, a->rn);
7768     t2 = load_reg(s, a->rm);
7769     tcg_gen_muls2_i32(t2, t1, t1, t2);
7770 
7771     if (a->ra != 15) {
7772         TCGv_i32 t3 = load_reg(s, a->ra);
7773         if (sub) {
7774             /*
7775              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
7776              * a non-zero multiplicand lowpart, and the correct result
7777              * lowpart for rounding.
7778              */
7779             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7780         } else {
7781             tcg_gen_add_i32(t1, t1, t3);
7782         }
7783     }
7784     if (round) {
7785         /*
7786          * Adding 0x80000000 to the 64-bit quantity means that we have
7787          * carry in to the high word when the low word has the msb set.
7788          */
7789         tcg_gen_shri_i32(t2, t2, 31);
7790         tcg_gen_add_i32(t1, t1, t2);
7791     }
7792     store_reg(s, a->rd, t1);
7793     return true;
7794 }
7795 
7796 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7797 {
7798     return op_smmla(s, a, false, false);
7799 }
7800 
7801 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7802 {
7803     return op_smmla(s, a, true, false);
7804 }
7805 
7806 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7807 {
7808     return op_smmla(s, a, false, true);
7809 }
7810 
7811 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7812 {
7813     return op_smmla(s, a, true, true);
7814 }
7815 
7816 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7817 {
7818     TCGv_i32 t1, t2;
7819 
7820     if (s->thumb
7821         ? !dc_isar_feature(aa32_thumb_div, s)
7822         : !dc_isar_feature(aa32_arm_div, s)) {
7823         return false;
7824     }
7825 
7826     t1 = load_reg(s, a->rn);
7827     t2 = load_reg(s, a->rm);
7828     if (u) {
7829         gen_helper_udiv(t1, cpu_env, t1, t2);
7830     } else {
7831         gen_helper_sdiv(t1, cpu_env, t1, t2);
7832     }
7833     store_reg(s, a->rd, t1);
7834     return true;
7835 }
7836 
7837 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7838 {
7839     return op_div(s, a, false);
7840 }
7841 
7842 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7843 {
7844     return op_div(s, a, true);
7845 }
7846 
7847 /*
7848  * Block data transfer
7849  */
7850 
7851 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7852 {
7853     TCGv_i32 addr = load_reg(s, a->rn);
7854 
7855     if (a->b) {
7856         if (a->i) {
7857             /* pre increment */
7858             tcg_gen_addi_i32(addr, addr, 4);
7859         } else {
7860             /* pre decrement */
7861             tcg_gen_addi_i32(addr, addr, -(n * 4));
7862         }
7863     } else if (!a->i && n != 1) {
7864         /* post decrement */
7865         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7866     }
7867 
7868     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7869         /*
7870          * If the writeback is incrementing SP rather than
7871          * decrementing it, and the initial SP is below the
7872          * stack limit but the final written-back SP would
7873          * be above, then we must not perform any memory
7874          * accesses, but it is IMPDEF whether we generate
7875          * an exception. We choose to do so in this case.
7876          * At this point 'addr' is the lowest address, so
7877          * either the original SP (if incrementing) or our
7878          * final SP (if decrementing), so that's what we check.
7879          */
7880         gen_helper_v8m_stackcheck(cpu_env, addr);
7881     }
7882 
7883     return addr;
7884 }
7885 
7886 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7887                                TCGv_i32 addr, int n)
7888 {
7889     if (a->w) {
7890         /* write back */
7891         if (!a->b) {
7892             if (a->i) {
7893                 /* post increment */
7894                 tcg_gen_addi_i32(addr, addr, 4);
7895             } else {
7896                 /* post decrement */
7897                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7898             }
7899         } else if (!a->i && n != 1) {
7900             /* pre decrement */
7901             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7902         }
7903         store_reg(s, a->rn, addr);
7904     }
7905 }
7906 
7907 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7908 {
7909     int i, j, n, list, mem_idx;
7910     bool user = a->u;
7911     TCGv_i32 addr, tmp;
7912 
7913     if (user) {
7914         /* STM (user) */
7915         if (IS_USER(s)) {
7916             /* Only usable in supervisor mode.  */
7917             unallocated_encoding(s);
7918             return true;
7919         }
7920     }
7921 
7922     list = a->list;
7923     n = ctpop16(list);
7924     if (n < min_n || a->rn == 15) {
7925         unallocated_encoding(s);
7926         return true;
7927     }
7928 
7929     s->eci_handled = true;
7930 
7931     addr = op_addr_block_pre(s, a, n);
7932     mem_idx = get_mem_index(s);
7933 
7934     for (i = j = 0; i < 16; i++) {
7935         if (!(list & (1 << i))) {
7936             continue;
7937         }
7938 
7939         if (user && i != 15) {
7940             tmp = tcg_temp_new_i32();
7941             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
7942         } else {
7943             tmp = load_reg(s, i);
7944         }
7945         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7946 
7947         /* No need to add after the last transfer.  */
7948         if (++j != n) {
7949             tcg_gen_addi_i32(addr, addr, 4);
7950         }
7951     }
7952 
7953     op_addr_block_post(s, a, addr, n);
7954     clear_eci_state(s);
7955     return true;
7956 }
7957 
7958 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7959 {
7960     /* BitCount(list) < 1 is UNPREDICTABLE */
7961     return op_stm(s, a, 1);
7962 }
7963 
7964 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7965 {
7966     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7967     if (a->w && (a->list & (1 << a->rn))) {
7968         unallocated_encoding(s);
7969         return true;
7970     }
7971     /* BitCount(list) < 2 is UNPREDICTABLE */
7972     return op_stm(s, a, 2);
7973 }
7974 
7975 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7976 {
7977     int i, j, n, list, mem_idx;
7978     bool loaded_base;
7979     bool user = a->u;
7980     bool exc_return = false;
7981     TCGv_i32 addr, tmp, loaded_var;
7982 
7983     if (user) {
7984         /* LDM (user), LDM (exception return) */
7985         if (IS_USER(s)) {
7986             /* Only usable in supervisor mode.  */
7987             unallocated_encoding(s);
7988             return true;
7989         }
7990         if (extract32(a->list, 15, 1)) {
7991             exc_return = true;
7992             user = false;
7993         } else {
7994             /* LDM (user) does not allow writeback.  */
7995             if (a->w) {
7996                 unallocated_encoding(s);
7997                 return true;
7998             }
7999         }
8000     }
8001 
8002     list = a->list;
8003     n = ctpop16(list);
8004     if (n < min_n || a->rn == 15) {
8005         unallocated_encoding(s);
8006         return true;
8007     }
8008 
8009     s->eci_handled = true;
8010 
8011     addr = op_addr_block_pre(s, a, n);
8012     mem_idx = get_mem_index(s);
8013     loaded_base = false;
8014     loaded_var = NULL;
8015 
8016     for (i = j = 0; i < 16; i++) {
8017         if (!(list & (1 << i))) {
8018             continue;
8019         }
8020 
8021         tmp = tcg_temp_new_i32();
8022         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8023         if (user) {
8024             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8025         } else if (i == a->rn) {
8026             loaded_var = tmp;
8027             loaded_base = true;
8028         } else if (i == 15 && exc_return) {
8029             store_pc_exc_ret(s, tmp);
8030         } else {
8031             store_reg_from_load(s, i, tmp);
8032         }
8033 
8034         /* No need to add after the last transfer.  */
8035         if (++j != n) {
8036             tcg_gen_addi_i32(addr, addr, 4);
8037         }
8038     }
8039 
8040     op_addr_block_post(s, a, addr, n);
8041 
8042     if (loaded_base) {
8043         /* Note that we reject base == pc above.  */
8044         store_reg(s, a->rn, loaded_var);
8045     }
8046 
8047     if (exc_return) {
8048         /* Restore CPSR from SPSR.  */
8049         tmp = load_cpu_field(spsr);
8050         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8051             gen_io_start();
8052         }
8053         gen_helper_cpsr_write_eret(cpu_env, tmp);
8054         /* Must exit loop to check un-masked IRQs */
8055         s->base.is_jmp = DISAS_EXIT;
8056     }
8057     clear_eci_state(s);
8058     return true;
8059 }
8060 
8061 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8062 {
8063     /*
8064      * Writeback register in register list is UNPREDICTABLE
8065      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8066      * an UNKNOWN value to the base register.
8067      */
8068     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8069         unallocated_encoding(s);
8070         return true;
8071     }
8072     /* BitCount(list) < 1 is UNPREDICTABLE */
8073     return do_ldm(s, a, 1);
8074 }
8075 
8076 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8077 {
8078     /* Writeback register in register list is UNPREDICTABLE for T32. */
8079     if (a->w && (a->list & (1 << a->rn))) {
8080         unallocated_encoding(s);
8081         return true;
8082     }
8083     /* BitCount(list) < 2 is UNPREDICTABLE */
8084     return do_ldm(s, a, 2);
8085 }
8086 
8087 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8088 {
8089     /* Writeback is conditional on the base register not being loaded.  */
8090     a->w = !(a->list & (1 << a->rn));
8091     /* BitCount(list) < 1 is UNPREDICTABLE */
8092     return do_ldm(s, a, 1);
8093 }
8094 
8095 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8096 {
8097     int i;
8098     TCGv_i32 zero;
8099 
8100     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8101         return false;
8102     }
8103 
8104     if (extract32(a->list, 13, 1)) {
8105         return false;
8106     }
8107 
8108     if (!a->list) {
8109         /* UNPREDICTABLE; we choose to UNDEF */
8110         return false;
8111     }
8112 
8113     s->eci_handled = true;
8114 
8115     zero = tcg_constant_i32(0);
8116     for (i = 0; i < 15; i++) {
8117         if (extract32(a->list, i, 1)) {
8118             /* Clear R[i] */
8119             tcg_gen_mov_i32(cpu_R[i], zero);
8120         }
8121     }
8122     if (extract32(a->list, 15, 1)) {
8123         /*
8124          * Clear APSR (by calling the MSR helper with the same argument
8125          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8126          */
8127         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8128     }
8129     clear_eci_state(s);
8130     return true;
8131 }
8132 
8133 /*
8134  * Branch, branch with link
8135  */
8136 
8137 static bool trans_B(DisasContext *s, arg_i *a)
8138 {
8139     gen_jmp(s, jmp_diff(s, a->imm));
8140     return true;
8141 }
8142 
8143 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8144 {
8145     /* This has cond from encoding, required to be outside IT block.  */
8146     if (a->cond >= 0xe) {
8147         return false;
8148     }
8149     if (s->condexec_mask) {
8150         unallocated_encoding(s);
8151         return true;
8152     }
8153     arm_skip_unless(s, a->cond);
8154     gen_jmp(s, jmp_diff(s, a->imm));
8155     return true;
8156 }
8157 
8158 static bool trans_BL(DisasContext *s, arg_i *a)
8159 {
8160     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8161     gen_jmp(s, jmp_diff(s, a->imm));
8162     return true;
8163 }
8164 
8165 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8166 {
8167     /*
8168      * BLX <imm> would be useless on M-profile; the encoding space
8169      * is used for other insns from v8.1M onward, and UNDEFs before that.
8170      */
8171     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8172         return false;
8173     }
8174 
8175     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8176     if (s->thumb && (a->imm & 2)) {
8177         return false;
8178     }
8179     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8180     store_cpu_field_constant(!s->thumb, thumb);
8181     /* This jump is computed from an aligned PC: subtract off the low bits. */
8182     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8183     return true;
8184 }
8185 
8186 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8187 {
8188     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8189     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8190     return true;
8191 }
8192 
8193 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8194 {
8195     TCGv_i32 tmp = tcg_temp_new_i32();
8196 
8197     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8198     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8199     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8200     gen_bx(s, tmp);
8201     return true;
8202 }
8203 
8204 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8205 {
8206     TCGv_i32 tmp;
8207 
8208     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8209     if (!ENABLE_ARCH_5) {
8210         return false;
8211     }
8212     tmp = tcg_temp_new_i32();
8213     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8214     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8215     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8216     gen_bx(s, tmp);
8217     return true;
8218 }
8219 
8220 static bool trans_BF(DisasContext *s, arg_BF *a)
8221 {
8222     /*
8223      * M-profile branch future insns. The architecture permits an
8224      * implementation to implement these as NOPs (equivalent to
8225      * discarding the LO_BRANCH_INFO cache immediately), and we
8226      * take that IMPDEF option because for QEMU a "real" implementation
8227      * would be complicated and wouldn't execute any faster.
8228      */
8229     if (!dc_isar_feature(aa32_lob, s)) {
8230         return false;
8231     }
8232     if (a->boff == 0) {
8233         /* SEE "Related encodings" (loop insns) */
8234         return false;
8235     }
8236     /* Handle as NOP */
8237     return true;
8238 }
8239 
8240 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8241 {
8242     /* M-profile low-overhead loop start */
8243     TCGv_i32 tmp;
8244 
8245     if (!dc_isar_feature(aa32_lob, s)) {
8246         return false;
8247     }
8248     if (a->rn == 13 || a->rn == 15) {
8249         /*
8250          * For DLSTP rn == 15 is a related encoding (LCTP); the
8251          * other cases caught by this condition are all
8252          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8253          */
8254         return false;
8255     }
8256 
8257     if (a->size != 4) {
8258         /* DLSTP */
8259         if (!dc_isar_feature(aa32_mve, s)) {
8260             return false;
8261         }
8262         if (!vfp_access_check(s)) {
8263             return true;
8264         }
8265     }
8266 
8267     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8268     tmp = load_reg(s, a->rn);
8269     store_reg(s, 14, tmp);
8270     if (a->size != 4) {
8271         /* DLSTP: set FPSCR.LTPSIZE */
8272         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8273         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8274     }
8275     return true;
8276 }
8277 
8278 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8279 {
8280     /* M-profile low-overhead while-loop start */
8281     TCGv_i32 tmp;
8282     DisasLabel nextlabel;
8283 
8284     if (!dc_isar_feature(aa32_lob, s)) {
8285         return false;
8286     }
8287     if (a->rn == 13 || a->rn == 15) {
8288         /*
8289          * For WLSTP rn == 15 is a related encoding (LE); the
8290          * other cases caught by this condition are all
8291          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8292          */
8293         return false;
8294     }
8295     if (s->condexec_mask) {
8296         /*
8297          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8298          * we choose to UNDEF, because otherwise our use of
8299          * gen_goto_tb(1) would clash with the use of TB exit 1
8300          * in the dc->condjmp condition-failed codepath in
8301          * arm_tr_tb_stop() and we'd get an assertion.
8302          */
8303         return false;
8304     }
8305     if (a->size != 4) {
8306         /* WLSTP */
8307         if (!dc_isar_feature(aa32_mve, s)) {
8308             return false;
8309         }
8310         /*
8311          * We need to check that the FPU is enabled here, but mustn't
8312          * call vfp_access_check() to do that because we don't want to
8313          * do the lazy state preservation in the "loop count is zero" case.
8314          * Do the check-and-raise-exception by hand.
8315          */
8316         if (s->fp_excp_el) {
8317             gen_exception_insn_el(s, 0, EXCP_NOCP,
8318                                   syn_uncategorized(), s->fp_excp_el);
8319             return true;
8320         }
8321     }
8322 
8323     nextlabel = gen_disas_label(s);
8324     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8325     tmp = load_reg(s, a->rn);
8326     store_reg(s, 14, tmp);
8327     if (a->size != 4) {
8328         /*
8329          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8330          * lazy state preservation, new FP context creation, etc,
8331          * that vfp_access_check() does. We know that the actual
8332          * access check will succeed (ie it won't generate code that
8333          * throws an exception) because we did that check by hand earlier.
8334          */
8335         bool ok = vfp_access_check(s);
8336         assert(ok);
8337         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8338         /*
8339          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8340          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8341          */
8342     }
8343     gen_jmp_tb(s, curr_insn_len(s), 1);
8344 
8345     set_disas_label(s, nextlabel);
8346     gen_jmp(s, jmp_diff(s, a->imm));
8347     return true;
8348 }
8349 
/*
 * Translate the M-profile LE / LETP (loop end) instructions.
 *
 * a->f selects the loop-forever form, a->tp the tail-predicated (LETP)
 * form and a->imm the distance back to the start of the loop.
 * Returns false when the encoding is rejected here (feature absent,
 * f and tp both set, or inside an IT block) and true once code has
 * been emitted or the FP access check has not passed.
 */
static bool trans_LE(DisasContext *s, arg_LE *a)
{
    /*
     * M-profile low-overhead loop end. The architecture permits an
     * implementation to discard the LO_BRANCH_INFO cache at any time,
     * and we take the IMPDEF option to never set it in the first place
     * (equivalent to always discarding it immediately), because for QEMU
     * a "real" implementation would be complicated and wouldn't execute
     * any faster.
     */
    TCGv_i32 tmp;
    DisasLabel loopend;
    bool fpu_active;

    if (!dc_isar_feature(aa32_lob, s)) {
        return false;
    }
    /* The loop-forever and tail-predicated flags may not both be set */
    if (a->f && a->tp) {
        return false;
    }
    if (s->condexec_mask) {
        /*
         * LE in an IT block is CONSTRAINED UNPREDICTABLE;
         * we choose to UNDEF, because otherwise our use of
         * gen_goto_tb(1) would clash with the use of TB exit 1
         * in the dc->condjmp condition-failed codepath in
         * arm_tr_tb_stop() and we'd get an assertion.
         */
        return false;
    }
    if (a->tp) {
        /* LETP */
        if (!dc_isar_feature(aa32_mve, s)) {
            return false;
        }
        if (!vfp_access_check(s)) {
            /*
             * Access check did not pass: nothing further to emit for
             * this insn, but ECI is still marked as handled.
             */
            s->eci_handled = true;
            return true;
        }
    }

    /* LE/LETP is OK with ECI set and leaves it untouched */
    s->eci_handled = true;

    /*
     * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
     * UsageFault exception for the LE insn in that case. Note that we
     * are not directly checking FPSCR.LTPSIZE but instead check the
     * pseudocode LTPSIZE() function, which returns 4 if the FPU is
     * not currently active (ie ActiveFPState() returns false). We
     * can identify not-active purely from our TB state flags, as the
     * FPU is active only if:
     *  the FPU is enabled
     *  AND lazy state preservation is not active
     *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
     *
     * Usually we don't need to care about this distinction between
     * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
     * will either take an exception or clear the conditions that make
     * the FPU not active. But LE is an unusual case of a non-FP insn
     * that looks at LTPSIZE.
     */
    fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;

    if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
        /* Need to do a runtime check for LTPSIZE != 4 */
        DisasLabel skipexc = gen_disas_label(s);
        tmp = load_cpu_field(v7m.ltpsize);
        /* LTPSIZE == 4: branch over the exception */
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
        set_disas_label(s, skipexc);
    }

    if (a->f) {
        /* Loop-forever: just jump back to the loop start */
        gen_jmp(s, jmp_diff(s, -a->imm));
        return true;
    }

    /*
     * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
     * For LE, we know at this point that LTPSIZE must be 4 and the
     * loop decrement value is 1. For LETP we need to calculate the decrement
     * value from LTPSIZE.
     */
    loopend = gen_disas_label(s);
    if (!a->tp) {
        /* LR <= 1 (unsigned): leave the loop, otherwise LR -= 1 */
        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
        tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
    } else {
        /*
         * Decrement by 1 << (4 - LTPSIZE). The value in decr is used
         * both by the brcond and by the subtraction that follows it.
         */
        TCGv_i32 decr = tcg_temp_new_i32();
        TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
        tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
        tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);

        /* LR <= decr (unsigned): leave the loop, otherwise LR -= decr */
        tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);

        tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
    }
    /* Jump back to the loop start */
    gen_jmp(s, jmp_diff(s, -a->imm));

    set_disas_label(s, loopend);
    if (a->tp) {
        /* Exits from tail-pred loops must reset LTPSIZE to 4 */
        store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
    }
    /* End TB, continuing to following insn */
    gen_jmp_tb(s, curr_insn_len(s), 1);
    return true;
}
8465 
8466 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8467 {
8468     /*
8469      * M-profile Loop Clear with Tail Predication. Since our implementation
8470      * doesn't cache branch information, all we need to do is reset
8471      * FPSCR.LTPSIZE to 4.
8472      */
8473 
8474     if (!dc_isar_feature(aa32_lob, s) ||
8475         !dc_isar_feature(aa32_mve, s)) {
8476         return false;
8477     }
8478 
8479     if (!vfp_access_check(s)) {
8480         return true;
8481     }
8482 
8483     store_cpu_field_constant(4, v7m.ltpsize);
8484     return true;
8485 }
8486 
8487 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8488 {
8489     /*
8490      * M-profile Create Vector Tail Predicate. This insn is itself
8491      * predicated and is subject to beatwise execution.
8492      */
8493     TCGv_i32 rn_shifted, masklen;
8494 
8495     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8496         return false;
8497     }
8498 
8499     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8500         return true;
8501     }
8502 
8503     /*
8504      * We pre-calculate the mask length here to avoid having
8505      * to have multiple helpers specialized for size.
8506      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8507      */
8508     rn_shifted = tcg_temp_new_i32();
8509     masklen = load_reg(s, a->rn);
8510     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8511     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8512                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8513                         rn_shifted, tcg_constant_i32(16));
8514     gen_helper_mve_vctp(cpu_env, masklen);
8515     /* This insn updates predication bits */
8516     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8517     mve_update_eci(s);
8518     return true;
8519 }
8520 
8521 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8522 {
8523     TCGv_i32 addr, tmp;
8524 
8525     tmp = load_reg(s, a->rm);
8526     if (half) {
8527         tcg_gen_add_i32(tmp, tmp, tmp);
8528     }
8529     addr = load_reg(s, a->rn);
8530     tcg_gen_add_i32(addr, addr, tmp);
8531 
8532     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8533 
8534     tcg_gen_add_i32(tmp, tmp, tmp);
8535     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8536     tcg_gen_add_i32(tmp, tmp, addr);
8537     store_reg(s, 15, tmp);
8538     return true;
8539 }
8540 
8541 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8542 {
8543     return op_tbranch(s, a, false);
8544 }
8545 
8546 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8547 {
8548     return op_tbranch(s, a, true);
8549 }
8550 
8551 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8552 {
8553     TCGv_i32 tmp = load_reg(s, a->rn);
8554 
8555     arm_gen_condlabel(s);
8556     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8557                         tmp, 0, s->condlabel.label);
8558     gen_jmp(s, jmp_diff(s, a->imm));
8559     return true;
8560 }
8561 
8562 /*
8563  * Supervisor call - both T32 & A32 come here so we need to check
8564  * which mode we are in when checking for semihosting.
8565  */
8566 
8567 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8568 {
8569     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8570 
8571     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8572         semihosting_enabled(s->current_el == 0) &&
8573         (a->imm == semihost_imm)) {
8574         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8575     } else {
8576         if (s->fgt_svc) {
8577             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8578             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8579         } else {
8580             gen_update_pc(s, curr_insn_len(s));
8581             s->svc_imm = a->imm;
8582             s->base.is_jmp = DISAS_SWI;
8583         }
8584     }
8585     return true;
8586 }
8587 
8588 /*
8589  * Unconditional system instructions
8590  */
8591 
8592 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8593 {
8594     static const int8_t pre_offset[4] = {
8595         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8596     };
8597     static const int8_t post_offset[4] = {
8598         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8599     };
8600     TCGv_i32 addr, t1, t2;
8601 
8602     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8603         return false;
8604     }
8605     if (IS_USER(s)) {
8606         unallocated_encoding(s);
8607         return true;
8608     }
8609 
8610     addr = load_reg(s, a->rn);
8611     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8612 
8613     /* Load PC into tmp and CPSR into tmp2.  */
8614     t1 = tcg_temp_new_i32();
8615     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8616     tcg_gen_addi_i32(addr, addr, 4);
8617     t2 = tcg_temp_new_i32();
8618     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8619 
8620     if (a->w) {
8621         /* Base writeback.  */
8622         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8623         store_reg(s, a->rn, addr);
8624     }
8625     gen_rfe(s, t1, t2);
8626     return true;
8627 }
8628 
8629 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8630 {
8631     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8632         return false;
8633     }
8634     gen_srs(s, a->mode, a->pu, a->w);
8635     return true;
8636 }
8637 
8638 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8639 {
8640     uint32_t mask, val;
8641 
8642     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8643         return false;
8644     }
8645     if (IS_USER(s)) {
8646         /* Implemented as NOP in user mode.  */
8647         return true;
8648     }
8649     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8650 
8651     mask = val = 0;
8652     if (a->imod & 2) {
8653         if (a->A) {
8654             mask |= CPSR_A;
8655         }
8656         if (a->I) {
8657             mask |= CPSR_I;
8658         }
8659         if (a->F) {
8660             mask |= CPSR_F;
8661         }
8662         if (a->imod & 1) {
8663             val |= mask;
8664         }
8665     }
8666     if (a->M) {
8667         mask |= CPSR_M;
8668         val |= a->mode;
8669     }
8670     if (mask) {
8671         gen_set_psr_im(s, mask, 0, val);
8672     }
8673     return true;
8674 }
8675 
8676 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8677 {
8678     TCGv_i32 tmp, addr;
8679 
8680     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8681         return false;
8682     }
8683     if (IS_USER(s)) {
8684         /* Implemented as NOP in user mode.  */
8685         return true;
8686     }
8687 
8688     tmp = tcg_constant_i32(a->im);
8689     /* FAULTMASK */
8690     if (a->F) {
8691         addr = tcg_constant_i32(19);
8692         gen_helper_v7m_msr(cpu_env, addr, tmp);
8693     }
8694     /* PRIMASK */
8695     if (a->I) {
8696         addr = tcg_constant_i32(16);
8697         gen_helper_v7m_msr(cpu_env, addr, tmp);
8698     }
8699     gen_rebuild_hflags(s, false);
8700     gen_lookup_tb(s);
8701     return true;
8702 }
8703 
8704 /*
8705  * Clear-Exclusive, Barriers
8706  */
8707 
8708 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8709 {
8710     if (s->thumb
8711         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8712         : !ENABLE_ARCH_6K) {
8713         return false;
8714     }
8715     gen_clrex(s);
8716     return true;
8717 }
8718 
8719 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8720 {
8721     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8722         return false;
8723     }
8724     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8725     return true;
8726 }
8727 
8728 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8729 {
8730     return trans_DSB(s, NULL);
8731 }
8732 
8733 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8734 {
8735     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8736         return false;
8737     }
8738     /*
8739      * We need to break the TB after this insn to execute
8740      * self-modifying code correctly and also to take
8741      * any pending interrupts immediately.
8742      */
8743     s->base.is_jmp = DISAS_TOO_MANY;
8744     return true;
8745 }
8746 
8747 static bool trans_SB(DisasContext *s, arg_SB *a)
8748 {
8749     if (!dc_isar_feature(aa32_sb, s)) {
8750         return false;
8751     }
8752     /*
8753      * TODO: There is no speculation barrier opcode
8754      * for TCG; MB and end the TB instead.
8755      */
8756     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8757     s->base.is_jmp = DISAS_TOO_MANY;
8758     return true;
8759 }
8760 
8761 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8762 {
8763     if (!ENABLE_ARCH_6) {
8764         return false;
8765     }
8766     if (a->E != (s->be_data == MO_BE)) {
8767         gen_helper_setend(cpu_env);
8768         s->base.is_jmp = DISAS_UPDATE_EXIT;
8769     }
8770     return true;
8771 }
8772 
8773 /*
8774  * Preload instructions
8775  * All are nops, contingent on the appropriate arch level.
8776  */
8777 
8778 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8779 {
8780     return ENABLE_ARCH_5TE;
8781 }
8782 
8783 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8784 {
8785     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8786 }
8787 
8788 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8789 {
8790     return ENABLE_ARCH_7;
8791 }
8792 
8793 /*
8794  * If-then
8795  */
8796 
8797 static bool trans_IT(DisasContext *s, arg_IT *a)
8798 {
8799     int cond_mask = a->cond_mask;
8800 
8801     /*
8802      * No actual code generated for this insn, just setup state.
8803      *
8804      * Combinations of firstcond and mask which set up an 0b1111
8805      * condition are UNPREDICTABLE; we take the CONSTRAINED
8806      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8807      * i.e. both meaning "execute always".
8808      */
8809     s->condexec_cond = (cond_mask >> 4) & 0xe;
8810     s->condexec_mask = cond_mask & 0x1f;
8811     return true;
8812 }
8813 
8814 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8815 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8816 {
8817     TCGv_i32 rn, rm, zero;
8818     DisasCompare c;
8819 
8820     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8821         return false;
8822     }
8823 
8824     if (a->rm == 13) {
8825         /* SEE "Related encodings" (MVE shifts) */
8826         return false;
8827     }
8828 
8829     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8830         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8831         return false;
8832     }
8833 
8834     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8835     zero = tcg_constant_i32(0);
8836     if (a->rn == 15) {
8837         rn = zero;
8838     } else {
8839         rn = load_reg(s, a->rn);
8840     }
8841     if (a->rm == 15) {
8842         rm = zero;
8843     } else {
8844         rm = load_reg(s, a->rm);
8845     }
8846 
8847     switch (a->op) {
8848     case 0: /* CSEL */
8849         break;
8850     case 1: /* CSINC */
8851         tcg_gen_addi_i32(rm, rm, 1);
8852         break;
8853     case 2: /* CSINV */
8854         tcg_gen_not_i32(rm, rm);
8855         break;
8856     case 3: /* CSNEG */
8857         tcg_gen_neg_i32(rm, rm);
8858         break;
8859     default:
8860         g_assert_not_reached();
8861     }
8862 
8863     arm_test_cc(&c, a->fcond);
8864     tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
8865 
8866     store_reg(s, a->rd, rn);
8867     return true;
8868 }
8869 
8870 /*
8871  * Legacy decoder.
8872  */
8873 
8874 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8875 {
8876     unsigned int cond = insn >> 28;
8877 
8878     /* M variants do not implement ARM mode; this must raise the INVSTATE
8879      * UsageFault exception.
8880      */
8881     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8882         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8883         return;
8884     }
8885 
8886     if (s->pstate_il) {
8887         /*
8888          * Illegal execution state. This has priority over BTI
8889          * exceptions, but comes after instruction abort exceptions.
8890          */
8891         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8892         return;
8893     }
8894 
8895     if (cond == 0xf) {
8896         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8897          * choose to UNDEF. In ARMv5 and above the space is used
8898          * for miscellaneous unconditional instructions.
8899          */
8900         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8901             unallocated_encoding(s);
8902             return;
8903         }
8904 
8905         /* Unconditional instructions.  */
8906         /* TODO: Perhaps merge these into one decodetree output file.  */
8907         if (disas_a32_uncond(s, insn) ||
8908             disas_vfp_uncond(s, insn) ||
8909             disas_neon_dp(s, insn) ||
8910             disas_neon_ls(s, insn) ||
8911             disas_neon_shared(s, insn)) {
8912             return;
8913         }
8914         /* fall back to legacy decoder */
8915 
8916         if ((insn & 0x0e000f00) == 0x0c000100) {
8917             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8918                 /* iWMMXt register transfer.  */
8919                 if (extract32(s->c15_cpar, 1, 1)) {
8920                     if (!disas_iwmmxt_insn(s, insn)) {
8921                         return;
8922                     }
8923                 }
8924             }
8925         }
8926         goto illegal_op;
8927     }
8928     if (cond != 0xe) {
8929         /* if not always execute, we generate a conditional jump to
8930            next instruction */
8931         arm_skip_unless(s, cond);
8932     }
8933 
8934     /* TODO: Perhaps merge these into one decodetree output file.  */
8935     if (disas_a32(s, insn) ||
8936         disas_vfp(s, insn)) {
8937         return;
8938     }
8939     /* fall back to legacy decoder */
8940     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8941     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8942         if (((insn & 0x0c000e00) == 0x0c000000)
8943             && ((insn & 0x03000000) != 0x03000000)) {
8944             /* Coprocessor insn, coprocessor 0 or 1 */
8945             disas_xscale_insn(s, insn);
8946             return;
8947         }
8948     }
8949 
8950 illegal_op:
8951     unallocated_encoding(s);
8952 }
8953 
8954 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8955 {
8956     /*
8957      * Return true if this is a 16 bit instruction. We must be precise
8958      * about this (matching the decode).
8959      */
8960     if ((insn >> 11) < 0x1d) {
8961         /* Definitely a 16-bit instruction */
8962         return true;
8963     }
8964 
8965     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8966      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8967      * end up actually treating this as two 16-bit insns, though,
8968      * if it's half of a bl/blx pair that might span a page boundary.
8969      */
8970     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8971         arm_dc_feature(s, ARM_FEATURE_M)) {
8972         /* Thumb2 cores (including all M profile ones) always treat
8973          * 32-bit insns as 32-bit.
8974          */
8975         return false;
8976     }
8977 
8978     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8979         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8980          * is not on the next page; we merge this into a 32-bit
8981          * insn.
8982          */
8983         return false;
8984     }
8985     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8986      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8987      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8988      *  -- handle as single 16 bit insn
8989      */
8990     return true;
8991 }
8992 
8993 /* Translate a 32-bit thumb instruction. */
8994 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8995 {
8996     /*
8997      * ARMv6-M supports a limited subset of Thumb2 instructions.
8998      * Other Thumb1 architectures allow only 32-bit
8999      * combined BL/BLX prefix and suffix.
9000      */
9001     if (arm_dc_feature(s, ARM_FEATURE_M) &&
9002         !arm_dc_feature(s, ARM_FEATURE_V7)) {
9003         int i;
9004         bool found = false;
9005         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9006                                                0xf3b08040 /* dsb */,
9007                                                0xf3b08050 /* dmb */,
9008                                                0xf3b08060 /* isb */,
9009                                                0xf3e08000 /* mrs */,
9010                                                0xf000d000 /* bl */};
9011         static const uint32_t armv6m_mask[] = {0xffe0d000,
9012                                                0xfff0d0f0,
9013                                                0xfff0d0f0,
9014                                                0xfff0d0f0,
9015                                                0xffe0d000,
9016                                                0xf800d000};
9017 
9018         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9019             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9020                 found = true;
9021                 break;
9022             }
9023         }
9024         if (!found) {
9025             goto illegal_op;
9026         }
9027     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9028         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9029             unallocated_encoding(s);
9030             return;
9031         }
9032     }
9033 
9034     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9035         /*
9036          * NOCP takes precedence over any UNDEF for (almost) the
9037          * entire wide range of coprocessor-space encodings, so check
9038          * for it first before proceeding to actually decode eg VFP
9039          * insns. This decode also handles the few insns which are
9040          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9041          */
9042         if (disas_m_nocp(s, insn)) {
9043             return;
9044         }
9045     }
9046 
9047     if ((insn & 0xef000000) == 0xef000000) {
9048         /*
9049          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9050          * transform into
9051          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9052          */
9053         uint32_t a32_insn = (insn & 0xe2ffffff) |
9054             ((insn & (1 << 28)) >> 4) | (1 << 28);
9055 
9056         if (disas_neon_dp(s, a32_insn)) {
9057             return;
9058         }
9059     }
9060 
9061     if ((insn & 0xff100000) == 0xf9000000) {
9062         /*
9063          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9064          * transform into
9065          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9066          */
9067         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9068 
9069         if (disas_neon_ls(s, a32_insn)) {
9070             return;
9071         }
9072     }
9073 
9074     /*
9075      * TODO: Perhaps merge these into one decodetree output file.
9076      * Note disas_vfp is written for a32 with cond field in the
9077      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9078      */
9079     if (disas_t32(s, insn) ||
9080         disas_vfp_uncond(s, insn) ||
9081         disas_neon_shared(s, insn) ||
9082         disas_mve(s, insn) ||
9083         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9084         return;
9085     }
9086 
9087 illegal_op:
9088     unallocated_encoding(s);
9089 }
9090 
9091 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9092 {
9093     if (!disas_t16(s, insn)) {
9094         unallocated_encoding(s);
9095     }
9096 }
9097 
9098 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9099 {
9100     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9101      * (False positives are OK, false negatives are not.)
9102      * We know this is a Thumb insn, and our caller ensures we are
9103      * only called if dc->base.pc_next is less than 4 bytes from the page
9104      * boundary, so we cross the page if the first 16 bits indicate
9105      * that this is a 32 bit insn.
9106      */
9107     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9108 
9109     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9110 }
9111 
9112 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9113 {
9114     DisasContext *dc = container_of(dcbase, DisasContext, base);
9115     CPUARMState *env = cs->env_ptr;
9116     ARMCPU *cpu = env_archcpu(env);
9117     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9118     uint32_t condexec, core_mmu_idx;
9119 
9120     dc->isar = &cpu->isar;
9121     dc->condjmp = 0;
9122     dc->pc_save = dc->base.pc_first;
9123     dc->aarch64 = false;
9124     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9125     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9126     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9127     /*
9128      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9129      * is always the IT bits. On M-profile, some of the reserved encodings
9130      * of IT are used instead to indicate either ICI or ECI, which
9131      * indicate partial progress of a restartable insn that was interrupted
9132      * partway through by an exception:
9133      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9134      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9135      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9136      * insn, behave normally".
9137      */
9138     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9139     dc->eci_handled = false;
9140     if (condexec & 0xf) {
9141         dc->condexec_mask = (condexec & 0xf) << 1;
9142         dc->condexec_cond = condexec >> 4;
9143     } else {
9144         if (arm_feature(env, ARM_FEATURE_M)) {
9145             dc->eci = condexec >> 4;
9146         }
9147     }
9148 
9149     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9150     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9151     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9152 #if !defined(CONFIG_USER_ONLY)
9153     dc->user = (dc->current_el == 0);
9154 #endif
9155     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9156     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9157     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9158     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9159     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9160 
9161     if (arm_feature(env, ARM_FEATURE_M)) {
9162         dc->vfp_enabled = 1;
9163         dc->be_data = MO_TE;
9164         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9165         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9166         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9167         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9168         dc->v7m_new_fp_ctxt_needed =
9169             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9170         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9171         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9172     } else {
9173         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9174         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9175         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9176         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9177         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9178             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9179         } else {
9180             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9181             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9182         }
9183         dc->sme_trap_nonstreaming =
9184             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9185     }
9186     dc->cp_regs = cpu->cp_regs;
9187     dc->features = env->features;
9188 
9189     /* Single step state. The code-generation logic here is:
9190      *  SS_ACTIVE == 0:
9191      *   generate code with no special handling for single-stepping (except
9192      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9193      *   this happens anyway because those changes are all system register or
9194      *   PSTATE writes).
9195      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9196      *   emit code for one insn
9197      *   emit code to clear PSTATE.SS
9198      *   emit code to generate software step exception for completed step
9199      *   end TB (as usual for having generated an exception)
9200      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9201      *   emit code to generate a software step exception
9202      *   end the TB
9203      */
9204     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9205     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9206     dc->is_ldex = false;
9207 
9208     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9209 
9210     /* If architectural single step active, limit to 1.  */
9211     if (dc->ss_active) {
9212         dc->base.max_insns = 1;
9213     }
9214 
9215     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9216        to those left on the page.  */
9217     if (!dc->thumb) {
9218         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9219         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9220     }
9221 
9222     cpu_V0 = tcg_temp_new_i64();
9223     cpu_V1 = tcg_temp_new_i64();
9224     cpu_M0 = tcg_temp_new_i64();
9225 }
9226 
9227 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9228 {
9229     DisasContext *dc = container_of(dcbase, DisasContext, base);
9230 
9231     /* A note on handling of the condexec (IT) bits:
9232      *
9233      * We want to avoid the overhead of having to write the updated condexec
9234      * bits back to the CPUARMState for every instruction in an IT block. So:
9235      * (1) if the condexec bits are not already zero then we write
9236      * zero back into the CPUARMState now. This avoids complications trying
9237      * to do it at the end of the block. (For example if we don't do this
9238      * it's hard to identify whether we can safely skip writing condexec
9239      * at the end of the TB, which we definitely want to do for the case
9240      * where a TB doesn't do anything with the IT state at all.)
9241      * (2) if we are going to leave the TB then we call gen_set_condexec()
9242      * which will write the correct value into CPUARMState if zero is wrong.
9243      * This is done both for leaving the TB at the end, and for leaving
9244      * it because of an exception we know will happen, which is done in
9245      * gen_exception_insn(). The latter is necessary because we need to
9246      * leave the TB with the PC/IT state just prior to execution of the
9247      * instruction which caused the exception.
9248      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9249      * then the CPUARMState will be wrong and we need to reset it.
9250      * This is handled in the same way as restoration of the
9251      * PC in these situations; we save the value of the condexec bits
9252      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9253      * then uses this to restore them after an exception.
9254      *
9255      * Note that there are no instructions which can read the condexec
9256      * bits, and none which can write non-static values to them, so
9257      * we don't need to care about whether CPUARMState is correct in the
9258      * middle of a TB.
9259      */
9260 
9261     /* Reset the conditional execution bits immediately. This avoids
9262        complications trying to do it at the end of the block.  */
9263     if (dc->condexec_mask || dc->condexec_cond) {
9264         store_cpu_field_constant(0, condexec_bits);
9265     }
9266 }
9267 
9268 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9269 {
9270     DisasContext *dc = container_of(dcbase, DisasContext, base);
9271     /*
9272      * The ECI/ICI bits share PSR bits with the IT bits, so we
9273      * need to reconstitute the bits from the split-out DisasContext
9274      * fields here.
9275      */
9276     uint32_t condexec_bits;
9277     target_ulong pc_arg = dc->base.pc_next;
9278 
9279     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9280         pc_arg &= ~TARGET_PAGE_MASK;
9281     }
9282     if (dc->eci) {
9283         condexec_bits = dc->eci << 4;
9284     } else {
9285         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9286     }
9287     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9288     dc->insn_start = tcg_last_op();
9289 }
9290 
9291 static bool arm_check_kernelpage(DisasContext *dc)
9292 {
9293 #ifdef CONFIG_USER_ONLY
9294     /* Intercept jump to the magic kernel page.  */
9295     if (dc->base.pc_next >= 0xffff0000) {
9296         /* We always get here via a jump, so know we are not in a
9297            conditional execution block.  */
9298         gen_exception_internal(EXCP_KERNEL_TRAP);
9299         dc->base.is_jmp = DISAS_NORETURN;
9300         return true;
9301     }
9302 #endif
9303     return false;
9304 }
9305 
9306 static bool arm_check_ss_active(DisasContext *dc)
9307 {
9308     if (dc->ss_active && !dc->pstate_ss) {
9309         /* Singlestep state is Active-pending.
9310          * If we're in this state at the start of a TB then either
9311          *  a) we just took an exception to an EL which is being debugged
9312          *     and this is the first insn in the exception handler
9313          *  b) debug exceptions were masked and we just unmasked them
9314          *     without changing EL (eg by clearing PSTATE.D)
9315          * In either case we're going to take a swstep exception in the
9316          * "did not step an insn" case, and so the syndrome ISV and EX
9317          * bits should be zero.
9318          */
9319         assert(dc->base.num_insns == 1);
9320         gen_swstep_exception(dc, 0, 0);
9321         dc->base.is_jmp = DISAS_NORETURN;
9322         return true;
9323     }
9324 
9325     return false;
9326 }
9327 
9328 static void arm_post_translate_insn(DisasContext *dc)
9329 {
9330     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9331         if (dc->pc_save != dc->condlabel.pc_save) {
9332             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9333         }
9334         gen_set_label(dc->condlabel.label);
9335         dc->condjmp = 0;
9336     }
9337 }
9338 
9339 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9340 {
9341     DisasContext *dc = container_of(dcbase, DisasContext, base);
9342     CPUARMState *env = cpu->env_ptr;
9343     uint32_t pc = dc->base.pc_next;
9344     unsigned int insn;
9345 
9346     /* Singlestep exceptions have the highest priority. */
9347     if (arm_check_ss_active(dc)) {
9348         dc->base.pc_next = pc + 4;
9349         return;
9350     }
9351 
9352     if (pc & 3) {
9353         /*
9354          * PC alignment fault.  This has priority over the instruction abort
9355          * that we would receive from a translation fault via arm_ldl_code
9356          * (or the execution of the kernelpage entrypoint). This should only
9357          * be possible after an indirect branch, at the start of the TB.
9358          */
9359         assert(dc->base.num_insns == 1);
9360         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9361         dc->base.is_jmp = DISAS_NORETURN;
9362         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9363         return;
9364     }
9365 
9366     if (arm_check_kernelpage(dc)) {
9367         dc->base.pc_next = pc + 4;
9368         return;
9369     }
9370 
9371     dc->pc_curr = pc;
9372     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9373     dc->insn = insn;
9374     dc->base.pc_next = pc + 4;
9375     disas_arm_insn(dc, insn);
9376 
9377     arm_post_translate_insn(dc);
9378 
9379     /* ARM is a fixed-length ISA.  We performed the cross-page check
9380        in init_disas_context by adjusting max_insns.  */
9381 }
9382 
9383 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9384 {
9385     /* Return true if this Thumb insn is always unconditional,
9386      * even inside an IT block. This is true of only a very few
9387      * instructions: BKPT, HLT, and SG.
9388      *
9389      * A larger class of instructions are UNPREDICTABLE if used
9390      * inside an IT block; we do not need to detect those here, because
9391      * what we do by default (perform the cc check and update the IT
9392      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9393      * choice for those situations.
9394      *
9395      * insn is either a 16-bit or a 32-bit instruction; the two are
9396      * distinguishable because for the 16-bit case the top 16 bits
9397      * are zeroes, and that isn't a valid 32-bit encoding.
9398      */
9399     if ((insn & 0xffffff00) == 0xbe00) {
9400         /* BKPT */
9401         return true;
9402     }
9403 
9404     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9405         !arm_dc_feature(s, ARM_FEATURE_M)) {
9406         /* HLT: v8A only. This is unconditional even when it is going to
9407          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9408          * For v7 cores this was a plain old undefined encoding and so
9409          * honours its cc check. (We might be using the encoding as
9410          * a semihosting trap, but we don't change the cc check behaviour
9411          * on that account, because a debugger connected to a real v7A
9412          * core and emulating semihosting traps by catching the UNDEF
9413          * exception would also only see cases where the cc check passed.
9414          * No guest code should be trying to do a HLT semihosting trap
9415          * in an IT block anyway.
9416          */
9417         return true;
9418     }
9419 
9420     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9421         arm_dc_feature(s, ARM_FEATURE_M)) {
9422         /* SG: v8M only */
9423         return true;
9424     }
9425 
9426     return false;
9427 }
9428 
/*
 * TranslatorOps.translate_insn hook for Thumb (16-bit or 32-bit)
 * instructions.
 *
 * Sequence, as implemented below:
 *  1. pending singlestep exception / user-mode kernel page trap
 *     (nothing is fetched; pc_next is advanced by 2);
 *  2. fetch one halfword, and a second one if the insn is 32-bit;
 *  3. illegal execution state (PSTATE.IL) check;
 *  4. if ECI/ICI is non-zero, record a rewind point;
 *  5. emit the IT-block condition skip, then decode the insn;
 *  6. advance the IT state machine;
 *  7. if the insn did not declare itself ECI-aware, discard what was
 *     generated and raise INVSTATE instead;
 *  8. end the TB if the next insn would touch a new page.
 */
static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint32_t pc = dc->base.pc_next;
    uint32_t insn;
    bool is_16bit;
    /* TCG op to rewind to if this turns out to be an invalid ECI state */
    TCGOp *insn_eci_rewind = NULL;
    target_ulong insn_eci_pc_save = -1;

    /* Misaligned thumb PC is architecturally impossible. */
    assert((dc->base.pc_next & 1) == 0);

    if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
        dc->base.pc_next = pc + 2;
        return;
    }

    dc->pc_curr = pc;
    insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
    pc += 2;
    if (!is_16bit) {
        /* 32-bit insn: first halfword goes in the top 16 bits. */
        uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
        insn = insn << 16 | insn2;
        pc += 2;
    }
    dc->base.pc_next = pc;
    dc->insn = insn;

    if (dc->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc->eci) {
        /*
         * For M-profile continuable instructions, ECI/ICI handling
         * falls into these cases:
         *  - interrupt-continuable instructions
         *     These are the various load/store multiple insns (both
         *     integer and fp). The ICI bits indicate the register
         *     where the load/store can resume. We make the IMPDEF
         *     choice to always do "instruction restart", ie ignore
         *     the ICI value and always execute the ldm/stm from the
         *     start. So all we need to do is zero PSR.ICI if the
         *     insn executes.
         *  - MVE instructions subject to beat-wise execution
         *     Here the ECI bits indicate which beats have already been
         *     executed, and we must honour this. Each insn of this
         *     type will handle it correctly. We will update PSR.ECI
         *     in the helper function for the insn (some ECI values
         *     mean that the following insn also has been partially
         *     executed).
         *  - Special cases which don't advance ECI
         *     The insns LE, LETP and BKPT leave the ECI/ICI state
         *     bits untouched.
         *  - all other insns (the common case)
         *     Non-zero ECI/ICI means an INVSTATE UsageFault.
         *     We place a rewind-marker here. Insns in the previous
         *     three categories will set a flag in the DisasContext.
         *     If the flag isn't set after we call disas_thumb_insn()
         *     or disas_thumb2_insn() then we know we have a "some other
         *     insn" case. We will rewind to the marker (ie throwing away
         *     all the generated code) and instead emit "take exception".
         */
        insn_eci_rewind = tcg_last_op();
        insn_eci_pc_save = dc->pc_save;
    }

    /* Inside an IT block (non-zero mask): apply the block's condition. */
    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
        uint32_t cond = dc->condexec_cond;

        /*
         * Conditionally skip the insn. Note that both 0xe and 0xf mean
         * "always"; 0xf is not "never".
         */
        if (cond < 0x0e) {
            arm_skip_unless(dc, cond);
        }
    }

    if (is_16bit) {
        disas_thumb_insn(dc, insn);
    } else {
        disas_thumb2_insn(dc, insn);
    }

    /*
     * Advance the Thumb condexec condition: bit 4 of the mask becomes
     * the new low bit of the condition, the mask shifts left by one
     * (kept to 5 bits), and once the mask is empty the condition is
     * cleared as well.
     */
    if (dc->condexec_mask) {
        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
                             ((dc->condexec_mask >> 4) & 1));
        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
        if (dc->condexec_mask == 0) {
            dc->condexec_cond = 0;
        }
    }

    if (dc->eci && !dc->eci_handled) {
        /*
         * Insn wasn't valid for ECI/ICI at all: undo what we
         * just generated and instead emit an exception
         */
        tcg_remove_ops_after(insn_eci_rewind);
        dc->pc_save = insn_eci_pc_save;
        /*
         * NOTE(review): presumably any conditional-skip branch emitted
         * above was among the removed ops, hence the reset -- confirm
         * against arm_skip_unless().
         */
        dc->condjmp = 0;
        gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
    }

    arm_post_translate_insn(dc);

    /* Thumb is a variable-length ISA.  Stop translation when the next insn
     * will touch a new page.  This ensures that prefetch aborts occur at
     * the right place.
     *
     * We want to stop the TB if the next insn starts in a new page,
     * or if it spans between this page and the next. This means that
     * if we're looking at the last halfword in the page we need to
     * see if it's a 16-bit Thumb insn (which will fit in this TB)
     * or a 32-bit Thumb insn (which won't).
     * This is to avoid generating a silly TB with a single 16-bit insn
     * in it at the end of this page (which would execute correctly
     * but isn't very efficient).
     */
    if (dc->base.is_jmp == DISAS_NEXT
        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
                && insn_crosses_page(env, dc)))) {
        dc->base.is_jmp = DISAS_TOO_MANY;
    }
}
9565 
/*
 * TranslatorOps.tb_stop hook: emit the code that ends the TB, chosen
 * by dc->base.is_jmp.
 *
 * The condexec bits are written back first. Then one of three paths
 * is taken:
 *  - DISAS_BX_EXCRET: handled entirely by gen_bx_excret_final_code();
 *  - singlestep active: exception-generating insns advance the step
 *    state machine before raising their exception, everything else
 *    (except DISAS_NORETURN) ends in a singlestep exception;
 *  - otherwise: a normal TB exit appropriate to the is_jmp value.
 *
 * Finally, if the last insn was conditional (dc->condjmp still set),
 * the "condition failed" path is emitted at dc->condlabel.
 */
static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    /* At this stage dc->condjmp will only be set when the skipped
       instruction was a conditional branch or trap, and the PC has
       already been written.  */
    gen_set_condexec(dc);
    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
        /* Exception return branches need some special case code at the
         * end of the TB, which is complex enough that it has to
         * handle the single-step vs not and the condition-failed
         * insn codepath itself.
         */
        gen_bx_excret_final_code(dc);
    } else if (unlikely(dc->ss_active)) {
        /* Unconditional and "condition passed" instruction codepath. */
        switch (dc->base.is_jmp) {
        case DISAS_SWI:
            gen_ss_advance(dc);
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
            break;
        case DISAS_HVC:
            gen_ss_advance(dc);
            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_ss_advance(dc);
            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
        case DISAS_UPDATE_EXIT:
        case DISAS_UPDATE_NOCHAIN:
            /* These cases have not yet written the PC: do so first. */
            gen_update_pc(dc, curr_insn_len(dc));
            /* fall through */
        default:
            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
            gen_singlestep_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        /* While branches must always occur at the end of an IT block,
           there are a few other things that can cause us to terminate
           the TB in the middle of an IT block:
            - Exception generating instructions (bkpt, swi, undefined).
            - Page boundaries.
            - Hardware watchpoints.
           Hardware breakpoints have already been handled and skip this code.
         */
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, curr_insn_len(dc));
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_update_pc(dc, curr_insn_len(dc));
            /* fall through */
        case DISAS_JUMP:
            gen_goto_ptr();
            break;
        case DISAS_UPDATE_EXIT:
            gen_update_pc(dc, curr_insn_len(dc));
            /* fall through */
        default:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_NORETURN:
            /* nothing more to generate */
            break;
        case DISAS_WFI:
            gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_WFE:
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_helper_yield(cpu_env);
            break;
        case DISAS_SWI:
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
            break;
        case DISAS_HVC:
            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        }
    }

    if (dc->condjmp) {
        /* "Condition failed" instruction codepath for the branch/trap insn */
        set_disas_label(dc, dc->condlabel);
        /* This path also leaves the TB, so write condexec back here too. */
        gen_set_condexec(dc);
        if (unlikely(dc->ss_active)) {
            gen_update_pc(dc, curr_insn_len(dc));
            gen_singlestep_exception(dc);
        } else {
            gen_goto_tb(dc, 1, curr_insn_len(dc));
        }
    }
}
9677 
9678 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9679                              CPUState *cpu, FILE *logfile)
9680 {
9681     DisasContext *dc = container_of(dcbase, DisasContext, base);
9682 
9683     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9684     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9685 }
9686 
/*
 * Translator hooks for ARM-state (non-Thumb) AArch32 code. This is the
 * table gen_intermediate_code() starts with before checking for Thumb
 * or AArch64. It differs from thumb_translator_ops only in
 * .translate_insn.
 */
static const TranslatorOps arm_translator_ops = {
    .init_disas_context = arm_tr_init_disas_context,
    .tb_start           = arm_tr_tb_start,
    .insn_start         = arm_tr_insn_start,
    .translate_insn     = arm_tr_translate_insn,
    .tb_stop            = arm_tr_tb_stop,
    .disas_log          = arm_tr_disas_log,
};
9695 
/*
 * Translator hooks for Thumb-state code, selected by
 * gen_intermediate_code() when the TB's THUMB flag is set. All hooks
 * are shared with arm_translator_ops except .translate_insn, which
 * handles the variable-length Thumb encodings.
 */
static const TranslatorOps thumb_translator_ops = {
    .init_disas_context = arm_tr_init_disas_context,
    .tb_start           = arm_tr_tb_start,
    .insn_start         = arm_tr_insn_start,
    .translate_insn     = thumb_tr_translate_insn,
    .tb_stop            = arm_tr_tb_stop,
    .disas_log          = arm_tr_disas_log,
};
9704 
9705 /* generate intermediate code for basic block 'tb'.  */
9706 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9707                            target_ulong pc, void *host_pc)
9708 {
9709     DisasContext dc = { };
9710     const TranslatorOps *ops = &arm_translator_ops;
9711     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9712 
9713     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9714         ops = &thumb_translator_ops;
9715     }
9716 #ifdef TARGET_AARCH64
9717     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9718         ops = &aarch64_translator_ops;
9719     }
9720 #endif
9721 
9722     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9723 }
9724