xref: /openbmc/qemu/target/arm/tcg/translate.c (revision 720923ed)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "semihosting/semihost.h"
33 #include "exec/helper-proto.h"
34 #include "exec/helper-gen.h"
35 #include "exec/log.h"
36 #include "cpregs.h"
37 
38 
39 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
40 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
41 /* currently all emulated v5 cores are also v5TE, so don't bother */
42 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
43 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
44 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
45 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
46 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
47 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
48 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
49 
50 #include "translate.h"
51 #include "translate-a32.h"
52 
53 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
54 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
55 /* These are TCG globals which alias CPUARMState fields */
56 static TCGv_i32 cpu_R[16];
57 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
58 TCGv_i64 cpu_exclusive_addr;
59 TCGv_i64 cpu_exclusive_val;
60 
61 #include "exec/gen-icount.h"
62 
63 static const char * const regnames[] =
64     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
65       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
66 
67 
68 /* initialize TCG globals.  */
69 void arm_translate_init(void)
70 {
71     int i;
72 
73     for (i = 0; i < 16; i++) {
74         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
75                                           offsetof(CPUARMState, regs[i]),
76                                           regnames[i]);
77     }
78     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
79     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
80     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
81     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
82 
83     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
84         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
85     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
86         offsetof(CPUARMState, exclusive_val), "exclusive_val");
87 
88     a64_translate_init();
89 }
90 
91 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
92 {
93     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
94     switch (cmode) {
95     case 0: case 1:
96         /* no-op */
97         break;
98     case 2: case 3:
99         imm <<= 8;
100         break;
101     case 4: case 5:
102         imm <<= 16;
103         break;
104     case 6: case 7:
105         imm <<= 24;
106         break;
107     case 8: case 9:
108         imm |= imm << 16;
109         break;
110     case 10: case 11:
111         imm = (imm << 8) | (imm << 24);
112         break;
113     case 12:
114         imm = (imm << 8) | 0xff;
115         break;
116     case 13:
117         imm = (imm << 16) | 0xffff;
118         break;
119     case 14:
120         if (op) {
121             /*
122              * This and cmode == 15 op == 1 are the only cases where
123              * the top and bottom 32 bits of the encoded constant differ.
124              */
125             uint64_t imm64 = 0;
126             int n;
127 
128             for (n = 0; n < 8; n++) {
129                 if (imm & (1 << n)) {
130                     imm64 |= (0xffULL << (n * 8));
131                 }
132             }
133             return imm64;
134         }
135         imm |= (imm << 8) | (imm << 16) | (imm << 24);
136         break;
137     case 15:
138         if (op) {
139             /* Reserved encoding for AArch32; valid for AArch64 */
140             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
141             if (imm & 0x80) {
142                 imm64 |= 0x8000000000000000ULL;
143             }
144             if (imm & 0x40) {
145                 imm64 |= 0x3fc0000000000000ULL;
146             } else {
147                 imm64 |= 0x4000000000000000ULL;
148             }
149             return imm64;
150         }
151         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
152             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
153         break;
154     }
155     if (op) {
156         imm = ~imm;
157     }
158     return dup_const(MO_32, imm);
159 }
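/*
 * Worked example (illustrative): with cmode == 14 and op == 1 each bit of
 * the 8-bit immediate selects a whole byte, so imm == 0xa5 (1010_0101)
 * expands to 0xff00ff0000ff00ff.  For the op == 0 paths the expanded
 * 32-bit value is replicated to 64 bits by dup_const(MO_32, imm).
 */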
160 
161 /* Generate a label used for skipping this instruction */
162 void arm_gen_condlabel(DisasContext *s)
163 {
164     if (!s->condjmp) {
165         s->condlabel = gen_disas_label(s);
166         s->condjmp = 1;
167     }
168 }
169 
170 /* Flags for the disas_set_da_iss info argument:
171  * lower bits hold the Rt register number, higher bits are flags.
172  */
173 typedef enum ISSInfo {
174     ISSNone = 0,
175     ISSRegMask = 0x1f,
176     ISSInvalid = (1 << 5),
177     ISSIsAcqRel = (1 << 6),
178     ISSIsWrite = (1 << 7),
179     ISSIs16Bit = (1 << 8),
180 } ISSInfo;
181 
182 /*
183  * Store var into env + offset to a member with size bytes.
184  * Free var after use.
185  */
186 void store_cpu_offset(TCGv_i32 var, int offset, int size)
187 {
188     switch (size) {
189     case 1:
190         tcg_gen_st8_i32(var, cpu_env, offset);
191         break;
192     case 4:
193         tcg_gen_st_i32(var, cpu_env, offset);
194         break;
195     default:
196         g_assert_not_reached();
197     }
198     tcg_temp_free_i32(var);
199 }
200 
201 /* Save the syndrome information for a Data Abort */
202 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
203 {
204     uint32_t syn;
205     int sas = memop & MO_SIZE;
206     bool sse = memop & MO_SIGN;
207     bool is_acqrel = issinfo & ISSIsAcqRel;
208     bool is_write = issinfo & ISSIsWrite;
209     bool is_16bit = issinfo & ISSIs16Bit;
210     int srt = issinfo & ISSRegMask;
211 
212     if (issinfo & ISSInvalid) {
213         /* Some callsites want to conditionally provide ISS info,
214          * eg "only if this was not a writeback"
215          */
216         return;
217     }
218 
219     if (srt == 15) {
220         /* For AArch32, insns where the src/dest is R15 never generate
221          * ISS information. Catching that here saves checking at all
222          * the call sites.
223          */
224         return;
225     }
226 
227     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
228                                   0, 0, 0, is_write, 0, is_16bit);
229     disas_set_insn_syndrome(s, syn);
230 }
231 
232 static inline int get_a32_user_mem_index(DisasContext *s)
233 {
234     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
235      * insns:
236      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
237      *  otherwise, access as if at PL0.
238      */
239     switch (s->mmu_idx) {
240     case ARMMMUIdx_E3:
241     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
242     case ARMMMUIdx_E10_0:
243     case ARMMMUIdx_E10_1:
244     case ARMMMUIdx_E10_1_PAN:
245         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
246     case ARMMMUIdx_MUser:
247     case ARMMMUIdx_MPriv:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
249     case ARMMMUIdx_MUserNegPri:
250     case ARMMMUIdx_MPrivNegPri:
251         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
252     case ARMMMUIdx_MSUser:
253     case ARMMMUIdx_MSPriv:
254         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
255     case ARMMMUIdx_MSUserNegPri:
256     case ARMMMUIdx_MSPrivNegPri:
257         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
258     default:
259         g_assert_not_reached();
260     }
261 }
262 
263 /* The pc_curr difference for an architectural jump. */
264 static target_long jmp_diff(DisasContext *s, target_long diff)
265 {
266     return diff + (s->thumb ? 4 : 8);
267 }
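/*
 * For example (illustrative): the architectural PC reads as the insn
 * address plus 8 in ARM state and plus 4 in Thumb state, so a branch
 * encoded with a zero offset at pc_curr == 0x1000 targets 0x1008 in ARM
 * state and 0x1004 in Thumb state.
 */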
268 
269 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
270 {
271     assert(s->pc_save != -1);
272     if (tb_cflags(s->base.tb) & CF_PCREL) {
273         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
274     } else {
275         tcg_gen_movi_i32(var, s->pc_curr + diff);
276     }
277 }
278 
279 /* Set a variable to the value of a CPU register.  */
280 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
281 {
282     if (reg == 15) {
283         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
284     } else {
285         tcg_gen_mov_i32(var, cpu_R[reg]);
286     }
287 }
288 
289 /*
290  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
291  * This is used for load/store for which use of PC implies (literal),
292  * or ADD that implies ADR.
293  */
294 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
295 {
296     TCGv_i32 tmp = tcg_temp_new_i32();
297 
298     if (reg == 15) {
299         /*
300          * This address is computed from an aligned PC:
301          * subtract off the low bits.
302          */
303         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
304     } else {
305         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
306     }
307     return tmp;
308 }
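/*
 * Worked example (illustrative): for a Thumb LDR (literal) at
 * pc_curr == 0x1002 the architectural PC is 0x1006 and Align(PC, 4) is
 * 0x1004; jmp_diff(s, ofs - (s->pc_curr & 3)) above yields exactly
 * 0x1004 + ofs.
 */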
309 
310 /* Set a CPU register.  The source must be a temporary and will be
311    marked as dead.  */
312 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
313 {
314     if (reg == 15) {
315         /* In Thumb mode, we must ignore bit 0.
316          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
317          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
318          * We choose to ignore [1:0] in ARM mode for all architecture versions.
319          */
320         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
321         s->base.is_jmp = DISAS_JUMP;
322         s->pc_save = -1;
323     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
324         /* For M-profile SP bits [1:0] are always zero */
325         tcg_gen_andi_i32(var, var, ~3);
326     }
327     tcg_gen_mov_i32(cpu_R[reg], var);
328     tcg_temp_free_i32(var);
329 }
330 
331 /*
332  * Variant of store_reg which applies v8M stack-limit checks before updating
333  * SP. If the check fails this will result in an exception being taken.
334  * We disable the stack checks for CONFIG_USER_ONLY because we have
335  * no idea what the stack limits should be in that case.
336  * If stack checking is not being done this just acts like store_reg().
337  */
338 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
339 {
340 #ifndef CONFIG_USER_ONLY
341     if (s->v8m_stackcheck) {
342         gen_helper_v8m_stackcheck(cpu_env, var);
343     }
344 #endif
345     store_reg(s, 13, var);
346 }
347 
348 /* Value extensions.  */
349 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
350 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
351 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
352 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
353 
354 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
355 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
356 
357 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
358 {
359     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
360 }
361 
362 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
363 {
364     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
365 
366     if (new_el) {
367         if (m_profile) {
368             gen_helper_rebuild_hflags_m32_newel(cpu_env);
369         } else {
370             gen_helper_rebuild_hflags_a32_newel(cpu_env);
371         }
372     } else {
373         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
374         if (m_profile) {
375             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
376         } else {
377             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
378         }
379     }
380 }
381 
382 static void gen_exception_internal(int excp)
383 {
384     assert(excp_is_internal(excp));
385     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
386 }
387 
388 static void gen_singlestep_exception(DisasContext *s)
389 {
390     /* We just completed a step of an insn. Move from Active-not-pending
391      * to Active-pending, and then also take the swstep exception.
392      * This corresponds to making the (IMPDEF) choice to prioritize
393      * swstep exceptions over asynchronous exceptions taken to an exception
394      * level where debug is disabled. This choice has the advantage that
395      * we do not need to maintain internal state corresponding to the
396      * ISV/EX syndrome bits between completion of the step and generation
397      * of the exception, and our syndrome information is always correct.
398      */
399     gen_ss_advance(s);
400     gen_swstep_exception(s, 1, s->is_ldex);
401     s->base.is_jmp = DISAS_NORETURN;
402 }
403 
404 void clear_eci_state(DisasContext *s)
405 {
406     /*
407      * Clear any ECI/ICI state: used when a load multiple/store
408      * multiple insn executes.
409      */
410     if (s->eci) {
411         store_cpu_field_constant(0, condexec_bits);
412         s->eci = 0;
413     }
414 }
415 
416 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
417 {
418     TCGv_i32 tmp1 = tcg_temp_new_i32();
419     TCGv_i32 tmp2 = tcg_temp_new_i32();
420     tcg_gen_ext16s_i32(tmp1, a);
421     tcg_gen_ext16s_i32(tmp2, b);
422     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
423     tcg_temp_free_i32(tmp2);
424     tcg_gen_sari_i32(a, a, 16);
425     tcg_gen_sari_i32(b, b, 16);
426     tcg_gen_mul_i32(b, b, a);
427     tcg_gen_mov_i32(a, tmp1);
428     tcg_temp_free_i32(tmp1);
429 }
430 
431 /* Byteswap each halfword.  */
432 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
433 {
434     TCGv_i32 tmp = tcg_temp_new_i32();
435     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
436     tcg_gen_shri_i32(tmp, var, 8);
437     tcg_gen_and_i32(tmp, tmp, mask);
438     tcg_gen_and_i32(var, var, mask);
439     tcg_gen_shli_i32(var, var, 8);
440     tcg_gen_or_i32(dest, var, tmp);
441     tcg_temp_free_i32(tmp);
442 }
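/*
 * Worked example (illustrative): gen_rev16 on var == 0x11223344 yields
 * dest == 0x22114433 (bytes swapped within each halfword).
 */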
443 
444 /* Byteswap low halfword and sign extend.  */
445 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
446 {
447     tcg_gen_bswap16_i32(dest, var, TCG_BSWAP_OS);
448 }
449 
450 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
451     tmp = (t0 ^ t1) & 0x8000;
452     t0 &= ~0x8000;
453     t1 &= ~0x8000;
454     t0 = (t0 + t1) ^ tmp;
455  */
456 
457 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 {
459     TCGv_i32 tmp = tcg_temp_new_i32();
460     tcg_gen_xor_i32(tmp, t0, t1);
461     tcg_gen_andi_i32(tmp, tmp, 0x8000);
462     tcg_gen_andi_i32(t0, t0, ~0x8000);
463     tcg_gen_andi_i32(t1, t1, ~0x8000);
464     tcg_gen_add_i32(t0, t0, t1);
465     tcg_gen_xor_i32(dest, t0, tmp);
466     tcg_temp_free_i32(tmp);
467 }
468 
469 /* Set N and Z flags from var.  */
470 static inline void gen_logic_CC(TCGv_i32 var)
471 {
472     tcg_gen_mov_i32(cpu_NF, var);
473     tcg_gen_mov_i32(cpu_ZF, var);
474 }
475 
476 /* dest = T0 + T1 + CF. */
477 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
478 {
479     tcg_gen_add_i32(dest, t0, t1);
480     tcg_gen_add_i32(dest, dest, cpu_CF);
481 }
482 
483 /* dest = T0 - T1 + CF - 1.  */
484 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
485 {
486     tcg_gen_sub_i32(dest, t0, t1);
487     tcg_gen_add_i32(dest, dest, cpu_CF);
488     tcg_gen_subi_i32(dest, dest, 1);
489 }
490 
491 /* dest = T0 + T1. Compute C, N, V and Z flags */
492 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
493 {
494     TCGv_i32 tmp = tcg_temp_new_i32();
495     tcg_gen_movi_i32(tmp, 0);
496     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
497     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
498     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
499     tcg_gen_xor_i32(tmp, t0, t1);
500     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
501     tcg_temp_free_i32(tmp);
502     tcg_gen_mov_i32(dest, cpu_NF);
503 }
504 
505 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
506 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
507 {
508     TCGv_i32 tmp = tcg_temp_new_i32();
509     if (TCG_TARGET_HAS_add2_i32) {
510         tcg_gen_movi_i32(tmp, 0);
511         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
512         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
513     } else {
514         TCGv_i64 q0 = tcg_temp_new_i64();
515         TCGv_i64 q1 = tcg_temp_new_i64();
516         tcg_gen_extu_i32_i64(q0, t0);
517         tcg_gen_extu_i32_i64(q1, t1);
518         tcg_gen_add_i64(q0, q0, q1);
519         tcg_gen_extu_i32_i64(q1, cpu_CF);
520         tcg_gen_add_i64(q0, q0, q1);
521         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
522         tcg_temp_free_i64(q0);
523         tcg_temp_free_i64(q1);
524     }
525     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
526     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
527     tcg_gen_xor_i32(tmp, t0, t1);
528     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
529     tcg_temp_free_i32(tmp);
530     tcg_gen_mov_i32(dest, cpu_NF);
531 }
532 
533 /* dest = T0 - T1. Compute C, N, V and Z flags */
534 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
535 {
536     TCGv_i32 tmp;
537     tcg_gen_sub_i32(cpu_NF, t0, t1);
538     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
539     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
540     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
541     tmp = tcg_temp_new_i32();
542     tcg_gen_xor_i32(tmp, t0, t1);
543     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
544     tcg_temp_free_i32(tmp);
545     tcg_gen_mov_i32(dest, cpu_NF);
546 }
547 
548 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
549 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
550 {
551     TCGv_i32 tmp = tcg_temp_new_i32();
552     tcg_gen_not_i32(tmp, t1);
553     gen_adc_CC(dest, t0, tmp);
554     tcg_temp_free_i32(tmp);
555 }
556 
557 #define GEN_SHIFT(name)                                               \
558 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
559 {                                                                     \
560     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
561     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
562     TCGv_i32 zero = tcg_constant_i32(0);                              \
563     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
564     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
565     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
566     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
567     tcg_temp_free_i32(tmpd);                                          \
568     tcg_temp_free_i32(tmp1);                                          \
569 }
570 GEN_SHIFT(shl)
571 GEN_SHIFT(shr)
572 #undef GEN_SHIFT
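/*
 * Illustrative example: for a register-specified LSL with t1 == 0x28 the
 * low five bits give a shift of 8, but (t1 & 0xe0) != 0 so the movcond
 * above forces the result to zero, matching the architectural rule that
 * shifts by 32 or more produce 0.
 */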
573 
574 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
575 {
576     TCGv_i32 tmp1 = tcg_temp_new_i32();
577 
578     tcg_gen_andi_i32(tmp1, t1, 0xff);
579     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
580     tcg_gen_sar_i32(dest, t0, tmp1);
581     tcg_temp_free_i32(tmp1);
582 }
583 
584 static void shifter_out_im(TCGv_i32 var, int shift)
585 {
586     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
587 }
588 
589 /* Shift by immediate.  Includes special handling for shift == 0.  */
590 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
591                                     int shift, int flags)
592 {
593     switch (shiftop) {
594     case 0: /* LSL */
595         if (shift != 0) {
596             if (flags)
597                 shifter_out_im(var, 32 - shift);
598             tcg_gen_shli_i32(var, var, shift);
599         }
600         break;
601     case 1: /* LSR */
602         if (shift == 0) {
603             if (flags) {
604                 tcg_gen_shri_i32(cpu_CF, var, 31);
605             }
606             tcg_gen_movi_i32(var, 0);
607         } else {
608             if (flags)
609                 shifter_out_im(var, shift - 1);
610             tcg_gen_shri_i32(var, var, shift);
611         }
612         break;
613     case 2: /* ASR */
614         if (shift == 0)
615             shift = 32;
616         if (flags)
617             shifter_out_im(var, shift - 1);
618         if (shift == 32)
619           shift = 31;
620         tcg_gen_sari_i32(var, var, shift);
621         break;
622     case 3: /* ROR/RRX */
623         if (shift != 0) {
624             if (flags)
625                 shifter_out_im(var, shift - 1);
626             tcg_gen_rotri_i32(var, var, shift); break;
627         } else {
628             TCGv_i32 tmp = tcg_temp_new_i32();
629             tcg_gen_shli_i32(tmp, cpu_CF, 31);
630             if (flags)
631                 shifter_out_im(var, 0);
632             tcg_gen_shri_i32(var, var, 1);
633             tcg_gen_or_i32(var, var, tmp);
634             tcg_temp_free_i32(tmp);
635         }
636     }
637 }
638 
639 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
640                                      TCGv_i32 shift, int flags)
641 {
642     if (flags) {
643         switch (shiftop) {
644         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
645         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
646         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
647         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
648         }
649     } else {
650         switch (shiftop) {
651         case 0:
652             gen_shl(var, var, shift);
653             break;
654         case 1:
655             gen_shr(var, var, shift);
656             break;
657         case 2:
658             gen_sar(var, var, shift);
659             break;
660         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
661                 tcg_gen_rotr_i32(var, var, shift); break;
662         }
663     }
664     tcg_temp_free_i32(shift);
665 }
666 
667 /*
668  * Generate a conditional based on ARM condition code cc.
669  * This is common between ARM and AArch64 targets.
670  */
671 void arm_test_cc(DisasCompare *cmp, int cc)
672 {
673     TCGv_i32 value;
674     TCGCond cond;
675     bool global = true;
676 
677     switch (cc) {
678     case 0: /* eq: Z */
679     case 1: /* ne: !Z */
680         cond = TCG_COND_EQ;
681         value = cpu_ZF;
682         break;
683 
684     case 2: /* cs: C */
685     case 3: /* cc: !C */
686         cond = TCG_COND_NE;
687         value = cpu_CF;
688         break;
689 
690     case 4: /* mi: N */
691     case 5: /* pl: !N */
692         cond = TCG_COND_LT;
693         value = cpu_NF;
694         break;
695 
696     case 6: /* vs: V */
697     case 7: /* vc: !V */
698         cond = TCG_COND_LT;
699         value = cpu_VF;
700         break;
701 
702     case 8: /* hi: C && !Z */
703     case 9: /* ls: !C || Z -> !(C && !Z) */
704         cond = TCG_COND_NE;
705         value = tcg_temp_new_i32();
706         global = false;
707         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
708            ZF is non-zero for !Z; so AND the two subexpressions.  */
709         tcg_gen_neg_i32(value, cpu_CF);
710         tcg_gen_and_i32(value, value, cpu_ZF);
711         break;
712 
713     case 10: /* ge: N == V -> N ^ V == 0 */
714     case 11: /* lt: N != V -> N ^ V != 0 */
715         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
716         cond = TCG_COND_GE;
717         value = tcg_temp_new_i32();
718         global = false;
719         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
720         break;
721 
722     case 12: /* gt: !Z && N == V */
723     case 13: /* le: Z || N != V */
724         cond = TCG_COND_NE;
725         value = tcg_temp_new_i32();
726         global = false;
727         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
728          * the sign bit then AND with ZF to yield the result.  */
729         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
730         tcg_gen_sari_i32(value, value, 31);
731         tcg_gen_andc_i32(value, cpu_ZF, value);
732         break;
733 
734     case 14: /* always */
735     case 15: /* always */
736         /* Use the ALWAYS condition, which will fold early.
737          * It doesn't matter what we use for the value.  */
738         cond = TCG_COND_ALWAYS;
739         value = cpu_ZF;
740         goto no_invert;
741 
742     default:
743         fprintf(stderr, "Bad condition code 0x%x\n", cc);
744         abort();
745     }
746 
747     if (cc & 1) {
748         cond = tcg_invert_cond(cond);
749     }
750 
751  no_invert:
752     cmp->cond = cond;
753     cmp->value = value;
754     cmp->value_global = global;
755 }
756 
757 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
758 {
759     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
760 }
761 
762 void arm_gen_test_cc(int cc, TCGLabel *label)
763 {
764     DisasCompare cmp;
765     arm_test_cc(&cmp, cc);
766     arm_jump_cc(&cmp, label);
767 }
768 
769 void gen_set_condexec(DisasContext *s)
770 {
771     if (s->condexec_mask) {
772         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
773 
774         store_cpu_field_constant(val, condexec_bits);
775     }
776 }
777 
778 void gen_update_pc(DisasContext *s, target_long diff)
779 {
780     gen_pc_plus_diff(s, cpu_R[15], diff);
781     s->pc_save = s->pc_curr + diff;
782 }
783 
784 /* Set PC and Thumb state from var.  var is marked as dead.  */
785 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
786 {
787     s->base.is_jmp = DISAS_JUMP;
788     tcg_gen_andi_i32(cpu_R[15], var, ~1);
789     tcg_gen_andi_i32(var, var, 1);
790     store_cpu_field(var, thumb);
791     s->pc_save = -1;
792 }
793 
794 /*
795  * Set PC and Thumb state from var. var is marked as dead.
796  * For M-profile CPUs, include logic to detect exception-return
797  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
798  * and BX reg, and no others, and happens only for code in Handler mode.
799  * The Security Extension also requires us to check for the FNC_RETURN
800  * which signals a function return from non-secure state; this can happen
801  * in both Handler and Thread mode.
802  * To avoid having to do multiple comparisons in inline generated code,
803  * we make the check we do here loose, so it will match for EXC_RETURN
804  * in Thread mode. For system emulation do_v7m_exception_exit() checks
805  * for these spurious cases and returns without doing anything (giving
806  * the same behaviour as for a branch to a non-magic address).
807  *
808  * In linux-user mode it is unclear what the right behaviour for an
809  * attempted FNC_RETURN should be, because in real hardware this will go
810  * directly to Secure code (ie not the Linux kernel) which will then treat
811  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
812  * attempt behave the way it would on a CPU without the security extension,
813  * which is to say "like a normal branch". That means we can simply treat
814  * all branches as normal with no magic address behaviour.
815  */
816 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
817 {
818     /* Generate the same code here as for a simple bx, but flag via
819      * s->base.is_jmp that we need to do the rest of the work later.
820      */
821     gen_bx(s, var);
822 #ifndef CONFIG_USER_ONLY
823     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
824         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
825         s->base.is_jmp = DISAS_BX_EXCRET;
826     }
827 #endif
828 }
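/*
 * Illustrative note: an architectural EXC_RETURN payload such as
 * 0xfffffff9 falls inside the magic range tested in
 * gen_bx_excret_final_code() below, while ordinary code addresses do not
 * and therefore take the plain DISAS_JUMP exit.
 */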
829 
830 static inline void gen_bx_excret_final_code(DisasContext *s)
831 {
832     /* Generate the code to finish possible exception return and end the TB */
833     DisasLabel excret_label = gen_disas_label(s);
834     uint32_t min_magic;
835 
836     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
837         /* Covers FNC_RETURN and EXC_RETURN magic */
838         min_magic = FNC_RETURN_MIN_MAGIC;
839     } else {
840         /* EXC_RETURN magic only */
841         min_magic = EXC_RETURN_MIN_MAGIC;
842     }
843 
844     /* Is the new PC value in the magic range indicating exception return? */
845     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
846     /* No: end the TB as we would for a DISAS_JMP */
847     if (s->ss_active) {
848         gen_singlestep_exception(s);
849     } else {
850         tcg_gen_exit_tb(NULL, 0);
851     }
852     set_disas_label(s, excret_label);
853     /* Yes: this is an exception return.
854      * At this point in runtime env->regs[15] and env->thumb will hold
855      * the exception-return magic number, which do_v7m_exception_exit()
856      * will read. Nothing else will be able to see those values because
857      * the cpu-exec main loop guarantees that we will always go straight
858      * from raising the exception to the exception-handling code.
859      *
860      * gen_ss_advance(s) does nothing on M profile currently but
861      * calling it is conceptually the right thing as we have executed
862      * this instruction (compare SWI, HVC, SMC handling).
863      */
864     gen_ss_advance(s);
865     gen_exception_internal(EXCP_EXCEPTION_EXIT);
866 }
867 
868 static inline void gen_bxns(DisasContext *s, int rm)
869 {
870     TCGv_i32 var = load_reg(s, rm);
871 
872     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
873      * we need to sync state before calling it, but:
874      *  - we don't need to do gen_update_pc() because the bxns helper will
875      *    always set the PC itself
876      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
877      *    unless it's outside an IT block or the last insn in an IT block,
878      *    so we know that condexec == 0 (already set at the top of the TB)
879      *    is correct in the non-UNPREDICTABLE cases, and we can choose
880      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
881      */
882     gen_helper_v7m_bxns(cpu_env, var);
883     tcg_temp_free_i32(var);
884     s->base.is_jmp = DISAS_EXIT;
885 }
886 
887 static inline void gen_blxns(DisasContext *s, int rm)
888 {
889     TCGv_i32 var = load_reg(s, rm);
890 
891     /* We don't need to sync condexec state, for the same reason as bxns.
892      * We do however need to set the PC, because the blxns helper reads it.
893      * The blxns helper may throw an exception.
894      */
895     gen_update_pc(s, curr_insn_len(s));
896     gen_helper_v7m_blxns(cpu_env, var);
897     tcg_temp_free_i32(var);
898     s->base.is_jmp = DISAS_EXIT;
899 }
900 
901 /* Variant of store_reg which uses branch&exchange logic when storing
902    to r15 in ARM architecture v7 and above. The source must be a temporary
903    and will be marked as dead. */
904 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
905 {
906     if (reg == 15 && ENABLE_ARCH_7) {
907         gen_bx(s, var);
908     } else {
909         store_reg(s, reg, var);
910     }
911 }
912 
913 /* Variant of store_reg which uses branch&exchange logic when storing
914  * to r15 in ARM architecture v5T and above. This is used for storing
915  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
916  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
917 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
918 {
919     if (reg == 15 && ENABLE_ARCH_5) {
920         gen_bx_excret(s, var);
921     } else {
922         store_reg(s, reg, var);
923     }
924 }
925 
926 #ifdef CONFIG_USER_ONLY
927 #define IS_USER_ONLY 1
928 #else
929 #define IS_USER_ONLY 0
930 #endif
931 
932 MemOp pow2_align(unsigned i)
933 {
934     static const MemOp mop_align[] = {
935         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
936         /*
937          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
938          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
939          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
940          */
941         MO_ALIGN_16
942     };
943     g_assert(i < ARRAY_SIZE(mop_align));
944     return mop_align[i];
945 }
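/*
 * For example (illustrative): pow2_align(2) is MO_ALIGN_4 (4-byte
 * alignment), while pow2_align(5) yields only MO_ALIGN_16 rather than
 * 32-byte alignment, as the FIXME above explains.
 */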
946 
947 /*
948  * Abstractions of "generate code to do a guest load/store for
949  * AArch32", where a vaddr is always 32 bits (and is zero
950  * extended if we're a 64 bit core) and data is also
951  * 32 bits unless specifically doing a 64 bit access.
952  * These functions work like tcg_gen_qemu_{ld,st}* except
953  * that the address argument is TCGv_i32 rather than TCGv.
954  */
955 
956 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
957 {
958     TCGv addr = tcg_temp_new();
959     tcg_gen_extu_i32_tl(addr, a32);
960 
961     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
962     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
963         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
964     }
965     return addr;
966 }
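/*
 * Worked example (illustrative): with SCTLR.B set, a byte access to
 * address 0x1000 is XORed with 4 - 1 = 3 and so touches 0x1003, a
 * halfword access is XORed with 2, and word or larger accesses are left
 * unchanged.
 */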
967 
968 /*
969  * Internal routines are used for NEON cases where the endianness
970  * and/or alignment has already been taken into account and manipulated.
971  */
972 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
973                               TCGv_i32 a32, int index, MemOp opc)
974 {
975     TCGv addr = gen_aa32_addr(s, a32, opc);
976     tcg_gen_qemu_ld_i32(val, addr, index, opc);
977     tcg_temp_free(addr);
978 }
979 
980 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
981                               TCGv_i32 a32, int index, MemOp opc)
982 {
983     TCGv addr = gen_aa32_addr(s, a32, opc);
984     tcg_gen_qemu_st_i32(val, addr, index, opc);
985     tcg_temp_free(addr);
986 }
987 
988 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
989                               TCGv_i32 a32, int index, MemOp opc)
990 {
991     TCGv addr = gen_aa32_addr(s, a32, opc);
992 
993     tcg_gen_qemu_ld_i64(val, addr, index, opc);
994 
995     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
996     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
997         tcg_gen_rotri_i64(val, val, 32);
998     }
999     tcg_temp_free(addr);
1000 }
1001 
1002 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
1003                               TCGv_i32 a32, int index, MemOp opc)
1004 {
1005     TCGv addr = gen_aa32_addr(s, a32, opc);
1006 
1007     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1008     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
1009         TCGv_i64 tmp = tcg_temp_new_i64();
1010         tcg_gen_rotri_i64(tmp, val, 32);
1011         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1012         tcg_temp_free_i64(tmp);
1013     } else {
1014         tcg_gen_qemu_st_i64(val, addr, index, opc);
1015     }
1016     tcg_temp_free(addr);
1017 }
1018 
1019 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1020                      int index, MemOp opc)
1021 {
1022     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1023 }
1024 
1025 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1026                      int index, MemOp opc)
1027 {
1028     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1029 }
1030 
1031 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1032                      int index, MemOp opc)
1033 {
1034     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1035 }
1036 
1037 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1038                      int index, MemOp opc)
1039 {
1040     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1041 }
1042 
1043 #define DO_GEN_LD(SUFF, OPC)                                            \
1044     static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1045                                          TCGv_i32 a32, int index)       \
1046     {                                                                   \
1047         gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1048     }
1049 
1050 #define DO_GEN_ST(SUFF, OPC)                                            \
1051     static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1052                                          TCGv_i32 a32, int index)       \
1053     {                                                                   \
1054         gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1055     }
1056 
1057 static inline void gen_hvc(DisasContext *s, int imm16)
1058 {
1059     /* The pre HVC helper handles cases when HVC gets trapped
1060      * as an undefined insn by runtime configuration (ie before
1061      * the insn really executes).
1062      */
1063     gen_update_pc(s, 0);
1064     gen_helper_pre_hvc(cpu_env);
1065     /* Otherwise we will treat this as a real exception which
1066      * happens after execution of the insn. (The distinction matters
1067      * for the PC value reported to the exception handler and also
1068      * for single stepping.)
1069      */
1070     s->svc_imm = imm16;
1071     gen_update_pc(s, curr_insn_len(s));
1072     s->base.is_jmp = DISAS_HVC;
1073 }
1074 
1075 static inline void gen_smc(DisasContext *s)
1076 {
1077     /* As with HVC, we may take an exception either before or after
1078      * the insn executes.
1079      */
1080     gen_update_pc(s, 0);
1081     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1082     gen_update_pc(s, curr_insn_len(s));
1083     s->base.is_jmp = DISAS_SMC;
1084 }
1085 
1086 static void gen_exception_internal_insn(DisasContext *s, int excp)
1087 {
1088     gen_set_condexec(s);
1089     gen_update_pc(s, 0);
1090     gen_exception_internal(excp);
1091     s->base.is_jmp = DISAS_NORETURN;
1092 }
1093 
1094 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1095 {
1096     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1097                                           tcg_constant_i32(syndrome), tcg_el);
1098 }
1099 
1100 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1101 {
1102     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1103 }
1104 
1105 static void gen_exception(int excp, uint32_t syndrome)
1106 {
1107     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1108                                        tcg_constant_i32(syndrome));
1109 }
1110 
1111 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1112                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1113 {
1114     if (s->aarch64) {
1115         gen_a64_update_pc(s, pc_diff);
1116     } else {
1117         gen_set_condexec(s);
1118         gen_update_pc(s, pc_diff);
1119     }
1120     gen_exception_el_v(excp, syn, tcg_el);
1121     s->base.is_jmp = DISAS_NORETURN;
1122 }
1123 
1124 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1125                            uint32_t syn, uint32_t target_el)
1126 {
1127     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1128                             tcg_constant_i32(target_el));
1129 }
1130 
1131 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1132                         int excp, uint32_t syn)
1133 {
1134     if (s->aarch64) {
1135         gen_a64_update_pc(s, pc_diff);
1136     } else {
1137         gen_set_condexec(s);
1138         gen_update_pc(s, pc_diff);
1139     }
1140     gen_exception(excp, syn);
1141     s->base.is_jmp = DISAS_NORETURN;
1142 }
1143 
1144 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1145 {
1146     gen_set_condexec(s);
1147     gen_update_pc(s, 0);
1148     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1149     s->base.is_jmp = DISAS_NORETURN;
1150 }
1151 
1152 void unallocated_encoding(DisasContext *s)
1153 {
1154     /* Unallocated and reserved encodings are uncategorized */
1155     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1156 }
1157 
1158 /* Force a TB lookup after an instruction that changes the CPU state.  */
1159 void gen_lookup_tb(DisasContext *s)
1160 {
1161     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1162     s->base.is_jmp = DISAS_EXIT;
1163 }
1164 
1165 static inline void gen_hlt(DisasContext *s, int imm)
1166 {
1167     /* HLT. This has two purposes.
1168      * Architecturally, it is an external halting debug instruction.
1169      * Since QEMU doesn't implement external debug, we treat this as
1170      * the architecture requires when halting debug is disabled: it will UNDEF.
1171      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1172      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1173      * must trigger semihosting even for ARMv7 and earlier, where
1174      * HLT was an undefined encoding.
1175      * In system mode, we don't allow userspace access to
1176      * semihosting, to provide some semblance of security
1177      * (and for consistency with our 32-bit semihosting).
1178      */
1179     if (semihosting_enabled(s->current_el == 0) &&
1180         (imm == (s->thumb ? 0x3c : 0xf000))) {
1181         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1182         return;
1183     }
1184 
1185     unallocated_encoding(s);
1186 }
1187 
1188 /*
1189  * Return the offset of a "full" NEON Dreg.
1190  */
1191 long neon_full_reg_offset(unsigned reg)
1192 {
1193     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1194 }
1195 
1196 /*
1197  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1198  * where 0 is the least significant end of the register.
1199  */
1200 long neon_element_offset(int reg, int element, MemOp memop)
1201 {
1202     int element_size = 1 << (memop & MO_SIZE);
1203     int ofs = element * element_size;
1204 #if HOST_BIG_ENDIAN
1205     /*
1206      * Calculate the offset assuming fully little-endian,
1207      * then XOR to account for the order of the 8-byte units.
1208      */
1209     if (element_size < 8) {
1210         ofs ^= 8 - element_size;
1211     }
1212 #endif
1213     return neon_full_reg_offset(reg) + ofs;
1214 }
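/*
 * Worked example (illustrative): on a big-endian host, element 3 of a
 * Dreg accessed as MO_16 gives ofs = 3 * 2 = 6, and the XOR with
 * 8 - 2 = 6 maps it to offset 0 within the 8-byte unit.
 */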
1215 
1216 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1217 long vfp_reg_offset(bool dp, unsigned reg)
1218 {
1219     if (dp) {
1220         return neon_element_offset(reg, 0, MO_64);
1221     } else {
1222         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1223     }
1224 }
1225 
1226 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1227 {
1228     long off = neon_element_offset(reg, ele, memop);
1229 
1230     switch (memop) {
1231     case MO_SB:
1232         tcg_gen_ld8s_i32(dest, cpu_env, off);
1233         break;
1234     case MO_UB:
1235         tcg_gen_ld8u_i32(dest, cpu_env, off);
1236         break;
1237     case MO_SW:
1238         tcg_gen_ld16s_i32(dest, cpu_env, off);
1239         break;
1240     case MO_UW:
1241         tcg_gen_ld16u_i32(dest, cpu_env, off);
1242         break;
1243     case MO_UL:
1244     case MO_SL:
1245         tcg_gen_ld_i32(dest, cpu_env, off);
1246         break;
1247     default:
1248         g_assert_not_reached();
1249     }
1250 }
1251 
1252 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1253 {
1254     long off = neon_element_offset(reg, ele, memop);
1255 
1256     switch (memop) {
1257     case MO_SL:
1258         tcg_gen_ld32s_i64(dest, cpu_env, off);
1259         break;
1260     case MO_UL:
1261         tcg_gen_ld32u_i64(dest, cpu_env, off);
1262         break;
1263     case MO_UQ:
1264         tcg_gen_ld_i64(dest, cpu_env, off);
1265         break;
1266     default:
1267         g_assert_not_reached();
1268     }
1269 }
1270 
1271 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1272 {
1273     long off = neon_element_offset(reg, ele, memop);
1274 
1275     switch (memop) {
1276     case MO_8:
1277         tcg_gen_st8_i32(src, cpu_env, off);
1278         break;
1279     case MO_16:
1280         tcg_gen_st16_i32(src, cpu_env, off);
1281         break;
1282     case MO_32:
1283         tcg_gen_st_i32(src, cpu_env, off);
1284         break;
1285     default:
1286         g_assert_not_reached();
1287     }
1288 }
1289 
1290 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1291 {
1292     long off = neon_element_offset(reg, ele, memop);
1293 
1294     switch (memop) {
1295     case MO_32:
1296         tcg_gen_st32_i64(src, cpu_env, off);
1297         break;
1298     case MO_64:
1299         tcg_gen_st_i64(src, cpu_env, off);
1300         break;
1301     default:
1302         g_assert_not_reached();
1303     }
1304 }
1305 
1306 #define ARM_CP_RW_BIT   (1 << 20)
1307 
1308 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1309 {
1310     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1311 }
1312 
1313 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1314 {
1315     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1316 }
1317 
1318 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1319 {
1320     TCGv_i32 var = tcg_temp_new_i32();
1321     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1322     return var;
1323 }
1324 
1325 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1326 {
1327     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1328     tcg_temp_free_i32(var);
1329 }
1330 
1331 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1332 {
1333     iwmmxt_store_reg(cpu_M0, rn);
1334 }
1335 
1336 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1337 {
1338     iwmmxt_load_reg(cpu_M0, rn);
1339 }
1340 
1341 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1342 {
1343     iwmmxt_load_reg(cpu_V1, rn);
1344     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1345 }
1346 
1347 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1348 {
1349     iwmmxt_load_reg(cpu_V1, rn);
1350     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1351 }
1352 
1353 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1354 {
1355     iwmmxt_load_reg(cpu_V1, rn);
1356     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1357 }
1358 
1359 #define IWMMXT_OP(name) \
1360 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1361 { \
1362     iwmmxt_load_reg(cpu_V1, rn); \
1363     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1364 }
1365 
1366 #define IWMMXT_OP_ENV(name) \
1367 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1368 { \
1369     iwmmxt_load_reg(cpu_V1, rn); \
1370     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1371 }
1372 
1373 #define IWMMXT_OP_ENV_SIZE(name) \
1374 IWMMXT_OP_ENV(name##b) \
1375 IWMMXT_OP_ENV(name##w) \
1376 IWMMXT_OP_ENV(name##l)
1377 
1378 #define IWMMXT_OP_ENV1(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0(void) \
1380 { \
1381     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1382 }
1383 
1384 IWMMXT_OP(maddsq)
1385 IWMMXT_OP(madduq)
1386 IWMMXT_OP(sadb)
1387 IWMMXT_OP(sadw)
1388 IWMMXT_OP(mulslw)
1389 IWMMXT_OP(mulshw)
1390 IWMMXT_OP(mululw)
1391 IWMMXT_OP(muluhw)
1392 IWMMXT_OP(macsw)
1393 IWMMXT_OP(macuw)
1394 
1395 IWMMXT_OP_ENV_SIZE(unpackl)
1396 IWMMXT_OP_ENV_SIZE(unpackh)
1397 
1398 IWMMXT_OP_ENV1(unpacklub)
1399 IWMMXT_OP_ENV1(unpackluw)
1400 IWMMXT_OP_ENV1(unpacklul)
1401 IWMMXT_OP_ENV1(unpackhub)
1402 IWMMXT_OP_ENV1(unpackhuw)
1403 IWMMXT_OP_ENV1(unpackhul)
1404 IWMMXT_OP_ENV1(unpacklsb)
1405 IWMMXT_OP_ENV1(unpacklsw)
1406 IWMMXT_OP_ENV1(unpacklsl)
1407 IWMMXT_OP_ENV1(unpackhsb)
1408 IWMMXT_OP_ENV1(unpackhsw)
1409 IWMMXT_OP_ENV1(unpackhsl)
1410 
1411 IWMMXT_OP_ENV_SIZE(cmpeq)
1412 IWMMXT_OP_ENV_SIZE(cmpgtu)
1413 IWMMXT_OP_ENV_SIZE(cmpgts)
1414 
1415 IWMMXT_OP_ENV_SIZE(mins)
1416 IWMMXT_OP_ENV_SIZE(minu)
1417 IWMMXT_OP_ENV_SIZE(maxs)
1418 IWMMXT_OP_ENV_SIZE(maxu)
1419 
1420 IWMMXT_OP_ENV_SIZE(subn)
1421 IWMMXT_OP_ENV_SIZE(addn)
1422 IWMMXT_OP_ENV_SIZE(subu)
1423 IWMMXT_OP_ENV_SIZE(addu)
1424 IWMMXT_OP_ENV_SIZE(subs)
1425 IWMMXT_OP_ENV_SIZE(adds)
1426 
1427 IWMMXT_OP_ENV(avgb0)
1428 IWMMXT_OP_ENV(avgb1)
1429 IWMMXT_OP_ENV(avgw0)
1430 IWMMXT_OP_ENV(avgw1)
1431 
1432 IWMMXT_OP_ENV(packuw)
1433 IWMMXT_OP_ENV(packul)
1434 IWMMXT_OP_ENV(packuq)
1435 IWMMXT_OP_ENV(packsw)
1436 IWMMXT_OP_ENV(packsl)
1437 IWMMXT_OP_ENV(packsq)
1438 
1439 static void gen_op_iwmmxt_set_mup(void)
1440 {
1441     TCGv_i32 tmp;
1442     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1443     tcg_gen_ori_i32(tmp, tmp, 2);
1444     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1445 }
1446 
1447 static void gen_op_iwmmxt_set_cup(void)
1448 {
1449     TCGv_i32 tmp;
1450     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1451     tcg_gen_ori_i32(tmp, tmp, 1);
1452     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1453 }
1454 
1455 static void gen_op_iwmmxt_setpsr_nz(void)
1456 {
1457     TCGv_i32 tmp = tcg_temp_new_i32();
1458     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1459     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1460 }
1461 
1462 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1463 {
1464     iwmmxt_load_reg(cpu_V1, rn);
1465     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1466     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1467 }
1468 
1469 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1470                                      TCGv_i32 dest)
1471 {
1472     int rd;
1473     uint32_t offset;
1474     TCGv_i32 tmp;
1475 
1476     rd = (insn >> 16) & 0xf;
1477     tmp = load_reg(s, rd);
1478 
1479     offset = (insn & 0xff) << ((insn >> 7) & 2);
1480     if (insn & (1 << 24)) {
1481         /* Pre indexed */
1482         if (insn & (1 << 23))
1483             tcg_gen_addi_i32(tmp, tmp, offset);
1484         else
1485             tcg_gen_addi_i32(tmp, tmp, -offset);
1486         tcg_gen_mov_i32(dest, tmp);
1487         if (insn & (1 << 21))
1488             store_reg(s, rd, tmp);
1489         else
1490             tcg_temp_free_i32(tmp);
1491     } else if (insn & (1 << 21)) {
1492         /* Post indexed */
1493         tcg_gen_mov_i32(dest, tmp);
1494         if (insn & (1 << 23))
1495             tcg_gen_addi_i32(tmp, tmp, offset);
1496         else
1497             tcg_gen_addi_i32(tmp, tmp, -offset);
1498         store_reg(s, rd, tmp);
1499     } else if (!(insn & (1 << 23)))
1500         return 1;
1501     return 0;
1502 }
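/*
 * Illustrative note: the 8-bit offset is scaled by 4 when bit 8 of the
 * insn is set, since (insn >> 7) & 2 selects a shift of 0 or 2; an
 * encoded offset of 0x10 therefore addresses Rn +/- 0x10 or Rn +/- 0x40.
 */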
1503 
1504 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1505 {
1506     int rd = (insn >> 0) & 0xf;
1507     TCGv_i32 tmp;
1508 
1509     if (insn & (1 << 8)) {
1510         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1511             return 1;
1512         } else {
1513             tmp = iwmmxt_load_creg(rd);
1514         }
1515     } else {
1516         tmp = tcg_temp_new_i32();
1517         iwmmxt_load_reg(cpu_V0, rd);
1518         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1519     }
1520     tcg_gen_andi_i32(tmp, tmp, mask);
1521     tcg_gen_mov_i32(dest, tmp);
1522     tcg_temp_free_i32(tmp);
1523     return 0;
1524 }
1525 
1526 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1527    (ie. an undefined instruction).  */
1528 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1529 {
1530     int rd, wrd;
1531     int rdhi, rdlo, rd0, rd1, i;
1532     TCGv_i32 addr;
1533     TCGv_i32 tmp, tmp2, tmp3;
1534 
1535     if ((insn & 0x0e000e00) == 0x0c000000) {
1536         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1537             wrd = insn & 0xf;
1538             rdlo = (insn >> 12) & 0xf;
1539             rdhi = (insn >> 16) & 0xf;
1540             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1541                 iwmmxt_load_reg(cpu_V0, wrd);
1542                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1543                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1544             } else {                                    /* TMCRR */
1545                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1546                 iwmmxt_store_reg(cpu_V0, wrd);
1547                 gen_op_iwmmxt_set_mup();
1548             }
1549             return 0;
1550         }
1551 
1552         wrd = (insn >> 12) & 0xf;
1553         addr = tcg_temp_new_i32();
1554         if (gen_iwmmxt_address(s, insn, addr)) {
1555             tcg_temp_free_i32(addr);
1556             return 1;
1557         }
1558         if (insn & ARM_CP_RW_BIT) {
1559             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1560                 tmp = tcg_temp_new_i32();
1561                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1562                 iwmmxt_store_creg(wrd, tmp);
1563             } else {
1564                 i = 1;
1565                 if (insn & (1 << 8)) {
1566                     if (insn & (1 << 22)) {             /* WLDRD */
1567                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1568                         i = 0;
1569                     } else {                            /* WLDRW wRd */
1570                         tmp = tcg_temp_new_i32();
1571                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1572                     }
1573                 } else {
1574                     tmp = tcg_temp_new_i32();
1575                     if (insn & (1 << 22)) {             /* WLDRH */
1576                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1577                     } else {                            /* WLDRB */
1578                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1579                     }
1580                 }
1581                 if (i) {
1582                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1583                     tcg_temp_free_i32(tmp);
1584                 }
1585                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1586             }
1587         } else {
1588             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1589                 tmp = iwmmxt_load_creg(wrd);
1590                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1591             } else {
1592                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1593                 tmp = tcg_temp_new_i32();
1594                 if (insn & (1 << 8)) {
1595                     if (insn & (1 << 22)) {             /* WSTRD */
1596                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1597                     } else {                            /* WSTRW wRd */
1598                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1599                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1600                     }
1601                 } else {
1602                     if (insn & (1 << 22)) {             /* WSTRH */
1603                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1604                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1605                     } else {                            /* WSTRB */
1606                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1607                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1608                     }
1609                 }
1610             }
1611             tcg_temp_free_i32(tmp);
1612         }
1613         tcg_temp_free_i32(addr);
1614         return 0;
1615     }
1616 
1617     if ((insn & 0x0f000000) != 0x0e000000)
1618         return 1;
1619 
1620     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1621     case 0x000:                                                 /* WOR */
1622         wrd = (insn >> 12) & 0xf;
1623         rd0 = (insn >> 0) & 0xf;
1624         rd1 = (insn >> 16) & 0xf;
1625         gen_op_iwmmxt_movq_M0_wRn(rd0);
1626         gen_op_iwmmxt_orq_M0_wRn(rd1);
1627         gen_op_iwmmxt_setpsr_nz();
1628         gen_op_iwmmxt_movq_wRn_M0(wrd);
1629         gen_op_iwmmxt_set_mup();
1630         gen_op_iwmmxt_set_cup();
1631         break;
1632     case 0x011:                                                 /* TMCR */
1633         if (insn & 0xf)
1634             return 1;
1635         rd = (insn >> 12) & 0xf;
1636         wrd = (insn >> 16) & 0xf;
1637         switch (wrd) {
1638         case ARM_IWMMXT_wCID:
1639         case ARM_IWMMXT_wCASF:
1640             break;
1641         case ARM_IWMMXT_wCon:
1642             gen_op_iwmmxt_set_cup();
1643             /* Fall through.  */
1644         case ARM_IWMMXT_wCSSF:
1645             tmp = iwmmxt_load_creg(wrd);
1646             tmp2 = load_reg(s, rd);
1647             tcg_gen_andc_i32(tmp, tmp, tmp2);
1648             tcg_temp_free_i32(tmp2);
1649             iwmmxt_store_creg(wrd, tmp);
1650             break;
1651         case ARM_IWMMXT_wCGR0:
1652         case ARM_IWMMXT_wCGR1:
1653         case ARM_IWMMXT_wCGR2:
1654         case ARM_IWMMXT_wCGR3:
1655             gen_op_iwmmxt_set_cup();
1656             tmp = load_reg(s, rd);
1657             iwmmxt_store_creg(wrd, tmp);
1658             break;
1659         default:
1660             return 1;
1661         }
1662         break;
1663     case 0x100:                                                 /* WXOR */
1664         wrd = (insn >> 12) & 0xf;
1665         rd0 = (insn >> 0) & 0xf;
1666         rd1 = (insn >> 16) & 0xf;
1667         gen_op_iwmmxt_movq_M0_wRn(rd0);
1668         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1669         gen_op_iwmmxt_setpsr_nz();
1670         gen_op_iwmmxt_movq_wRn_M0(wrd);
1671         gen_op_iwmmxt_set_mup();
1672         gen_op_iwmmxt_set_cup();
1673         break;
1674     case 0x111:                                                 /* TMRC */
1675         if (insn & 0xf)
1676             return 1;
1677         rd = (insn >> 12) & 0xf;
1678         wrd = (insn >> 16) & 0xf;
1679         tmp = iwmmxt_load_creg(wrd);
1680         store_reg(s, rd, tmp);
1681         break;
1682     case 0x300:                                                 /* WANDN */
1683         wrd = (insn >> 12) & 0xf;
1684         rd0 = (insn >> 0) & 0xf;
1685         rd1 = (insn >> 16) & 0xf;
1686         gen_op_iwmmxt_movq_M0_wRn(rd0);
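             /* WANDN: AND wR[rd1] with the bitwise complement of wR[rd0].  */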
1687         tcg_gen_not_i64(cpu_M0, cpu_M0);
1688         gen_op_iwmmxt_andq_M0_wRn(rd1);
1689         gen_op_iwmmxt_setpsr_nz();
1690         gen_op_iwmmxt_movq_wRn_M0(wrd);
1691         gen_op_iwmmxt_set_mup();
1692         gen_op_iwmmxt_set_cup();
1693         break;
1694     case 0x200:                                                 /* WAND */
1695         wrd = (insn >> 12) & 0xf;
1696         rd0 = (insn >> 0) & 0xf;
1697         rd1 = (insn >> 16) & 0xf;
1698         gen_op_iwmmxt_movq_M0_wRn(rd0);
1699         gen_op_iwmmxt_andq_M0_wRn(rd1);
1700         gen_op_iwmmxt_setpsr_nz();
1701         gen_op_iwmmxt_movq_wRn_M0(wrd);
1702         gen_op_iwmmxt_set_mup();
1703         gen_op_iwmmxt_set_cup();
1704         break;
1705     case 0x810: case 0xa10:                             /* WMADD */
1706         wrd = (insn >> 12) & 0xf;
1707         rd0 = (insn >> 0) & 0xf;
1708         rd1 = (insn >> 16) & 0xf;
1709         gen_op_iwmmxt_movq_M0_wRn(rd0);
1710         if (insn & (1 << 21))
1711             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1712         else
1713             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1714         gen_op_iwmmxt_movq_wRn_M0(wrd);
1715         gen_op_iwmmxt_set_mup();
1716         break;
1717     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1718         wrd = (insn >> 12) & 0xf;
1719         rd0 = (insn >> 16) & 0xf;
1720         rd1 = (insn >> 0) & 0xf;
1721         gen_op_iwmmxt_movq_M0_wRn(rd0);
1722         switch ((insn >> 22) & 3) {
1723         case 0:
1724             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1725             break;
1726         case 1:
1727             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1728             break;
1729         case 2:
1730             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1731             break;
1732         case 3:
1733             return 1;
1734         }
1735         gen_op_iwmmxt_movq_wRn_M0(wrd);
1736         gen_op_iwmmxt_set_mup();
1737         gen_op_iwmmxt_set_cup();
1738         break;
1739     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1740         wrd = (insn >> 12) & 0xf;
1741         rd0 = (insn >> 16) & 0xf;
1742         rd1 = (insn >> 0) & 0xf;
1743         gen_op_iwmmxt_movq_M0_wRn(rd0);
1744         switch ((insn >> 22) & 3) {
1745         case 0:
1746             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1747             break;
1748         case 1:
1749             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1750             break;
1751         case 2:
1752             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1753             break;
1754         case 3:
1755             return 1;
1756         }
1757         gen_op_iwmmxt_movq_wRn_M0(wrd);
1758         gen_op_iwmmxt_set_mup();
1759         gen_op_iwmmxt_set_cup();
1760         break;
1761     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1762         wrd = (insn >> 12) & 0xf;
1763         rd0 = (insn >> 16) & 0xf;
1764         rd1 = (insn >> 0) & 0xf;
1765         gen_op_iwmmxt_movq_M0_wRn(rd0);
1766         if (insn & (1 << 22))
1767             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1768         else
1769             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1770         if (!(insn & (1 << 20)))
1771             gen_op_iwmmxt_addl_M0_wRn(wrd);
1772         gen_op_iwmmxt_movq_wRn_M0(wrd);
1773         gen_op_iwmmxt_set_mup();
1774         break;
1775     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1776         wrd = (insn >> 12) & 0xf;
1777         rd0 = (insn >> 16) & 0xf;
1778         rd1 = (insn >> 0) & 0xf;
1779         gen_op_iwmmxt_movq_M0_wRn(rd0);
1780         if (insn & (1 << 21)) {
1781             if (insn & (1 << 20))
1782                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1783             else
1784                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1785         } else {
1786             if (insn & (1 << 20))
1787                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1788             else
1789                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1790         }
1791         gen_op_iwmmxt_movq_wRn_M0(wrd);
1792         gen_op_iwmmxt_set_mup();
1793         break;
1794     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1795         wrd = (insn >> 12) & 0xf;
1796         rd0 = (insn >> 16) & 0xf;
1797         rd1 = (insn >> 0) & 0xf;
1798         gen_op_iwmmxt_movq_M0_wRn(rd0);
1799         if (insn & (1 << 21))
1800             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1801         else
1802             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1803         if (!(insn & (1 << 20))) {
1804             iwmmxt_load_reg(cpu_V1, wrd);
1805             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1806         }
1807         gen_op_iwmmxt_movq_wRn_M0(wrd);
1808         gen_op_iwmmxt_set_mup();
1809         break;
1810     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1811         wrd = (insn >> 12) & 0xf;
1812         rd0 = (insn >> 16) & 0xf;
1813         rd1 = (insn >> 0) & 0xf;
1814         gen_op_iwmmxt_movq_M0_wRn(rd0);
1815         switch ((insn >> 22) & 3) {
1816         case 0:
1817             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1818             break;
1819         case 1:
1820             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1821             break;
1822         case 2:
1823             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1824             break;
1825         case 3:
1826             return 1;
1827         }
1828         gen_op_iwmmxt_movq_wRn_M0(wrd);
1829         gen_op_iwmmxt_set_mup();
1830         gen_op_iwmmxt_set_cup();
1831         break;
1832     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1833         wrd = (insn >> 12) & 0xf;
1834         rd0 = (insn >> 16) & 0xf;
1835         rd1 = (insn >> 0) & 0xf;
1836         gen_op_iwmmxt_movq_M0_wRn(rd0);
1837         if (insn & (1 << 22)) {
1838             if (insn & (1 << 20))
1839                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1840             else
1841                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1842         } else {
1843             if (insn & (1 << 20))
1844                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1845             else
1846                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1847         }
1848         gen_op_iwmmxt_movq_wRn_M0(wrd);
1849         gen_op_iwmmxt_set_mup();
1850         gen_op_iwmmxt_set_cup();
1851         break;
1852     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1853         wrd = (insn >> 12) & 0xf;
1854         rd0 = (insn >> 16) & 0xf;
1855         rd1 = (insn >> 0) & 0xf;
1856         gen_op_iwmmxt_movq_M0_wRn(rd0);
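             /* The alignment offset is the low 3 bits of the selected wCGRn.  */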
1857         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1858         tcg_gen_andi_i32(tmp, tmp, 7);
1859         iwmmxt_load_reg(cpu_V1, rd1);
1860         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1861         tcg_temp_free_i32(tmp);
1862         gen_op_iwmmxt_movq_wRn_M0(wrd);
1863         gen_op_iwmmxt_set_mup();
1864         break;
1865     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1866         if (((insn >> 6) & 3) == 3)
1867             return 1;
1868         rd = (insn >> 12) & 0xf;
1869         wrd = (insn >> 16) & 0xf;
1870         tmp = load_reg(s, rd);
1871         gen_op_iwmmxt_movq_M0_wRn(wrd);
1872         switch ((insn >> 6) & 3) {
1873         case 0:
1874             tmp2 = tcg_constant_i32(0xff);
1875             tmp3 = tcg_constant_i32((insn & 7) << 3);
1876             break;
1877         case 1:
1878             tmp2 = tcg_constant_i32(0xffff);
1879             tmp3 = tcg_constant_i32((insn & 3) << 4);
1880             break;
1881         case 2:
1882             tmp2 = tcg_constant_i32(0xffffffff);
1883             tmp3 = tcg_constant_i32((insn & 1) << 5);
1884             break;
1885         default:
1886             g_assert_not_reached();
1887         }
1888         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1889         tcg_temp_free_i32(tmp);
1890         gen_op_iwmmxt_movq_wRn_M0(wrd);
1891         gen_op_iwmmxt_set_mup();
1892         break;
1893     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1894         rd = (insn >> 12) & 0xf;
1895         wrd = (insn >> 16) & 0xf;
1896         if (rd == 15 || ((insn >> 22) & 3) == 3)
1897             return 1;
1898         gen_op_iwmmxt_movq_M0_wRn(wrd);
1899         tmp = tcg_temp_new_i32();
1900         switch ((insn >> 22) & 3) {
1901         case 0:
1902             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1903             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1904             if (insn & 8) {
1905                 tcg_gen_ext8s_i32(tmp, tmp);
1906             } else {
1907                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1908             }
1909             break;
1910         case 1:
1911             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1912             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1913             if (insn & 8) {
1914                 tcg_gen_ext16s_i32(tmp, tmp);
1915             } else {
1916                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1917             }
1918             break;
1919         case 2:
1920             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1921             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1922             break;
1923         }
1924         store_reg(s, rd, tmp);
1925         break;
1926     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1927         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1928             return 1;
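             /* Extract the selected 4-bit flag field of wCASF into NZCV.  */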
1929         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1930         switch ((insn >> 22) & 3) {
1931         case 0:
1932             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1933             break;
1934         case 1:
1935             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1936             break;
1937         case 2:
1938             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1939             break;
1940         }
1941         tcg_gen_shli_i32(tmp, tmp, 28);
1942         gen_set_nzcv(tmp);
1943         tcg_temp_free_i32(tmp);
1944         break;
1945     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1946         if (((insn >> 6) & 3) == 3)
1947             return 1;
1948         rd = (insn >> 12) & 0xf;
1949         wrd = (insn >> 16) & 0xf;
1950         tmp = load_reg(s, rd);
1951         switch ((insn >> 6) & 3) {
1952         case 0:
1953             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1954             break;
1955         case 1:
1956             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1957             break;
1958         case 2:
1959             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1960             break;
1961         }
1962         tcg_temp_free_i32(tmp);
1963         gen_op_iwmmxt_movq_wRn_M0(wrd);
1964         gen_op_iwmmxt_set_mup();
1965         break;
1966     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1967         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1968             return 1;
1969         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1970         tmp2 = tcg_temp_new_i32();
1971         tcg_gen_mov_i32(tmp2, tmp);
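             /* AND all the per-element flag fields together before setting NZCV.  */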
1972         switch ((insn >> 22) & 3) {
1973         case 0:
1974             for (i = 0; i < 7; i ++) {
1975                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1976                 tcg_gen_and_i32(tmp, tmp, tmp2);
1977             }
1978             break;
1979         case 1:
1980             for (i = 0; i < 3; i ++) {
1981                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1982                 tcg_gen_and_i32(tmp, tmp, tmp2);
1983             }
1984             break;
1985         case 2:
1986             tcg_gen_shli_i32(tmp2, tmp2, 16);
1987             tcg_gen_and_i32(tmp, tmp, tmp2);
1988             break;
1989         }
1990         gen_set_nzcv(tmp);
1991         tcg_temp_free_i32(tmp2);
1992         tcg_temp_free_i32(tmp);
1993         break;
1994     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1995         wrd = (insn >> 12) & 0xf;
1996         rd0 = (insn >> 16) & 0xf;
1997         gen_op_iwmmxt_movq_M0_wRn(rd0);
1998         switch ((insn >> 22) & 3) {
1999         case 0:
2000             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2001             break;
2002         case 1:
2003             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2004             break;
2005         case 2:
2006             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2007             break;
2008         case 3:
2009             return 1;
2010         }
2011         gen_op_iwmmxt_movq_wRn_M0(wrd);
2012         gen_op_iwmmxt_set_mup();
2013         break;
2014     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2015         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2016             return 1;
2017         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2018         tmp2 = tcg_temp_new_i32();
2019         tcg_gen_mov_i32(tmp2, tmp);
2020         switch ((insn >> 22) & 3) {
2021         case 0:
2022             for (i = 0; i < 7; i ++) {
2023                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2024                 tcg_gen_or_i32(tmp, tmp, tmp2);
2025             }
2026             break;
2027         case 1:
2028             for (i = 0; i < 3; i ++) {
2029                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2030                 tcg_gen_or_i32(tmp, tmp, tmp2);
2031             }
2032             break;
2033         case 2:
2034             tcg_gen_shli_i32(tmp2, tmp2, 16);
2035             tcg_gen_or_i32(tmp, tmp, tmp2);
2036             break;
2037         }
2038         gen_set_nzcv(tmp);
2039         tcg_temp_free_i32(tmp2);
2040         tcg_temp_free_i32(tmp);
2041         break;
2042     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2043         rd = (insn >> 12) & 0xf;
2044         rd0 = (insn >> 16) & 0xf;
2045         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2046             return 1;
2047         gen_op_iwmmxt_movq_M0_wRn(rd0);
2048         tmp = tcg_temp_new_i32();
2049         switch ((insn >> 22) & 3) {
2050         case 0:
2051             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2052             break;
2053         case 1:
2054             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2055             break;
2056         case 2:
2057             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2058             break;
2059         }
2060         store_reg(s, rd, tmp);
2061         break;
2062     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2063     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2064         wrd = (insn >> 12) & 0xf;
2065         rd0 = (insn >> 16) & 0xf;
2066         rd1 = (insn >> 0) & 0xf;
2067         gen_op_iwmmxt_movq_M0_wRn(rd0);
2068         switch ((insn >> 22) & 3) {
2069         case 0:
2070             if (insn & (1 << 21))
2071                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2072             else
2073                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2074             break;
2075         case 1:
2076             if (insn & (1 << 21))
2077                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2078             else
2079                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2080             break;
2081         case 2:
2082             if (insn & (1 << 21))
2083                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2084             else
2085                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2086             break;
2087         case 3:
2088             return 1;
2089         }
2090         gen_op_iwmmxt_movq_wRn_M0(wrd);
2091         gen_op_iwmmxt_set_mup();
2092         gen_op_iwmmxt_set_cup();
2093         break;
2094     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2095     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2096         wrd = (insn >> 12) & 0xf;
2097         rd0 = (insn >> 16) & 0xf;
2098         gen_op_iwmmxt_movq_M0_wRn(rd0);
2099         switch ((insn >> 22) & 3) {
2100         case 0:
2101             if (insn & (1 << 21))
2102                 gen_op_iwmmxt_unpacklsb_M0();
2103             else
2104                 gen_op_iwmmxt_unpacklub_M0();
2105             break;
2106         case 1:
2107             if (insn & (1 << 21))
2108                 gen_op_iwmmxt_unpacklsw_M0();
2109             else
2110                 gen_op_iwmmxt_unpackluw_M0();
2111             break;
2112         case 2:
2113             if (insn & (1 << 21))
2114                 gen_op_iwmmxt_unpacklsl_M0();
2115             else
2116                 gen_op_iwmmxt_unpacklul_M0();
2117             break;
2118         case 3:
2119             return 1;
2120         }
2121         gen_op_iwmmxt_movq_wRn_M0(wrd);
2122         gen_op_iwmmxt_set_mup();
2123         gen_op_iwmmxt_set_cup();
2124         break;
2125     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2126     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2127         wrd = (insn >> 12) & 0xf;
2128         rd0 = (insn >> 16) & 0xf;
2129         gen_op_iwmmxt_movq_M0_wRn(rd0);
2130         switch ((insn >> 22) & 3) {
2131         case 0:
2132             if (insn & (1 << 21))
2133                 gen_op_iwmmxt_unpackhsb_M0();
2134             else
2135                 gen_op_iwmmxt_unpackhub_M0();
2136             break;
2137         case 1:
2138             if (insn & (1 << 21))
2139                 gen_op_iwmmxt_unpackhsw_M0();
2140             else
2141                 gen_op_iwmmxt_unpackhuw_M0();
2142             break;
2143         case 2:
2144             if (insn & (1 << 21))
2145                 gen_op_iwmmxt_unpackhsl_M0();
2146             else
2147                 gen_op_iwmmxt_unpackhul_M0();
2148             break;
2149         case 3:
2150             return 1;
2151         }
2152         gen_op_iwmmxt_movq_wRn_M0(wrd);
2153         gen_op_iwmmxt_set_mup();
2154         gen_op_iwmmxt_set_cup();
2155         break;
2156     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2157     case 0x214: case 0x614: case 0xa14: case 0xe14:
2158         if (((insn >> 22) & 3) == 0)
2159             return 1;
2160         wrd = (insn >> 12) & 0xf;
2161         rd0 = (insn >> 16) & 0xf;
2162         gen_op_iwmmxt_movq_M0_wRn(rd0);
2163         tmp = tcg_temp_new_i32();
2164         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2165             tcg_temp_free_i32(tmp);
2166             return 1;
2167         }
2168         switch ((insn >> 22) & 3) {
2169         case 1:
2170             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2171             break;
2172         case 2:
2173             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2174             break;
2175         case 3:
2176             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2177             break;
2178         }
2179         tcg_temp_free_i32(tmp);
2180         gen_op_iwmmxt_movq_wRn_M0(wrd);
2181         gen_op_iwmmxt_set_mup();
2182         gen_op_iwmmxt_set_cup();
2183         break;
2184     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2185     case 0x014: case 0x414: case 0x814: case 0xc14:
2186         if (((insn >> 22) & 3) == 0)
2187             return 1;
2188         wrd = (insn >> 12) & 0xf;
2189         rd0 = (insn >> 16) & 0xf;
2190         gen_op_iwmmxt_movq_M0_wRn(rd0);
2191         tmp = tcg_temp_new_i32();
2192         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2193             tcg_temp_free_i32(tmp);
2194             return 1;
2195         }
2196         switch ((insn >> 22) & 3) {
2197         case 1:
2198             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2199             break;
2200         case 2:
2201             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2202             break;
2203         case 3:
2204             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2205             break;
2206         }
2207         tcg_temp_free_i32(tmp);
2208         gen_op_iwmmxt_movq_wRn_M0(wrd);
2209         gen_op_iwmmxt_set_mup();
2210         gen_op_iwmmxt_set_cup();
2211         break;
2212     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2213     case 0x114: case 0x514: case 0x914: case 0xd14:
2214         if (((insn >> 22) & 3) == 0)
2215             return 1;
2216         wrd = (insn >> 12) & 0xf;
2217         rd0 = (insn >> 16) & 0xf;
2218         gen_op_iwmmxt_movq_M0_wRn(rd0);
2219         tmp = tcg_temp_new_i32();
2220         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2221             tcg_temp_free_i32(tmp);
2222             return 1;
2223         }
2224         switch ((insn >> 22) & 3) {
2225         case 1:
2226             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2227             break;
2228         case 2:
2229             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2230             break;
2231         case 3:
2232             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2233             break;
2234         }
2235         tcg_temp_free_i32(tmp);
2236         gen_op_iwmmxt_movq_wRn_M0(wrd);
2237         gen_op_iwmmxt_set_mup();
2238         gen_op_iwmmxt_set_cup();
2239         break;
2240     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2241     case 0x314: case 0x714: case 0xb14: case 0xf14:
2242         if (((insn >> 22) & 3) == 0)
2243             return 1;
2244         wrd = (insn >> 12) & 0xf;
2245         rd0 = (insn >> 16) & 0xf;
2246         gen_op_iwmmxt_movq_M0_wRn(rd0);
2247         tmp = tcg_temp_new_i32();
2248         switch ((insn >> 22) & 3) {
2249         case 1:
2250             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2251                 tcg_temp_free_i32(tmp);
2252                 return 1;
2253             }
2254             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2255             break;
2256         case 2:
2257             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2258                 tcg_temp_free_i32(tmp);
2259                 return 1;
2260             }
2261             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2262             break;
2263         case 3:
2264             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2265                 tcg_temp_free_i32(tmp);
2266                 return 1;
2267             }
2268             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2269             break;
2270         }
2271         tcg_temp_free_i32(tmp);
2272         gen_op_iwmmxt_movq_wRn_M0(wrd);
2273         gen_op_iwmmxt_set_mup();
2274         gen_op_iwmmxt_set_cup();
2275         break;
2276     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2277     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2278         wrd = (insn >> 12) & 0xf;
2279         rd0 = (insn >> 16) & 0xf;
2280         rd1 = (insn >> 0) & 0xf;
2281         gen_op_iwmmxt_movq_M0_wRn(rd0);
2282         switch ((insn >> 22) & 3) {
2283         case 0:
2284             if (insn & (1 << 21))
2285                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2286             else
2287                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2288             break;
2289         case 1:
2290             if (insn & (1 << 21))
2291                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2292             else
2293                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2294             break;
2295         case 2:
2296             if (insn & (1 << 21))
2297                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2298             else
2299                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2300             break;
2301         case 3:
2302             return 1;
2303         }
2304         gen_op_iwmmxt_movq_wRn_M0(wrd);
2305         gen_op_iwmmxt_set_mup();
2306         break;
2307     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2308     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2309         wrd = (insn >> 12) & 0xf;
2310         rd0 = (insn >> 16) & 0xf;
2311         rd1 = (insn >> 0) & 0xf;
2312         gen_op_iwmmxt_movq_M0_wRn(rd0);
2313         switch ((insn >> 22) & 3) {
2314         case 0:
2315             if (insn & (1 << 21))
2316                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2317             else
2318                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2319             break;
2320         case 1:
2321             if (insn & (1 << 21))
2322                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2323             else
2324                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2325             break;
2326         case 2:
2327             if (insn & (1 << 21))
2328                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2329             else
2330                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2331             break;
2332         case 3:
2333             return 1;
2334         }
2335         gen_op_iwmmxt_movq_wRn_M0(wrd);
2336         gen_op_iwmmxt_set_mup();
2337         break;
2338     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2339     case 0x402: case 0x502: case 0x602: case 0x702:
2340         wrd = (insn >> 12) & 0xf;
2341         rd0 = (insn >> 16) & 0xf;
2342         rd1 = (insn >> 0) & 0xf;
2343         gen_op_iwmmxt_movq_M0_wRn(rd0);
2344         iwmmxt_load_reg(cpu_V1, rd1);
2345         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2346                                 tcg_constant_i32((insn >> 20) & 3));
2347         gen_op_iwmmxt_movq_wRn_M0(wrd);
2348         gen_op_iwmmxt_set_mup();
2349         break;
2350     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2351     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2352     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2353     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2354         wrd = (insn >> 12) & 0xf;
2355         rd0 = (insn >> 16) & 0xf;
2356         rd1 = (insn >> 0) & 0xf;
2357         gen_op_iwmmxt_movq_M0_wRn(rd0);
2358         switch ((insn >> 20) & 0xf) {
2359         case 0x0:
2360             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2361             break;
2362         case 0x1:
2363             gen_op_iwmmxt_subub_M0_wRn(rd1);
2364             break;
2365         case 0x3:
2366             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2367             break;
2368         case 0x4:
2369             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2370             break;
2371         case 0x5:
2372             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2373             break;
2374         case 0x7:
2375             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2376             break;
2377         case 0x8:
2378             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2379             break;
2380         case 0x9:
2381             gen_op_iwmmxt_subul_M0_wRn(rd1);
2382             break;
2383         case 0xb:
2384             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2385             break;
2386         default:
2387             return 1;
2388         }
2389         gen_op_iwmmxt_movq_wRn_M0(wrd);
2390         gen_op_iwmmxt_set_mup();
2391         gen_op_iwmmxt_set_cup();
2392         break;
2393     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2394     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2395     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2396     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2397         wrd = (insn >> 12) & 0xf;
2398         rd0 = (insn >> 16) & 0xf;
2399         gen_op_iwmmxt_movq_M0_wRn(rd0);
2400         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2401         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2402         gen_op_iwmmxt_movq_wRn_M0(wrd);
2403         gen_op_iwmmxt_set_mup();
2404         gen_op_iwmmxt_set_cup();
2405         break;
2406     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2407     case 0x418: case 0x518: case 0x618: case 0x718:
2408     case 0x818: case 0x918: case 0xa18: case 0xb18:
2409     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2410         wrd = (insn >> 12) & 0xf;
2411         rd0 = (insn >> 16) & 0xf;
2412         rd1 = (insn >> 0) & 0xf;
2413         gen_op_iwmmxt_movq_M0_wRn(rd0);
2414         switch ((insn >> 20) & 0xf) {
2415         case 0x0:
2416             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2417             break;
2418         case 0x1:
2419             gen_op_iwmmxt_addub_M0_wRn(rd1);
2420             break;
2421         case 0x3:
2422             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2423             break;
2424         case 0x4:
2425             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2426             break;
2427         case 0x5:
2428             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2429             break;
2430         case 0x7:
2431             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2432             break;
2433         case 0x8:
2434             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2435             break;
2436         case 0x9:
2437             gen_op_iwmmxt_addul_M0_wRn(rd1);
2438             break;
2439         case 0xb:
2440             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2441             break;
2442         default:
2443             return 1;
2444         }
2445         gen_op_iwmmxt_movq_wRn_M0(wrd);
2446         gen_op_iwmmxt_set_mup();
2447         gen_op_iwmmxt_set_cup();
2448         break;
2449     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2450     case 0x408: case 0x508: case 0x608: case 0x708:
2451     case 0x808: case 0x908: case 0xa08: case 0xb08:
2452     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2453         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2454             return 1;
2455         wrd = (insn >> 12) & 0xf;
2456         rd0 = (insn >> 16) & 0xf;
2457         rd1 = (insn >> 0) & 0xf;
2458         gen_op_iwmmxt_movq_M0_wRn(rd0);
2459         switch ((insn >> 22) & 3) {
2460         case 1:
2461             if (insn & (1 << 21))
2462                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2463             else
2464                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2465             break;
2466         case 2:
2467             if (insn & (1 << 21))
2468                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2469             else
2470                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2471             break;
2472         case 3:
2473             if (insn & (1 << 21))
2474                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2475             else
2476                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2477             break;
2478         }
2479         gen_op_iwmmxt_movq_wRn_M0(wrd);
2480         gen_op_iwmmxt_set_mup();
2481         gen_op_iwmmxt_set_cup();
2482         break;
2483     case 0x201: case 0x203: case 0x205: case 0x207:
2484     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2485     case 0x211: case 0x213: case 0x215: case 0x217:
2486     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2487         wrd = (insn >> 5) & 0xf;
2488         rd0 = (insn >> 12) & 0xf;
2489         rd1 = (insn >> 0) & 0xf;
2490         if (rd0 == 0xf || rd1 == 0xf)
2491             return 1;
2492         gen_op_iwmmxt_movq_M0_wRn(wrd);
2493         tmp = load_reg(s, rd0);
2494         tmp2 = load_reg(s, rd1);
2495         switch ((insn >> 16) & 0xf) {
2496         case 0x0:                                       /* TMIA */
2497             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2498             break;
2499         case 0x8:                                       /* TMIAPH */
2500             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2501             break;
2502         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2503             if (insn & (1 << 16))
2504                 tcg_gen_shri_i32(tmp, tmp, 16);
2505             if (insn & (1 << 17))
2506                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2507             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2508             break;
2509         default:
2510             tcg_temp_free_i32(tmp2);
2511             tcg_temp_free_i32(tmp);
2512             return 1;
2513         }
2514         tcg_temp_free_i32(tmp2);
2515         tcg_temp_free_i32(tmp);
2516         gen_op_iwmmxt_movq_wRn_M0(wrd);
2517         gen_op_iwmmxt_set_mup();
2518         break;
2519     default:
2520         return 1;
2521     }
2522 
2523     return 0;
2524 }
2525 
2526 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2527    (i.e. an undefined instruction).  */
2528 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2529 {
2530     int acc, rd0, rd1, rdhi, rdlo;
2531     TCGv_i32 tmp, tmp2;
2532 
2533     if ((insn & 0x0ff00f10) == 0x0e200010) {
2534         /* Multiply with Internal Accumulate Format */
2535         rd0 = (insn >> 12) & 0xf;
2536         rd1 = insn & 0xf;
2537         acc = (insn >> 5) & 7;
2538 
2539         if (acc != 0)
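             /* Only the single accumulator acc0 is implemented.  */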
2540             return 1;
2541 
2542         tmp = load_reg(s, rd0);
2543         tmp2 = load_reg(s, rd1);
2544         switch ((insn >> 16) & 0xf) {
2545         case 0x0:                                       /* MIA */
2546             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2547             break;
2548         case 0x8:                                       /* MIAPH */
2549             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2550             break;
2551         case 0xc:                                       /* MIABB */
2552         case 0xd:                                       /* MIABT */
2553         case 0xe:                                       /* MIATB */
2554         case 0xf:                                       /* MIATT */
2555             if (insn & (1 << 16))
2556                 tcg_gen_shri_i32(tmp, tmp, 16);
2557             if (insn & (1 << 17))
2558                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2559             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2560             break;
2561         default:
2562             return 1;
2563         }
2564         tcg_temp_free_i32(tmp2);
2565         tcg_temp_free_i32(tmp);
2566 
2567         gen_op_iwmmxt_movq_wRn_M0(acc);
2568         return 0;
2569     }
2570 
2571     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2572         /* Internal Accumulator Access Format */
2573         rdhi = (insn >> 16) & 0xf;
2574         rdlo = (insn >> 12) & 0xf;
2575         acc = insn & 7;
2576 
2577         if (acc != 0)
2578             return 1;
2579 
2580         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2581             iwmmxt_load_reg(cpu_V0, acc);
2582             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2583             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
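                 /* acc0 is 40 bits wide; only bits [39:32] end up in rdhi.  */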
2584             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2585         } else {                                        /* MAR */
2586             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2587             iwmmxt_store_reg(cpu_V0, acc);
2588         }
2589         return 0;
2590     }
2591 
2592     return 1;
2593 }
2594 
2595 static void gen_goto_ptr(void)
2596 {
2597     tcg_gen_lookup_and_goto_ptr();
2598 }
2599 
2600 /* This will end the TB but doesn't guarantee we'll return to
2601  * cpu_loop_exec. Any live exit_requests will be processed as we
2602  * enter the next TB.
2603  */
2604 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2605 {
2606     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2607         /*
2608          * For pcrel, the pc must always be up-to-date on entry to
2609          * the linked TB, so that it can use simple additions for all
2610          * further adjustments.  For !pcrel, the linked TB is compiled
2611          * to know its full virtual address, so we can delay the
2612          * update to pc to the unlinked path.  A long chain of links
2613          * can thus avoid many updates to the PC.
2614          */
2615         if (tb_cflags(s->base.tb) & CF_PCREL) {
2616             gen_update_pc(s, diff);
2617             tcg_gen_goto_tb(n);
2618         } else {
2619             tcg_gen_goto_tb(n);
2620             gen_update_pc(s, diff);
2621         }
2622         tcg_gen_exit_tb(s->base.tb, n);
2623     } else {
2624         gen_update_pc(s, diff);
2625         gen_goto_ptr();
2626     }
2627     s->base.is_jmp = DISAS_NORETURN;
2628 }
2629 
2630 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2631 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2632 {
2633     if (unlikely(s->ss_active)) {
2634         /* An indirect jump so that we still trigger the debug exception.  */
2635         gen_update_pc(s, diff);
2636         s->base.is_jmp = DISAS_JUMP;
2637         return;
2638     }
2639     switch (s->base.is_jmp) {
2640     case DISAS_NEXT:
2641     case DISAS_TOO_MANY:
2642     case DISAS_NORETURN:
2643         /*
2644          * The normal case: just go to the destination TB.
2645          * NB: NORETURN happens if we generate code like
2646          *    gen_brcondi(l);
2647          *    gen_jmp();
2648          *    gen_set_label(l);
2649          *    gen_jmp();
2650          * on the second call to gen_jmp().
2651          */
2652         gen_goto_tb(s, tbno, diff);
2653         break;
2654     case DISAS_UPDATE_NOCHAIN:
2655     case DISAS_UPDATE_EXIT:
2656         /*
2657          * We already decided we're leaving the TB for some other reason.
2658          * Avoid using goto_tb so we really do exit back to the main loop
2659          * and don't chain to another TB.
2660          */
2661         gen_update_pc(s, diff);
2662         gen_goto_ptr();
2663         s->base.is_jmp = DISAS_NORETURN;
2664         break;
2665     default:
2666         /*
2667          * We shouldn't be emitting code for a jump and also have
2668          * is_jmp set to one of the special cases like DISAS_SWI.
2669          */
2670         g_assert_not_reached();
2671     }
2672 }
2673 
2674 static inline void gen_jmp(DisasContext *s, target_long diff)
2675 {
2676     gen_jmp_tb(s, diff, 0);
2677 }
2678 
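     /* 16x16->32 signed multiply of t0 and t1; x and y select the top or
        bottom halfword of each operand.  */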
2679 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2680 {
2681     if (x)
2682         tcg_gen_sari_i32(t0, t0, 16);
2683     else
2684         gen_sxth(t0);
2685     if (y)
2686         tcg_gen_sari_i32(t1, t1, 16);
2687     else
2688         gen_sxth(t1);
2689     tcg_gen_mul_i32(t0, t0, t1);
2690 }
2691 
2692 /* Return the mask of PSR bits set by a MSR instruction.  */
2693 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2694 {
2695     uint32_t mask = 0;
2696 
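         /* Each flags bit selects one byte of the PSR (c, x, s, f).  */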
2697     if (flags & (1 << 0)) {
2698         mask |= 0xff;
2699     }
2700     if (flags & (1 << 1)) {
2701         mask |= 0xff00;
2702     }
2703     if (flags & (1 << 2)) {
2704         mask |= 0xff0000;
2705     }
2706     if (flags & (1 << 3)) {
2707         mask |= 0xff000000;
2708     }
2709 
2710     /* Mask out undefined and reserved bits.  */
2711     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2712 
2713     /* Mask out execution state.  */
2714     if (!spsr) {
2715         mask &= ~CPSR_EXEC;
2716     }
2717 
2718     /* Mask out privileged bits.  */
2719     if (IS_USER(s)) {
2720         mask &= CPSR_USER;
2721     }
2722     return mask;
2723 }
2724 
2725 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2726 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2727 {
2728     TCGv_i32 tmp;
2729     if (spsr) {
2730         /* ??? This is also undefined in system mode.  */
2731         if (IS_USER(s))
2732             return 1;
2733 
2734         tmp = load_cpu_field(spsr);
2735         tcg_gen_andi_i32(tmp, tmp, ~mask);
2736         tcg_gen_andi_i32(t0, t0, mask);
2737         tcg_gen_or_i32(tmp, tmp, t0);
2738         store_cpu_field(tmp, spsr);
2739     } else {
2740         gen_set_cpsr(t0, mask);
2741     }
2742     tcg_temp_free_i32(t0);
2743     gen_lookup_tb(s);
2744     return 0;
2745 }
2746 
2747 /* Returns nonzero if access to the PSR is not permitted.  */
2748 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2749 {
2750     TCGv_i32 tmp;
2751     tmp = tcg_temp_new_i32();
2752     tcg_gen_movi_i32(tmp, val);
2753     return gen_set_psr(s, mask, spsr, tmp);
2754 }
2755 
2756 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2757                                      int *tgtmode, int *regno)
2758 {
2759     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2760      * the target mode and register number, and identify the various
2761      * unpredictable cases.
2762      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2763      *  + executed in user mode
2764      *  + using R15 as the src/dest register
2765      *  + accessing an unimplemented register
2766      *  + accessing a register that's inaccessible at current PL/security state
2767      *  + accessing a register that you could access with a different insn
2768      * We choose to UNDEF in all these cases.
2769      * Since we don't know which of the various AArch32 modes we are in,
2770      * we have to defer some checks to runtime.
2771      * Accesses to Monitor mode registers from Secure EL1 (which implies
2772      * that EL3 is AArch64) must trap to Secure EL2 if enabled, else EL3.
2773      *
2774      * If the access checks fail this function will emit code to take
2775      * an exception and return false. Otherwise it will return true,
2776      * and set *tgtmode and *regno appropriately.
2777      */
2778     /* These instructions are present only in ARMv8, or in ARMv7 with the
2779      * Virtualization Extensions.
2780      */
2781     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2782         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2783         goto undef;
2784     }
2785 
2786     if (IS_USER(s) || rn == 15) {
2787         goto undef;
2788     }
2789 
2790     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2791      * of registers into (r, sysm).
2792      */
2793     if (r) {
2794         /* SPSRs for other modes */
2795         switch (sysm) {
2796         case 0xe: /* SPSR_fiq */
2797             *tgtmode = ARM_CPU_MODE_FIQ;
2798             break;
2799         case 0x10: /* SPSR_irq */
2800             *tgtmode = ARM_CPU_MODE_IRQ;
2801             break;
2802         case 0x12: /* SPSR_svc */
2803             *tgtmode = ARM_CPU_MODE_SVC;
2804             break;
2805         case 0x14: /* SPSR_abt */
2806             *tgtmode = ARM_CPU_MODE_ABT;
2807             break;
2808         case 0x16: /* SPSR_und */
2809             *tgtmode = ARM_CPU_MODE_UND;
2810             break;
2811         case 0x1c: /* SPSR_mon */
2812             *tgtmode = ARM_CPU_MODE_MON;
2813             break;
2814         case 0x1e: /* SPSR_hyp */
2815             *tgtmode = ARM_CPU_MODE_HYP;
2816             break;
2817         default: /* unallocated */
2818             goto undef;
2819         }
2820         /* We arbitrarily assign SPSR a register number of 16. */
2821         *regno = 16;
2822     } else {
2823         /* general purpose registers for other modes */
2824         switch (sysm) {
2825         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2826             *tgtmode = ARM_CPU_MODE_USR;
2827             *regno = sysm + 8;
2828             break;
2829         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2830             *tgtmode = ARM_CPU_MODE_FIQ;
2831             *regno = sysm;
2832             break;
2833         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2834             *tgtmode = ARM_CPU_MODE_IRQ;
2835             *regno = sysm & 1 ? 13 : 14;
2836             break;
2837         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2838             *tgtmode = ARM_CPU_MODE_SVC;
2839             *regno = sysm & 1 ? 13 : 14;
2840             break;
2841         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2842             *tgtmode = ARM_CPU_MODE_ABT;
2843             *regno = sysm & 1 ? 13 : 14;
2844             break;
2845         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2846             *tgtmode = ARM_CPU_MODE_UND;
2847             *regno = sysm & 1 ? 13 : 14;
2848             break;
2849         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2850             *tgtmode = ARM_CPU_MODE_MON;
2851             *regno = sysm & 1 ? 13 : 14;
2852             break;
2853         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2854             *tgtmode = ARM_CPU_MODE_HYP;
2855             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2856             *regno = sysm & 1 ? 13 : 17;
2857             break;
2858         default: /* unallocated */
2859             goto undef;
2860         }
2861     }
2862 
2863     /* Catch the 'accessing inaccessible register' cases we can detect
2864      * at translate time.
2865      */
2866     switch (*tgtmode) {
2867     case ARM_CPU_MODE_MON:
2868         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2869             goto undef;
2870         }
2871         if (s->current_el == 1) {
2872             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2873              * then accesses to Mon registers trap to Secure EL2, if it exists,
2874              * otherwise EL3.
2875              */
2876             TCGv_i32 tcg_el;
2877 
2878             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2879                 dc_isar_feature(aa64_sel2, s)) {
2880                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2881                 tcg_el = load_cpu_field(cp15.scr_el3);
2882                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2883                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2884             } else {
2885                 tcg_el = tcg_constant_i32(3);
2886             }
2887 
2888             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2889                                     syn_uncategorized(), tcg_el);
2890             tcg_temp_free_i32(tcg_el);
2891             return false;
2892         }
2893         break;
2894     case ARM_CPU_MODE_HYP:
2895         /*
2896          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2897          * (and so we can forbid accesses from EL2 or below). elr_hyp
2898          * can be accessed also from Hyp mode, so forbid accesses from
2899          * EL0 or EL1.
2900          */
2901         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2902             (s->current_el < 3 && *regno != 17)) {
2903             goto undef;
2904         }
2905         break;
2906     default:
2907         break;
2908     }
2909 
2910     return true;
2911 
2912 undef:
2913     /* If we get here then some access check did not pass */
2914     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2915     return false;
2916 }
2917 
2918 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2919 {
2920     TCGv_i32 tcg_reg;
2921     int tgtmode = 0, regno = 0;
2922 
2923     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2924         return;
2925     }
2926 
2927     /* Sync state because msr_banked() can raise exceptions */
2928     gen_set_condexec(s);
2929     gen_update_pc(s, 0);
2930     tcg_reg = load_reg(s, rn);
2931     gen_helper_msr_banked(cpu_env, tcg_reg,
2932                           tcg_constant_i32(tgtmode),
2933                           tcg_constant_i32(regno));
2934     tcg_temp_free_i32(tcg_reg);
2935     s->base.is_jmp = DISAS_UPDATE_EXIT;
2936 }
2937 
2938 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2939 {
2940     TCGv_i32 tcg_reg;
2941     int tgtmode = 0, regno = 0;
2942 
2943     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2944         return;
2945     }
2946 
2947     /* Sync state because mrs_banked() can raise exceptions */
2948     gen_set_condexec(s);
2949     gen_update_pc(s, 0);
2950     tcg_reg = tcg_temp_new_i32();
2951     gen_helper_mrs_banked(tcg_reg, cpu_env,
2952                           tcg_constant_i32(tgtmode),
2953                           tcg_constant_i32(regno));
2954     store_reg(s, rn, tcg_reg);
2955     s->base.is_jmp = DISAS_UPDATE_EXIT;
2956 }
2957 
2958 /* Store value to PC as for an exception return (i.e. don't
2959  * mask the low bits). The subsequent call to gen_helper_cpsr_write_eret()
2960  * will do the masking based on the new value of the Thumb bit.
2961  */
2962 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2963 {
2964     tcg_gen_mov_i32(cpu_R[15], pc);
2965     tcg_temp_free_i32(pc);
2966 }
2967 
2968 /* Generate a v6 exception return.  Marks both values as dead.  */
2969 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2970 {
2971     store_pc_exc_ret(s, pc);
2972     /* The cpsr_write_eret helper will mask the low bits of PC
2973      * appropriately depending on the new Thumb bit, so it must
2974      * be called after storing the new PC.
2975      */
2976     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2977         gen_io_start();
2978     }
2979     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2980     tcg_temp_free_i32(cpsr);
2981     /* Must exit loop to check un-masked IRQs */
2982     s->base.is_jmp = DISAS_EXIT;
2983 }
2984 
2985 /* Generate an old-style exception return. Marks pc as dead. */
2986 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2987 {
2988     gen_rfe(s, pc, load_cpu_field(spsr));
2989 }
2990 
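     /* Expand a 3-operand gvec op whose helper takes a pointer to the QC
      * (cumulative saturation) flag.
      */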
2991 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2992                             uint32_t opr_sz, uint32_t max_sz,
2993                             gen_helper_gvec_3_ptr *fn)
2994 {
2995     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2996 
2997     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2998     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2999                        opr_sz, max_sz, 0, fn);
3000     tcg_temp_free_ptr(qc_ptr);
3001 }
3002 
3003 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3004                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3005 {
3006     static gen_helper_gvec_3_ptr * const fns[2] = {
3007         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3008     };
3009     tcg_debug_assert(vece >= 1 && vece <= 2);
3010     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3011 }
3012 
3013 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3014                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3015 {
3016     static gen_helper_gvec_3_ptr * const fns[2] = {
3017         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3018     };
3019     tcg_debug_assert(vece >= 1 && vece <= 2);
3020     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3021 }
3022 
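     /*
      * Generate per-size expanders for a vector compare against zero: each
      * result element is all ones if the condition holds, else all zeros
      * (hence the negation of the 0/1 setcond result).
      */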
3023 #define GEN_CMP0(NAME, COND)                                            \
3024     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
3025     {                                                                   \
3026         tcg_gen_setcondi_i32(COND, d, a, 0);                            \
3027         tcg_gen_neg_i32(d, d);                                          \
3028     }                                                                   \
3029     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
3030     {                                                                   \
3031         tcg_gen_setcondi_i64(COND, d, a, 0);                            \
3032         tcg_gen_neg_i64(d, d);                                          \
3033     }                                                                   \
3034     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3035     {                                                                   \
3036         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
3037         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
3038     }                                                                   \
3039     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
3040                             uint32_t opr_sz, uint32_t max_sz)           \
3041     {                                                                   \
3042         const GVecGen2 op[4] = {                                        \
3043             { .fno = gen_helper_gvec_##NAME##0_b,                       \
3044               .fniv = gen_##NAME##0_vec,                                \
3045               .opt_opc = vecop_list_cmp,                                \
3046               .vece = MO_8 },                                           \
3047             { .fno = gen_helper_gvec_##NAME##0_h,                       \
3048               .fniv = gen_##NAME##0_vec,                                \
3049               .opt_opc = vecop_list_cmp,                                \
3050               .vece = MO_16 },                                          \
3051             { .fni4 = gen_##NAME##0_i32,                                \
3052               .fniv = gen_##NAME##0_vec,                                \
3053               .opt_opc = vecop_list_cmp,                                \
3054               .vece = MO_32 },                                          \
3055             { .fni8 = gen_##NAME##0_i64,                                \
3056               .fniv = gen_##NAME##0_vec,                                \
3057               .opt_opc = vecop_list_cmp,                                \
3058               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
3059               .vece = MO_64 },                                          \
3060         };                                                              \
3061         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
3062     }
3063 
3064 static const TCGOpcode vecop_list_cmp[] = {
3065     INDEX_op_cmp_vec, 0
3066 };
3067 
3068 GEN_CMP0(ceq, TCG_COND_EQ)
3069 GEN_CMP0(cle, TCG_COND_LE)
3070 GEN_CMP0(cge, TCG_COND_GE)
3071 GEN_CMP0(clt, TCG_COND_LT)
3072 GEN_CMP0(cgt, TCG_COND_GT)
3073 
3074 #undef GEN_CMP0
3075 
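     /* SSRA: signed shift right by immediate and accumulate.  */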
3076 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3077 {
3078     tcg_gen_vec_sar8i_i64(a, a, shift);
3079     tcg_gen_vec_add8_i64(d, d, a);
3080 }
3081 
3082 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3083 {
3084     tcg_gen_vec_sar16i_i64(a, a, shift);
3085     tcg_gen_vec_add16_i64(d, d, a);
3086 }
3087 
3088 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3089 {
3090     tcg_gen_sari_i32(a, a, shift);
3091     tcg_gen_add_i32(d, d, a);
3092 }
3093 
3094 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3095 {
3096     tcg_gen_sari_i64(a, a, shift);
3097     tcg_gen_add_i64(d, d, a);
3098 }
3099 
3100 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3101 {
3102     tcg_gen_sari_vec(vece, a, a, sh);
3103     tcg_gen_add_vec(vece, d, d, a);
3104 }
3105 
3106 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3107                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3108 {
3109     static const TCGOpcode vecop_list[] = {
3110         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3111     };
3112     static const GVecGen2i ops[4] = {
3113         { .fni8 = gen_ssra8_i64,
3114           .fniv = gen_ssra_vec,
3115           .fno = gen_helper_gvec_ssra_b,
3116           .load_dest = true,
3117           .opt_opc = vecop_list,
3118           .vece = MO_8 },
3119         { .fni8 = gen_ssra16_i64,
3120           .fniv = gen_ssra_vec,
3121           .fno = gen_helper_gvec_ssra_h,
3122           .load_dest = true,
3123           .opt_opc = vecop_list,
3124           .vece = MO_16 },
3125         { .fni4 = gen_ssra32_i32,
3126           .fniv = gen_ssra_vec,
3127           .fno = gen_helper_gvec_ssra_s,
3128           .load_dest = true,
3129           .opt_opc = vecop_list,
3130           .vece = MO_32 },
3131         { .fni8 = gen_ssra64_i64,
3132           .fniv = gen_ssra_vec,
3133           .fno = gen_helper_gvec_ssra_d,
3134           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3135           .opt_opc = vecop_list,
3136           .load_dest = true,
3137           .vece = MO_64 },
3138     };
3139 
3140     /* tszimm encoding produces immediates in the range [1..esize]. */
3141     tcg_debug_assert(shift > 0);
3142     tcg_debug_assert(shift <= (8 << vece));
3143 
3144     /*
3145      * Shifts larger than the element size are architecturally valid.
3146      * A signed shift of esize produces all sign bits, just as esize - 1 does.
3147      */
3148     shift = MIN(shift, (8 << vece) - 1);
3149     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3150 }
3151 
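/*
 * Unsigned shift right and accumulate (USRA): as SSRA, but the
 * per-element shift is logical rather than arithmetic.
 */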
3152 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3153 {
3154     tcg_gen_vec_shr8i_i64(a, a, shift);
3155     tcg_gen_vec_add8_i64(d, d, a);
3156 }
3157 
3158 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3159 {
3160     tcg_gen_vec_shr16i_i64(a, a, shift);
3161     tcg_gen_vec_add16_i64(d, d, a);
3162 }
3163 
3164 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3165 {
3166     tcg_gen_shri_i32(a, a, shift);
3167     tcg_gen_add_i32(d, d, a);
3168 }
3169 
3170 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3171 {
3172     tcg_gen_shri_i64(a, a, shift);
3173     tcg_gen_add_i64(d, d, a);
3174 }
3175 
3176 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3177 {
3178     tcg_gen_shri_vec(vece, a, a, sh);
3179     tcg_gen_add_vec(vece, d, d, a);
3180 }
3181 
3182 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3183                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3184 {
3185     static const TCGOpcode vecop_list[] = {
3186         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3187     };
3188     static const GVecGen2i ops[4] = {
3189         { .fni8 = gen_usra8_i64,
3190           .fniv = gen_usra_vec,
3191           .fno = gen_helper_gvec_usra_b,
3192           .load_dest = true,
3193           .opt_opc = vecop_list,
3194           .vece = MO_8, },
3195         { .fni8 = gen_usra16_i64,
3196           .fniv = gen_usra_vec,
3197           .fno = gen_helper_gvec_usra_h,
3198           .load_dest = true,
3199           .opt_opc = vecop_list,
3200           .vece = MO_16, },
3201         { .fni4 = gen_usra32_i32,
3202           .fniv = gen_usra_vec,
3203           .fno = gen_helper_gvec_usra_s,
3204           .load_dest = true,
3205           .opt_opc = vecop_list,
3206           .vece = MO_32, },
3207         { .fni8 = gen_usra64_i64,
3208           .fniv = gen_usra_vec,
3209           .fno = gen_helper_gvec_usra_d,
3210           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3211           .load_dest = true,
3212           .opt_opc = vecop_list,
3213           .vece = MO_64, },
3214     };
3215 
3216     /* tszimm encoding produces immediates in the range [1..esize]. */
3217     tcg_debug_assert(shift > 0);
3218     tcg_debug_assert(shift <= (8 << vece));
3219 
3220     /*
3221      * Shifts larger than the element size are architecturally valid.
3222      * An unsigned shift of esize produces all zeros, so the accumulate is a nop.
3223      */
3224     if (shift < (8 << vece)) {
3225         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3226     } else {
3227         /* Nop, but we do need to clear the tail. */
3228         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3229     }
3230 }
3231 
3232 /*
3233  * Shift one less than the requested amount, and the low bit is
3234  * the rounding bit.  For the 8 and 16-bit operations, because we
3235  * mask the low bit, we can perform a normal integer shift instead
3236  * of a vector shift.
3237  */
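/*
 * For example, rounding-shift 6 right by 2: the rounding bit is
 * (6 >> 1) & 1 == 1 and the truncated shift is 6 >> 2 == 1, so the
 * result is 2, which matches (6 + 2) >> 2.
 */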
3238 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3239 {
3240     TCGv_i64 t = tcg_temp_new_i64();
3241 
3242     tcg_gen_shri_i64(t, a, sh - 1);
3243     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3244     tcg_gen_vec_sar8i_i64(d, a, sh);
3245     tcg_gen_vec_add8_i64(d, d, t);
3246     tcg_temp_free_i64(t);
3247 }
3248 
3249 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3250 {
3251     TCGv_i64 t = tcg_temp_new_i64();
3252 
3253     tcg_gen_shri_i64(t, a, sh - 1);
3254     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3255     tcg_gen_vec_sar16i_i64(d, a, sh);
3256     tcg_gen_vec_add16_i64(d, d, t);
3257     tcg_temp_free_i64(t);
3258 }
3259 
3260 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3261 {
3262     TCGv_i32 t;
3263 
3264     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3265     if (sh == 32) {
3266         tcg_gen_movi_i32(d, 0);
3267         return;
3268     }
3269     t = tcg_temp_new_i32();
3270     tcg_gen_extract_i32(t, a, sh - 1, 1);
3271     tcg_gen_sari_i32(d, a, sh);
3272     tcg_gen_add_i32(d, d, t);
3273     tcg_temp_free_i32(t);
3274 }
3275 
3276 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3277 {
3278     TCGv_i64 t = tcg_temp_new_i64();
3279 
3280     tcg_gen_extract_i64(t, a, sh - 1, 1);
3281     tcg_gen_sari_i64(d, a, sh);
3282     tcg_gen_add_i64(d, d, t);
3283     tcg_temp_free_i64(t);
3284 }
3285 
3286 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3287 {
3288     TCGv_vec t = tcg_temp_new_vec_matching(d);
3289     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3290 
3291     tcg_gen_shri_vec(vece, t, a, sh - 1);
3292     tcg_gen_dupi_vec(vece, ones, 1);
3293     tcg_gen_and_vec(vece, t, t, ones);
3294     tcg_gen_sari_vec(vece, d, a, sh);
3295     tcg_gen_add_vec(vece, d, d, t);
3296 
3297     tcg_temp_free_vec(t);
3298     tcg_temp_free_vec(ones);
3299 }
3300 
3301 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3302                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3303 {
3304     static const TCGOpcode vecop_list[] = {
3305         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3306     };
3307     static const GVecGen2i ops[4] = {
3308         { .fni8 = gen_srshr8_i64,
3309           .fniv = gen_srshr_vec,
3310           .fno = gen_helper_gvec_srshr_b,
3311           .opt_opc = vecop_list,
3312           .vece = MO_8 },
3313         { .fni8 = gen_srshr16_i64,
3314           .fniv = gen_srshr_vec,
3315           .fno = gen_helper_gvec_srshr_h,
3316           .opt_opc = vecop_list,
3317           .vece = MO_16 },
3318         { .fni4 = gen_srshr32_i32,
3319           .fniv = gen_srshr_vec,
3320           .fno = gen_helper_gvec_srshr_s,
3321           .opt_opc = vecop_list,
3322           .vece = MO_32 },
3323         { .fni8 = gen_srshr64_i64,
3324           .fniv = gen_srshr_vec,
3325           .fno = gen_helper_gvec_srshr_d,
3326           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3327           .opt_opc = vecop_list,
3328           .vece = MO_64 },
3329     };
3330 
3331     /* tszimm encoding produces immediates in the range [1..esize] */
3332     tcg_debug_assert(shift > 0);
3333     tcg_debug_assert(shift <= (8 << vece));
3334 
3335     if (shift == (8 << vece)) {
3336         /*
3337          * Shifts larger than the element size are architecturally valid.
3338          * A signed shift of esize produces all sign bits.  Rounding then gives
3339          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3340          * I.e. always zero.
3341          */
3342         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3343     } else {
3344         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3345     }
3346 }
3347 
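/*
 * Signed rounding shift right and accumulate (SRSRA): reuse the
 * rounding shift above and add the result to the destination.
 */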
3348 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3349 {
3350     TCGv_i64 t = tcg_temp_new_i64();
3351 
3352     gen_srshr8_i64(t, a, sh);
3353     tcg_gen_vec_add8_i64(d, d, t);
3354     tcg_temp_free_i64(t);
3355 }
3356 
3357 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3358 {
3359     TCGv_i64 t = tcg_temp_new_i64();
3360 
3361     gen_srshr16_i64(t, a, sh);
3362     tcg_gen_vec_add16_i64(d, d, t);
3363     tcg_temp_free_i64(t);
3364 }
3365 
3366 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3367 {
3368     TCGv_i32 t = tcg_temp_new_i32();
3369 
3370     gen_srshr32_i32(t, a, sh);
3371     tcg_gen_add_i32(d, d, t);
3372     tcg_temp_free_i32(t);
3373 }
3374 
3375 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3376 {
3377     TCGv_i64 t = tcg_temp_new_i64();
3378 
3379     gen_srshr64_i64(t, a, sh);
3380     tcg_gen_add_i64(d, d, t);
3381     tcg_temp_free_i64(t);
3382 }
3383 
3384 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3385 {
3386     TCGv_vec t = tcg_temp_new_vec_matching(d);
3387 
3388     gen_srshr_vec(vece, t, a, sh);
3389     tcg_gen_add_vec(vece, d, d, t);
3390     tcg_temp_free_vec(t);
3391 }
3392 
3393 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3394                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3395 {
3396     static const TCGOpcode vecop_list[] = {
3397         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3398     };
3399     static const GVecGen2i ops[4] = {
3400         { .fni8 = gen_srsra8_i64,
3401           .fniv = gen_srsra_vec,
3402           .fno = gen_helper_gvec_srsra_b,
3403           .opt_opc = vecop_list,
3404           .load_dest = true,
3405           .vece = MO_8 },
3406         { .fni8 = gen_srsra16_i64,
3407           .fniv = gen_srsra_vec,
3408           .fno = gen_helper_gvec_srsra_h,
3409           .opt_opc = vecop_list,
3410           .load_dest = true,
3411           .vece = MO_16 },
3412         { .fni4 = gen_srsra32_i32,
3413           .fniv = gen_srsra_vec,
3414           .fno = gen_helper_gvec_srsra_s,
3415           .opt_opc = vecop_list,
3416           .load_dest = true,
3417           .vece = MO_32 },
3418         { .fni8 = gen_srsra64_i64,
3419           .fniv = gen_srsra_vec,
3420           .fno = gen_helper_gvec_srsra_d,
3421           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3422           .opt_opc = vecop_list,
3423           .load_dest = true,
3424           .vece = MO_64 },
3425     };
3426 
3427     /* tszimm encoding produces immediates in the range [1..esize] */
3428     tcg_debug_assert(shift > 0);
3429     tcg_debug_assert(shift <= (8 << vece));
3430 
3431     /*
3432      * Shifts larger than the element size are architecturally valid.
3433      * A signed shift of esize produces all sign bits.  Rounding then gives
3434      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3435      * I.e. always zero.  With accumulation, this leaves D unchanged.
3436      */
3437     if (shift == (8 << vece)) {
3438         /* Nop, but we do need to clear the tail. */
3439         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3440     } else {
3441         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3442     }
3443 }
3444 
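/*
 * Unsigned rounding shift right (URSHR): as the signed forms above,
 * but the per-element shift is logical rather than arithmetic.
 */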
3445 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3446 {
3447     TCGv_i64 t = tcg_temp_new_i64();
3448 
3449     tcg_gen_shri_i64(t, a, sh - 1);
3450     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3451     tcg_gen_vec_shr8i_i64(d, a, sh);
3452     tcg_gen_vec_add8_i64(d, d, t);
3453     tcg_temp_free_i64(t);
3454 }
3455 
3456 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3457 {
3458     TCGv_i64 t = tcg_temp_new_i64();
3459 
3460     tcg_gen_shri_i64(t, a, sh - 1);
3461     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3462     tcg_gen_vec_shr16i_i64(d, a, sh);
3463     tcg_gen_vec_add16_i64(d, d, t);
3464     tcg_temp_free_i64(t);
3465 }
3466 
3467 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3468 {
3469     TCGv_i32 t;
3470 
3471     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3472     if (sh == 32) {
3473         tcg_gen_extract_i32(d, a, sh - 1, 1);
3474         return;
3475     }
3476     t = tcg_temp_new_i32();
3477     tcg_gen_extract_i32(t, a, sh - 1, 1);
3478     tcg_gen_shri_i32(d, a, sh);
3479     tcg_gen_add_i32(d, d, t);
3480     tcg_temp_free_i32(t);
3481 }
3482 
3483 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3484 {
3485     TCGv_i64 t = tcg_temp_new_i64();
3486 
3487     tcg_gen_extract_i64(t, a, sh - 1, 1);
3488     tcg_gen_shri_i64(d, a, sh);
3489     tcg_gen_add_i64(d, d, t);
3490     tcg_temp_free_i64(t);
3491 }
3492 
3493 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3494 {
3495     TCGv_vec t = tcg_temp_new_vec_matching(d);
3496     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3497 
3498     tcg_gen_shri_vec(vece, t, a, shift - 1);
3499     tcg_gen_dupi_vec(vece, ones, 1);
3500     tcg_gen_and_vec(vece, t, t, ones);
3501     tcg_gen_shri_vec(vece, d, a, shift);
3502     tcg_gen_add_vec(vece, d, d, t);
3503 
3504     tcg_temp_free_vec(t);
3505     tcg_temp_free_vec(ones);
3506 }
3507 
3508 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3509                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3510 {
3511     static const TCGOpcode vecop_list[] = {
3512         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3513     };
3514     static const GVecGen2i ops[4] = {
3515         { .fni8 = gen_urshr8_i64,
3516           .fniv = gen_urshr_vec,
3517           .fno = gen_helper_gvec_urshr_b,
3518           .opt_opc = vecop_list,
3519           .vece = MO_8 },
3520         { .fni8 = gen_urshr16_i64,
3521           .fniv = gen_urshr_vec,
3522           .fno = gen_helper_gvec_urshr_h,
3523           .opt_opc = vecop_list,
3524           .vece = MO_16 },
3525         { .fni4 = gen_urshr32_i32,
3526           .fniv = gen_urshr_vec,
3527           .fno = gen_helper_gvec_urshr_s,
3528           .opt_opc = vecop_list,
3529           .vece = MO_32 },
3530         { .fni8 = gen_urshr64_i64,
3531           .fniv = gen_urshr_vec,
3532           .fno = gen_helper_gvec_urshr_d,
3533           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3534           .opt_opc = vecop_list,
3535           .vece = MO_64 },
3536     };
3537 
3538     /* tszimm encoding produces immediates in the range [1..esize] */
3539     tcg_debug_assert(shift > 0);
3540     tcg_debug_assert(shift <= (8 << vece));
3541 
3542     if (shift == (8 << vece)) {
3543         /*
3544          * Shifts larger than the element size are architecturally valid.
3545          * An unsigned shift of esize produces zero.  With rounding, this gives a
3546          * copy of the most significant bit.
3547          */
3548         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3549     } else {
3550         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3551     }
3552 }
3553 
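/*
 * Unsigned rounding shift right and accumulate (URSRA): a shift equal
 * to the element size leaves only the rounding bit (the old most
 * significant bit) to be added to the destination.
 */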
3554 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3555 {
3556     TCGv_i64 t = tcg_temp_new_i64();
3557 
3558     if (sh == 8) {
3559         tcg_gen_vec_shr8i_i64(t, a, 7);
3560     } else {
3561         gen_urshr8_i64(t, a, sh);
3562     }
3563     tcg_gen_vec_add8_i64(d, d, t);
3564     tcg_temp_free_i64(t);
3565 }
3566 
3567 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3568 {
3569     TCGv_i64 t = tcg_temp_new_i64();
3570 
3571     if (sh == 16) {
3572         tcg_gen_vec_shr16i_i64(t, a, 15);
3573     } else {
3574         gen_urshr16_i64(t, a, sh);
3575     }
3576     tcg_gen_vec_add16_i64(d, d, t);
3577     tcg_temp_free_i64(t);
3578 }
3579 
3580 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3581 {
3582     TCGv_i32 t = tcg_temp_new_i32();
3583 
3584     if (sh == 32) {
3585         tcg_gen_shri_i32(t, a, 31);
3586     } else {
3587         gen_urshr32_i32(t, a, sh);
3588     }
3589     tcg_gen_add_i32(d, d, t);
3590     tcg_temp_free_i32(t);
3591 }
3592 
3593 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3594 {
3595     TCGv_i64 t = tcg_temp_new_i64();
3596 
3597     if (sh == 64) {
3598         tcg_gen_shri_i64(t, a, 63);
3599     } else {
3600         gen_urshr64_i64(t, a, sh);
3601     }
3602     tcg_gen_add_i64(d, d, t);
3603     tcg_temp_free_i64(t);
3604 }
3605 
3606 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3607 {
3608     TCGv_vec t = tcg_temp_new_vec_matching(d);
3609 
3610     if (sh == (8 << vece)) {
3611         tcg_gen_shri_vec(vece, t, a, sh - 1);
3612     } else {
3613         gen_urshr_vec(vece, t, a, sh);
3614     }
3615     tcg_gen_add_vec(vece, d, d, t);
3616     tcg_temp_free_vec(t);
3617 }
3618 
3619 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3620                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3621 {
3622     static const TCGOpcode vecop_list[] = {
3623         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3624     };
3625     static const GVecGen2i ops[4] = {
3626         { .fni8 = gen_ursra8_i64,
3627           .fniv = gen_ursra_vec,
3628           .fno = gen_helper_gvec_ursra_b,
3629           .opt_opc = vecop_list,
3630           .load_dest = true,
3631           .vece = MO_8 },
3632         { .fni8 = gen_ursra16_i64,
3633           .fniv = gen_ursra_vec,
3634           .fno = gen_helper_gvec_ursra_h,
3635           .opt_opc = vecop_list,
3636           .load_dest = true,
3637           .vece = MO_16 },
3638         { .fni4 = gen_ursra32_i32,
3639           .fniv = gen_ursra_vec,
3640           .fno = gen_helper_gvec_ursra_s,
3641           .opt_opc = vecop_list,
3642           .load_dest = true,
3643           .vece = MO_32 },
3644         { .fni8 = gen_ursra64_i64,
3645           .fniv = gen_ursra_vec,
3646           .fno = gen_helper_gvec_ursra_d,
3647           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3648           .opt_opc = vecop_list,
3649           .load_dest = true,
3650           .vece = MO_64 },
3651     };
3652 
3653     /* tszimm encoding produces immediates in the range [1..esize] */
3654     tcg_debug_assert(shift > 0);
3655     tcg_debug_assert(shift <= (8 << vece));
3656 
3657     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3658 }
3659 
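/*
 * Shift right and insert (SRI): shift the input right by an immediate
 * and insert it into the destination, leaving the top 'shift' bits of
 * each destination element unchanged.
 */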
3660 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3661 {
3662     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3663     TCGv_i64 t = tcg_temp_new_i64();
3664 
3665     tcg_gen_shri_i64(t, a, shift);
3666     tcg_gen_andi_i64(t, t, mask);
3667     tcg_gen_andi_i64(d, d, ~mask);
3668     tcg_gen_or_i64(d, d, t);
3669     tcg_temp_free_i64(t);
3670 }
3671 
3672 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3673 {
3674     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3675     TCGv_i64 t = tcg_temp_new_i64();
3676 
3677     tcg_gen_shri_i64(t, a, shift);
3678     tcg_gen_andi_i64(t, t, mask);
3679     tcg_gen_andi_i64(d, d, ~mask);
3680     tcg_gen_or_i64(d, d, t);
3681     tcg_temp_free_i64(t);
3682 }
3683 
3684 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3685 {
3686     tcg_gen_shri_i32(a, a, shift);
3687     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3688 }
3689 
3690 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3691 {
3692     tcg_gen_shri_i64(a, a, shift);
3693     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3694 }
3695 
3696 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3697 {
3698     TCGv_vec t = tcg_temp_new_vec_matching(d);
3699     TCGv_vec m = tcg_temp_new_vec_matching(d);
3700 
3701     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3702     tcg_gen_shri_vec(vece, t, a, sh);
3703     tcg_gen_and_vec(vece, d, d, m);
3704     tcg_gen_or_vec(vece, d, d, t);
3705 
3706     tcg_temp_free_vec(t);
3707     tcg_temp_free_vec(m);
3708 }
3709 
3710 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3711                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3712 {
3713     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3714     const GVecGen2i ops[4] = {
3715         { .fni8 = gen_shr8_ins_i64,
3716           .fniv = gen_shr_ins_vec,
3717           .fno = gen_helper_gvec_sri_b,
3718           .load_dest = true,
3719           .opt_opc = vecop_list,
3720           .vece = MO_8 },
3721         { .fni8 = gen_shr16_ins_i64,
3722           .fniv = gen_shr_ins_vec,
3723           .fno = gen_helper_gvec_sri_h,
3724           .load_dest = true,
3725           .opt_opc = vecop_list,
3726           .vece = MO_16 },
3727         { .fni4 = gen_shr32_ins_i32,
3728           .fniv = gen_shr_ins_vec,
3729           .fno = gen_helper_gvec_sri_s,
3730           .load_dest = true,
3731           .opt_opc = vecop_list,
3732           .vece = MO_32 },
3733         { .fni8 = gen_shr64_ins_i64,
3734           .fniv = gen_shr_ins_vec,
3735           .fno = gen_helper_gvec_sri_d,
3736           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3737           .load_dest = true,
3738           .opt_opc = vecop_list,
3739           .vece = MO_64 },
3740     };
3741 
3742     /* tszimm encoding produces immediates in the range [1..esize]. */
3743     tcg_debug_assert(shift > 0);
3744     tcg_debug_assert(shift <= (8 << vece));
3745 
3746     /* Shift of esize leaves destination unchanged. */
3747     if (shift < (8 << vece)) {
3748         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3749     } else {
3750         /* Nop, but we do need to clear the tail. */
3751         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3752     }
3753 }
3754 
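/*
 * Shift left and insert (SLI): shift the input left by an immediate
 * and insert it into the destination, leaving the low 'shift' bits of
 * each destination element unchanged.
 */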
3755 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3756 {
3757     uint64_t mask = dup_const(MO_8, 0xff << shift);
3758     TCGv_i64 t = tcg_temp_new_i64();
3759 
3760     tcg_gen_shli_i64(t, a, shift);
3761     tcg_gen_andi_i64(t, t, mask);
3762     tcg_gen_andi_i64(d, d, ~mask);
3763     tcg_gen_or_i64(d, d, t);
3764     tcg_temp_free_i64(t);
3765 }
3766 
3767 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3768 {
3769     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3770     TCGv_i64 t = tcg_temp_new_i64();
3771 
3772     tcg_gen_shli_i64(t, a, shift);
3773     tcg_gen_andi_i64(t, t, mask);
3774     tcg_gen_andi_i64(d, d, ~mask);
3775     tcg_gen_or_i64(d, d, t);
3776     tcg_temp_free_i64(t);
3777 }
3778 
3779 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3780 {
3781     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3782 }
3783 
3784 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3785 {
3786     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3787 }
3788 
3789 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3790 {
3791     TCGv_vec t = tcg_temp_new_vec_matching(d);
3792     TCGv_vec m = tcg_temp_new_vec_matching(d);
3793 
3794     tcg_gen_shli_vec(vece, t, a, sh);
3795     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3796     tcg_gen_and_vec(vece, d, d, m);
3797     tcg_gen_or_vec(vece, d, d, t);
3798 
3799     tcg_temp_free_vec(t);
3800     tcg_temp_free_vec(m);
3801 }
3802 
3803 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3804                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3805 {
3806     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3807     const GVecGen2i ops[4] = {
3808         { .fni8 = gen_shl8_ins_i64,
3809           .fniv = gen_shl_ins_vec,
3810           .fno = gen_helper_gvec_sli_b,
3811           .load_dest = true,
3812           .opt_opc = vecop_list,
3813           .vece = MO_8 },
3814         { .fni8 = gen_shl16_ins_i64,
3815           .fniv = gen_shl_ins_vec,
3816           .fno = gen_helper_gvec_sli_h,
3817           .load_dest = true,
3818           .opt_opc = vecop_list,
3819           .vece = MO_16 },
3820         { .fni4 = gen_shl32_ins_i32,
3821           .fniv = gen_shl_ins_vec,
3822           .fno = gen_helper_gvec_sli_s,
3823           .load_dest = true,
3824           .opt_opc = vecop_list,
3825           .vece = MO_32 },
3826         { .fni8 = gen_shl64_ins_i64,
3827           .fniv = gen_shl_ins_vec,
3828           .fno = gen_helper_gvec_sli_d,
3829           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3830           .load_dest = true,
3831           .opt_opc = vecop_list,
3832           .vece = MO_64 },
3833     };
3834 
3835     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3836     tcg_debug_assert(shift >= 0);
3837     tcg_debug_assert(shift < (8 << vece));
3838 
3839     if (shift == 0) {
3840         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3841     } else {
3842         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3843     }
3844 }
3845 
3846 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
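/*
 * Per-element multiply-accumulate (MLA) and multiply-subtract (MLS)
 * expanders; the 8 and 16-bit scalar cases use the Neon helpers.
 */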
3847 {
3848     gen_helper_neon_mul_u8(a, a, b);
3849     gen_helper_neon_add_u8(d, d, a);
3850 }
3851 
3852 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3853 {
3854     gen_helper_neon_mul_u8(a, a, b);
3855     gen_helper_neon_sub_u8(d, d, a);
3856 }
3857 
3858 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3859 {
3860     gen_helper_neon_mul_u16(a, a, b);
3861     gen_helper_neon_add_u16(d, d, a);
3862 }
3863 
3864 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3865 {
3866     gen_helper_neon_mul_u16(a, a, b);
3867     gen_helper_neon_sub_u16(d, d, a);
3868 }
3869 
3870 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3871 {
3872     tcg_gen_mul_i32(a, a, b);
3873     tcg_gen_add_i32(d, d, a);
3874 }
3875 
3876 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3877 {
3878     tcg_gen_mul_i32(a, a, b);
3879     tcg_gen_sub_i32(d, d, a);
3880 }
3881 
3882 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3883 {
3884     tcg_gen_mul_i64(a, a, b);
3885     tcg_gen_add_i64(d, d, a);
3886 }
3887 
3888 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3889 {
3890     tcg_gen_mul_i64(a, a, b);
3891     tcg_gen_sub_i64(d, d, a);
3892 }
3893 
3894 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3895 {
3896     tcg_gen_mul_vec(vece, a, a, b);
3897     tcg_gen_add_vec(vece, d, d, a);
3898 }
3899 
3900 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3901 {
3902     tcg_gen_mul_vec(vece, a, a, b);
3903     tcg_gen_sub_vec(vece, d, d, a);
3904 }
3905 
3906 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3907  * these tables are shared with AArch64 which does support them.
3908  */
3909 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3910                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3911 {
3912     static const TCGOpcode vecop_list[] = {
3913         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3914     };
3915     static const GVecGen3 ops[4] = {
3916         { .fni4 = gen_mla8_i32,
3917           .fniv = gen_mla_vec,
3918           .load_dest = true,
3919           .opt_opc = vecop_list,
3920           .vece = MO_8 },
3921         { .fni4 = gen_mla16_i32,
3922           .fniv = gen_mla_vec,
3923           .load_dest = true,
3924           .opt_opc = vecop_list,
3925           .vece = MO_16 },
3926         { .fni4 = gen_mla32_i32,
3927           .fniv = gen_mla_vec,
3928           .load_dest = true,
3929           .opt_opc = vecop_list,
3930           .vece = MO_32 },
3931         { .fni8 = gen_mla64_i64,
3932           .fniv = gen_mla_vec,
3933           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3934           .load_dest = true,
3935           .opt_opc = vecop_list,
3936           .vece = MO_64 },
3937     };
3938     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3939 }
3940 
3941 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3942                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3943 {
3944     static const TCGOpcode vecop_list[] = {
3945         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3946     };
3947     static const GVecGen3 ops[4] = {
3948         { .fni4 = gen_mls8_i32,
3949           .fniv = gen_mls_vec,
3950           .load_dest = true,
3951           .opt_opc = vecop_list,
3952           .vece = MO_8 },
3953         { .fni4 = gen_mls16_i32,
3954           .fniv = gen_mls_vec,
3955           .load_dest = true,
3956           .opt_opc = vecop_list,
3957           .vece = MO_16 },
3958         { .fni4 = gen_mls32_i32,
3959           .fniv = gen_mls_vec,
3960           .load_dest = true,
3961           .opt_opc = vecop_list,
3962           .vece = MO_32 },
3963         { .fni8 = gen_mls64_i64,
3964           .fniv = gen_mls_vec,
3965           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3966           .load_dest = true,
3967           .opt_opc = vecop_list,
3968           .vece = MO_64 },
3969     };
3970     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3971 }
3972 
3973 /* CMTST: test is "if ((X & Y) != 0)". */
3974 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3975 {
3976     tcg_gen_and_i32(d, a, b);
3977     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3978     tcg_gen_neg_i32(d, d);
3979 }
3980 
3981 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3982 {
3983     tcg_gen_and_i64(d, a, b);
3984     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3985     tcg_gen_neg_i64(d, d);
3986 }
3987 
3988 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3989 {
3990     tcg_gen_and_vec(vece, d, a, b);
3991     tcg_gen_dupi_vec(vece, a, 0);
3992     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3993 }
3994 
3995 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3996                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3997 {
3998     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3999     static const GVecGen3 ops[4] = {
4000         { .fni4 = gen_helper_neon_tst_u8,
4001           .fniv = gen_cmtst_vec,
4002           .opt_opc = vecop_list,
4003           .vece = MO_8 },
4004         { .fni4 = gen_helper_neon_tst_u16,
4005           .fniv = gen_cmtst_vec,
4006           .opt_opc = vecop_list,
4007           .vece = MO_16 },
4008         { .fni4 = gen_cmtst_i32,
4009           .fniv = gen_cmtst_vec,
4010           .opt_opc = vecop_list,
4011           .vece = MO_32 },
4012         { .fni8 = gen_cmtst_i64,
4013           .fniv = gen_cmtst_vec,
4014           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4015           .opt_opc = vecop_list,
4016           .vece = MO_64 },
4017     };
4018     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4019 }
4020 
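/*
 * Unsigned shift by a signed, per-element shift count (USHL): positive
 * counts shift left, negative counts shift right, and any count whose
 * magnitude is the element size or more produces zero.
 */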
4021 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4022 {
4023     TCGv_i32 lval = tcg_temp_new_i32();
4024     TCGv_i32 rval = tcg_temp_new_i32();
4025     TCGv_i32 lsh = tcg_temp_new_i32();
4026     TCGv_i32 rsh = tcg_temp_new_i32();
4027     TCGv_i32 zero = tcg_constant_i32(0);
4028     TCGv_i32 max = tcg_constant_i32(32);
4029 
4030     /*
4031      * Rely on the TCG guarantee that out of range shifts produce
4032      * unspecified results, not undefined behaviour (i.e. no trap).
4033      * Discard out-of-range results after the fact.
4034      */
4035     tcg_gen_ext8s_i32(lsh, shift);
4036     tcg_gen_neg_i32(rsh, lsh);
4037     tcg_gen_shl_i32(lval, src, lsh);
4038     tcg_gen_shr_i32(rval, src, rsh);
4039     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4040     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4041 
4042     tcg_temp_free_i32(lval);
4043     tcg_temp_free_i32(rval);
4044     tcg_temp_free_i32(lsh);
4045     tcg_temp_free_i32(rsh);
4046 }
4047 
4048 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4049 {
4050     TCGv_i64 lval = tcg_temp_new_i64();
4051     TCGv_i64 rval = tcg_temp_new_i64();
4052     TCGv_i64 lsh = tcg_temp_new_i64();
4053     TCGv_i64 rsh = tcg_temp_new_i64();
4054     TCGv_i64 zero = tcg_constant_i64(0);
4055     TCGv_i64 max = tcg_constant_i64(64);
4056 
4057     /*
4058      * Rely on the TCG guarantee that out of range shifts produce
4059      * unspecified results, not undefined behaviour (i.e. no trap).
4060      * Discard out-of-range results after the fact.
4061      */
4062     tcg_gen_ext8s_i64(lsh, shift);
4063     tcg_gen_neg_i64(rsh, lsh);
4064     tcg_gen_shl_i64(lval, src, lsh);
4065     tcg_gen_shr_i64(rval, src, rsh);
4066     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4067     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4068 
4069     tcg_temp_free_i64(lval);
4070     tcg_temp_free_i64(rval);
4071     tcg_temp_free_i64(lsh);
4072     tcg_temp_free_i64(rsh);
4073 }
4074 
4075 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4076                          TCGv_vec src, TCGv_vec shift)
4077 {
4078     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4079     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4080     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4081     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4082     TCGv_vec msk, max;
4083 
4084     tcg_gen_neg_vec(vece, rsh, shift);
4085     if (vece == MO_8) {
4086         tcg_gen_mov_vec(lsh, shift);
4087     } else {
4088         msk = tcg_temp_new_vec_matching(dst);
4089         tcg_gen_dupi_vec(vece, msk, 0xff);
4090         tcg_gen_and_vec(vece, lsh, shift, msk);
4091         tcg_gen_and_vec(vece, rsh, rsh, msk);
4092         tcg_temp_free_vec(msk);
4093     }
4094 
4095     /*
4096      * Rely on the TCG guarantee that out of range shifts produce
4097      * unspecified results, not undefined behaviour (i.e. no trap).
4098      * Discard out-of-range results after the fact.
4099      */
4100     tcg_gen_shlv_vec(vece, lval, src, lsh);
4101     tcg_gen_shrv_vec(vece, rval, src, rsh);
4102 
4103     max = tcg_temp_new_vec_matching(dst);
4104     tcg_gen_dupi_vec(vece, max, 8 << vece);
4105 
4106     /*
4107      * The choice of LT (signed) and GEU (unsigned) is biased toward
4108      * the instructions of the x86_64 host.  For MO_8, the whole byte
4109      * is significant so we must use an unsigned compare; otherwise we
4110      * have already masked to a byte and so a signed compare works.
4111      * Other tcg hosts have a full set of comparisons and do not care.
4112      */
4113     if (vece == MO_8) {
4114         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4115         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4116         tcg_gen_andc_vec(vece, lval, lval, lsh);
4117         tcg_gen_andc_vec(vece, rval, rval, rsh);
4118     } else {
4119         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4120         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4121         tcg_gen_and_vec(vece, lval, lval, lsh);
4122         tcg_gen_and_vec(vece, rval, rval, rsh);
4123     }
4124     tcg_gen_or_vec(vece, dst, lval, rval);
4125 
4126     tcg_temp_free_vec(max);
4127     tcg_temp_free_vec(lval);
4128     tcg_temp_free_vec(rval);
4129     tcg_temp_free_vec(lsh);
4130     tcg_temp_free_vec(rsh);
4131 }
4132 
4133 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4134                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4135 {
4136     static const TCGOpcode vecop_list[] = {
4137         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4138         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4139     };
4140     static const GVecGen3 ops[4] = {
4141         { .fniv = gen_ushl_vec,
4142           .fno = gen_helper_gvec_ushl_b,
4143           .opt_opc = vecop_list,
4144           .vece = MO_8 },
4145         { .fniv = gen_ushl_vec,
4146           .fno = gen_helper_gvec_ushl_h,
4147           .opt_opc = vecop_list,
4148           .vece = MO_16 },
4149         { .fni4 = gen_ushl_i32,
4150           .fniv = gen_ushl_vec,
4151           .opt_opc = vecop_list,
4152           .vece = MO_32 },
4153         { .fni8 = gen_ushl_i64,
4154           .fniv = gen_ushl_vec,
4155           .opt_opc = vecop_list,
4156           .vece = MO_64 },
4157     };
4158     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4159 }
4160 
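/*
 * Signed shift by a signed, per-element shift count (SSHL): as USHL,
 * except that negative counts are arithmetic right shifts, so an
 * out-of-range right shift fills the element with the sign bit.
 */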
4161 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4162 {
4163     TCGv_i32 lval = tcg_temp_new_i32();
4164     TCGv_i32 rval = tcg_temp_new_i32();
4165     TCGv_i32 lsh = tcg_temp_new_i32();
4166     TCGv_i32 rsh = tcg_temp_new_i32();
4167     TCGv_i32 zero = tcg_constant_i32(0);
4168     TCGv_i32 max = tcg_constant_i32(31);
4169 
4170     /*
4171      * Rely on the TCG guarantee that out of range shifts produce
4172      * unspecified results, not undefined behaviour (i.e. no trap).
4173      * Discard out-of-range results after the fact.
4174      */
4175     tcg_gen_ext8s_i32(lsh, shift);
4176     tcg_gen_neg_i32(rsh, lsh);
4177     tcg_gen_shl_i32(lval, src, lsh);
4178     tcg_gen_umin_i32(rsh, rsh, max);
4179     tcg_gen_sar_i32(rval, src, rsh);
4180     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4181     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4182 
4183     tcg_temp_free_i32(lval);
4184     tcg_temp_free_i32(rval);
4185     tcg_temp_free_i32(lsh);
4186     tcg_temp_free_i32(rsh);
4187 }
4188 
4189 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4190 {
4191     TCGv_i64 lval = tcg_temp_new_i64();
4192     TCGv_i64 rval = tcg_temp_new_i64();
4193     TCGv_i64 lsh = tcg_temp_new_i64();
4194     TCGv_i64 rsh = tcg_temp_new_i64();
4195     TCGv_i64 zero = tcg_constant_i64(0);
4196     TCGv_i64 max = tcg_constant_i64(63);
4197 
4198     /*
4199      * Rely on the TCG guarantee that out of range shifts produce
4200      * unspecified results, not undefined behaviour (i.e. no trap).
4201      * Discard out-of-range results after the fact.
4202      */
4203     tcg_gen_ext8s_i64(lsh, shift);
4204     tcg_gen_neg_i64(rsh, lsh);
4205     tcg_gen_shl_i64(lval, src, lsh);
4206     tcg_gen_umin_i64(rsh, rsh, max);
4207     tcg_gen_sar_i64(rval, src, rsh);
4208     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4209     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4210 
4211     tcg_temp_free_i64(lval);
4212     tcg_temp_free_i64(rval);
4213     tcg_temp_free_i64(lsh);
4214     tcg_temp_free_i64(rsh);
4215 }
4216 
4217 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4218                          TCGv_vec src, TCGv_vec shift)
4219 {
4220     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4221     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4222     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4223     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4224     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4225 
4226     /*
4227      * Rely on the TCG guarantee that out of range shifts produce
4228      * unspecified results, not undefined behaviour (i.e. no trap).
4229      * Discard out-of-range results after the fact.
4230      */
4231     tcg_gen_neg_vec(vece, rsh, shift);
4232     if (vece == MO_8) {
4233         tcg_gen_mov_vec(lsh, shift);
4234     } else {
4235         tcg_gen_dupi_vec(vece, tmp, 0xff);
4236         tcg_gen_and_vec(vece, lsh, shift, tmp);
4237         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4238     }
4239 
4240     /* Bound rsh so an out-of-range right shift gets -1.  */
4241     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4242     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4243     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4244 
4245     tcg_gen_shlv_vec(vece, lval, src, lsh);
4246     tcg_gen_sarv_vec(vece, rval, src, rsh);
4247 
4248     /* Select in-bound left shift.  */
4249     tcg_gen_andc_vec(vece, lval, lval, tmp);
4250 
4251     /* Select between left and right shift.  */
4252     if (vece == MO_8) {
4253         tcg_gen_dupi_vec(vece, tmp, 0);
4254         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4255     } else {
4256         tcg_gen_dupi_vec(vece, tmp, 0x80);
4257         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4258     }
4259 
4260     tcg_temp_free_vec(lval);
4261     tcg_temp_free_vec(rval);
4262     tcg_temp_free_vec(lsh);
4263     tcg_temp_free_vec(rsh);
4264     tcg_temp_free_vec(tmp);
4265 }
4266 
4267 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4268                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4269 {
4270     static const TCGOpcode vecop_list[] = {
4271         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4272         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4273     };
4274     static const GVecGen3 ops[4] = {
4275         { .fniv = gen_sshl_vec,
4276           .fno = gen_helper_gvec_sshl_b,
4277           .opt_opc = vecop_list,
4278           .vece = MO_8 },
4279         { .fniv = gen_sshl_vec,
4280           .fno = gen_helper_gvec_sshl_h,
4281           .opt_opc = vecop_list,
4282           .vece = MO_16 },
4283         { .fni4 = gen_sshl_i32,
4284           .fniv = gen_sshl_vec,
4285           .opt_opc = vecop_list,
4286           .vece = MO_32 },
4287         { .fni8 = gen_sshl_i64,
4288           .fniv = gen_sshl_vec,
4289           .opt_opc = vecop_list,
4290           .vece = MO_64 },
4291     };
4292     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4293 }
4294 
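/*
 * Saturating add/subtract with the cumulative QC flag (UQADD, SQADD,
 * UQSUB, SQSUB): each expander computes both the wrapping and the
 * saturating result and ORs their per-element mismatch into vfp.qc,
 * so QC ends up non-zero if any element saturated.
 */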
4295 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4296                           TCGv_vec a, TCGv_vec b)
4297 {
4298     TCGv_vec x = tcg_temp_new_vec_matching(t);
4299     tcg_gen_add_vec(vece, x, a, b);
4300     tcg_gen_usadd_vec(vece, t, a, b);
4301     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4302     tcg_gen_or_vec(vece, sat, sat, x);
4303     tcg_temp_free_vec(x);
4304 }
4305 
4306 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4307                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4308 {
4309     static const TCGOpcode vecop_list[] = {
4310         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4311     };
4312     static const GVecGen4 ops[4] = {
4313         { .fniv = gen_uqadd_vec,
4314           .fno = gen_helper_gvec_uqadd_b,
4315           .write_aofs = true,
4316           .opt_opc = vecop_list,
4317           .vece = MO_8 },
4318         { .fniv = gen_uqadd_vec,
4319           .fno = gen_helper_gvec_uqadd_h,
4320           .write_aofs = true,
4321           .opt_opc = vecop_list,
4322           .vece = MO_16 },
4323         { .fniv = gen_uqadd_vec,
4324           .fno = gen_helper_gvec_uqadd_s,
4325           .write_aofs = true,
4326           .opt_opc = vecop_list,
4327           .vece = MO_32 },
4328         { .fniv = gen_uqadd_vec,
4329           .fno = gen_helper_gvec_uqadd_d,
4330           .write_aofs = true,
4331           .opt_opc = vecop_list,
4332           .vece = MO_64 },
4333     };
4334     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4335                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4336 }
4337 
4338 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4339                           TCGv_vec a, TCGv_vec b)
4340 {
4341     TCGv_vec x = tcg_temp_new_vec_matching(t);
4342     tcg_gen_add_vec(vece, x, a, b);
4343     tcg_gen_ssadd_vec(vece, t, a, b);
4344     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4345     tcg_gen_or_vec(vece, sat, sat, x);
4346     tcg_temp_free_vec(x);
4347 }
4348 
4349 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4350                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4351 {
4352     static const TCGOpcode vecop_list[] = {
4353         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4354     };
4355     static const GVecGen4 ops[4] = {
4356         { .fniv = gen_sqadd_vec,
4357           .fno = gen_helper_gvec_sqadd_b,
4358           .opt_opc = vecop_list,
4359           .write_aofs = true,
4360           .vece = MO_8 },
4361         { .fniv = gen_sqadd_vec,
4362           .fno = gen_helper_gvec_sqadd_h,
4363           .opt_opc = vecop_list,
4364           .write_aofs = true,
4365           .vece = MO_16 },
4366         { .fniv = gen_sqadd_vec,
4367           .fno = gen_helper_gvec_sqadd_s,
4368           .opt_opc = vecop_list,
4369           .write_aofs = true,
4370           .vece = MO_32 },
4371         { .fniv = gen_sqadd_vec,
4372           .fno = gen_helper_gvec_sqadd_d,
4373           .opt_opc = vecop_list,
4374           .write_aofs = true,
4375           .vece = MO_64 },
4376     };
4377     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4378                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4379 }
4380 
4381 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4382                           TCGv_vec a, TCGv_vec b)
4383 {
4384     TCGv_vec x = tcg_temp_new_vec_matching(t);
4385     tcg_gen_sub_vec(vece, x, a, b);
4386     tcg_gen_ussub_vec(vece, t, a, b);
4387     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4388     tcg_gen_or_vec(vece, sat, sat, x);
4389     tcg_temp_free_vec(x);
4390 }
4391 
4392 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4393                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4394 {
4395     static const TCGOpcode vecop_list[] = {
4396         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4397     };
4398     static const GVecGen4 ops[4] = {
4399         { .fniv = gen_uqsub_vec,
4400           .fno = gen_helper_gvec_uqsub_b,
4401           .opt_opc = vecop_list,
4402           .write_aofs = true,
4403           .vece = MO_8 },
4404         { .fniv = gen_uqsub_vec,
4405           .fno = gen_helper_gvec_uqsub_h,
4406           .opt_opc = vecop_list,
4407           .write_aofs = true,
4408           .vece = MO_16 },
4409         { .fniv = gen_uqsub_vec,
4410           .fno = gen_helper_gvec_uqsub_s,
4411           .opt_opc = vecop_list,
4412           .write_aofs = true,
4413           .vece = MO_32 },
4414         { .fniv = gen_uqsub_vec,
4415           .fno = gen_helper_gvec_uqsub_d,
4416           .opt_opc = vecop_list,
4417           .write_aofs = true,
4418           .vece = MO_64 },
4419     };
4420     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4421                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4422 }
4423 
4424 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4425                           TCGv_vec a, TCGv_vec b)
4426 {
4427     TCGv_vec x = tcg_temp_new_vec_matching(t);
4428     tcg_gen_sub_vec(vece, x, a, b);
4429     tcg_gen_sssub_vec(vece, t, a, b);
4430     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4431     tcg_gen_or_vec(vece, sat, sat, x);
4432     tcg_temp_free_vec(x);
4433 }
4434 
4435 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4436                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4437 {
4438     static const TCGOpcode vecop_list[] = {
4439         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4440     };
4441     static const GVecGen4 ops[4] = {
4442         { .fniv = gen_sqsub_vec,
4443           .fno = gen_helper_gvec_sqsub_b,
4444           .opt_opc = vecop_list,
4445           .write_aofs = true,
4446           .vece = MO_8 },
4447         { .fniv = gen_sqsub_vec,
4448           .fno = gen_helper_gvec_sqsub_h,
4449           .opt_opc = vecop_list,
4450           .write_aofs = true,
4451           .vece = MO_16 },
4452         { .fniv = gen_sqsub_vec,
4453           .fno = gen_helper_gvec_sqsub_s,
4454           .opt_opc = vecop_list,
4455           .write_aofs = true,
4456           .vece = MO_32 },
4457         { .fniv = gen_sqsub_vec,
4458           .fno = gen_helper_gvec_sqsub_d,
4459           .opt_opc = vecop_list,
4460           .write_aofs = true,
4461           .vece = MO_64 },
4462     };
4463     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4464                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4465 }
4466 
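/*
 * Signed and unsigned absolute difference (SABD/UABD): for scalars,
 * select between a - b and b - a; for vectors, use max(a, b) - min(a, b).
 */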
4467 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4468 {
4469     TCGv_i32 t = tcg_temp_new_i32();
4470 
4471     tcg_gen_sub_i32(t, a, b);
4472     tcg_gen_sub_i32(d, b, a);
4473     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4474     tcg_temp_free_i32(t);
4475 }
4476 
4477 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4478 {
4479     TCGv_i64 t = tcg_temp_new_i64();
4480 
4481     tcg_gen_sub_i64(t, a, b);
4482     tcg_gen_sub_i64(d, b, a);
4483     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4484     tcg_temp_free_i64(t);
4485 }
4486 
4487 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4488 {
4489     TCGv_vec t = tcg_temp_new_vec_matching(d);
4490 
4491     tcg_gen_smin_vec(vece, t, a, b);
4492     tcg_gen_smax_vec(vece, d, a, b);
4493     tcg_gen_sub_vec(vece, d, d, t);
4494     tcg_temp_free_vec(t);
4495 }
4496 
4497 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4498                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4499 {
4500     static const TCGOpcode vecop_list[] = {
4501         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4502     };
4503     static const GVecGen3 ops[4] = {
4504         { .fniv = gen_sabd_vec,
4505           .fno = gen_helper_gvec_sabd_b,
4506           .opt_opc = vecop_list,
4507           .vece = MO_8 },
4508         { .fniv = gen_sabd_vec,
4509           .fno = gen_helper_gvec_sabd_h,
4510           .opt_opc = vecop_list,
4511           .vece = MO_16 },
4512         { .fni4 = gen_sabd_i32,
4513           .fniv = gen_sabd_vec,
4514           .fno = gen_helper_gvec_sabd_s,
4515           .opt_opc = vecop_list,
4516           .vece = MO_32 },
4517         { .fni8 = gen_sabd_i64,
4518           .fniv = gen_sabd_vec,
4519           .fno = gen_helper_gvec_sabd_d,
4520           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4521           .opt_opc = vecop_list,
4522           .vece = MO_64 },
4523     };
4524     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4525 }
4526 
4527 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4528 {
4529     TCGv_i32 t = tcg_temp_new_i32();
4530 
4531     tcg_gen_sub_i32(t, a, b);
4532     tcg_gen_sub_i32(d, b, a);
4533     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4534     tcg_temp_free_i32(t);
4535 }
4536 
4537 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4538 {
4539     TCGv_i64 t = tcg_temp_new_i64();
4540 
4541     tcg_gen_sub_i64(t, a, b);
4542     tcg_gen_sub_i64(d, b, a);
4543     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4544     tcg_temp_free_i64(t);
4545 }
4546 
4547 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4548 {
4549     TCGv_vec t = tcg_temp_new_vec_matching(d);
4550 
4551     tcg_gen_umin_vec(vece, t, a, b);
4552     tcg_gen_umax_vec(vece, d, a, b);
4553     tcg_gen_sub_vec(vece, d, d, t);
4554     tcg_temp_free_vec(t);
4555 }
4556 
4557 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4558                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4559 {
4560     static const TCGOpcode vecop_list[] = {
4561         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4562     };
4563     static const GVecGen3 ops[4] = {
4564         { .fniv = gen_uabd_vec,
4565           .fno = gen_helper_gvec_uabd_b,
4566           .opt_opc = vecop_list,
4567           .vece = MO_8 },
4568         { .fniv = gen_uabd_vec,
4569           .fno = gen_helper_gvec_uabd_h,
4570           .opt_opc = vecop_list,
4571           .vece = MO_16 },
4572         { .fni4 = gen_uabd_i32,
4573           .fniv = gen_uabd_vec,
4574           .fno = gen_helper_gvec_uabd_s,
4575           .opt_opc = vecop_list,
4576           .vece = MO_32 },
4577         { .fni8 = gen_uabd_i64,
4578           .fniv = gen_uabd_vec,
4579           .fno = gen_helper_gvec_uabd_d,
4580           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4581           .opt_opc = vecop_list,
4582           .vece = MO_64 },
4583     };
4584     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4585 }
4586 
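/*
 * Signed and unsigned absolute difference and accumulate (SABA/UABA):
 * compute the absolute difference as above and add it to the
 * destination.
 */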
4587 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4588 {
4589     TCGv_i32 t = tcg_temp_new_i32();
4590     gen_sabd_i32(t, a, b);
4591     tcg_gen_add_i32(d, d, t);
4592     tcg_temp_free_i32(t);
4593 }
4594 
4595 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4596 {
4597     TCGv_i64 t = tcg_temp_new_i64();
4598     gen_sabd_i64(t, a, b);
4599     tcg_gen_add_i64(d, d, t);
4600     tcg_temp_free_i64(t);
4601 }
4602 
4603 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4604 {
4605     TCGv_vec t = tcg_temp_new_vec_matching(d);
4606     gen_sabd_vec(vece, t, a, b);
4607     tcg_gen_add_vec(vece, d, d, t);
4608     tcg_temp_free_vec(t);
4609 }
4610 
4611 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4612                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4613 {
4614     static const TCGOpcode vecop_list[] = {
4615         INDEX_op_sub_vec, INDEX_op_add_vec,
4616         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4617     };
4618     static const GVecGen3 ops[4] = {
4619         { .fniv = gen_saba_vec,
4620           .fno = gen_helper_gvec_saba_b,
4621           .opt_opc = vecop_list,
4622           .load_dest = true,
4623           .vece = MO_8 },
4624         { .fniv = gen_saba_vec,
4625           .fno = gen_helper_gvec_saba_h,
4626           .opt_opc = vecop_list,
4627           .load_dest = true,
4628           .vece = MO_16 },
4629         { .fni4 = gen_saba_i32,
4630           .fniv = gen_saba_vec,
4631           .fno = gen_helper_gvec_saba_s,
4632           .opt_opc = vecop_list,
4633           .load_dest = true,
4634           .vece = MO_32 },
4635         { .fni8 = gen_saba_i64,
4636           .fniv = gen_saba_vec,
4637           .fno = gen_helper_gvec_saba_d,
4638           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4639           .opt_opc = vecop_list,
4640           .load_dest = true,
4641           .vece = MO_64 },
4642     };
4643     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4644 }
4645 
4646 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4647 {
4648     TCGv_i32 t = tcg_temp_new_i32();
4649     gen_uabd_i32(t, a, b);
4650     tcg_gen_add_i32(d, d, t);
4651     tcg_temp_free_i32(t);
4652 }
4653 
4654 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4655 {
4656     TCGv_i64 t = tcg_temp_new_i64();
4657     gen_uabd_i64(t, a, b);
4658     tcg_gen_add_i64(d, d, t);
4659     tcg_temp_free_i64(t);
4660 }
4661 
4662 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4663 {
4664     TCGv_vec t = tcg_temp_new_vec_matching(d);
4665     gen_uabd_vec(vece, t, a, b);
4666     tcg_gen_add_vec(vece, d, d, t);
4667     tcg_temp_free_vec(t);
4668 }
4669 
4670 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4671                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4672 {
4673     static const TCGOpcode vecop_list[] = {
4674         INDEX_op_sub_vec, INDEX_op_add_vec,
4675         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4676     };
4677     static const GVecGen3 ops[4] = {
4678         { .fniv = gen_uaba_vec,
4679           .fno = gen_helper_gvec_uaba_b,
4680           .opt_opc = vecop_list,
4681           .load_dest = true,
4682           .vece = MO_8 },
4683         { .fniv = gen_uaba_vec,
4684           .fno = gen_helper_gvec_uaba_h,
4685           .opt_opc = vecop_list,
4686           .load_dest = true,
4687           .vece = MO_16 },
4688         { .fni4 = gen_uaba_i32,
4689           .fniv = gen_uaba_vec,
4690           .fno = gen_helper_gvec_uaba_s,
4691           .opt_opc = vecop_list,
4692           .load_dest = true,
4693           .vece = MO_32 },
4694         { .fni8 = gen_uaba_i64,
4695           .fniv = gen_uaba_vec,
4696           .fno = gen_helper_gvec_uaba_d,
4697           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4698           .opt_opc = vecop_list,
4699           .load_dest = true,
4700           .vece = MO_64 },
4701     };
4702     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4703 }
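/*
 * A rough per-element reference model for the SABA/UABA expansions above
 * (illustrative only, not a drop-in helper):
 *
 *     d[i] = d[i] + (a[i] > b[i] ? a[i] - b[i] : b[i] - a[i]);
 *
 * where the comparison is signed for SABA and unsigned for UABA.  The
 * .load_dest = true entries in the GVecGen3 tables are what make the
 * existing destination element available for the accumulation.
 */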
4704 
4705 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4706                            int opc1, int crn, int crm, int opc2,
4707                            bool isread, int rt, int rt2)
4708 {
4709     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4710     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4711     TCGv_ptr tcg_ri = NULL;
4712     bool need_exit_tb;
4713     uint32_t syndrome;
4714 
4715     /*
4716      * Note that since we are an implementation which takes an
4717      * exception on a trapped conditional instruction only if the
4718      * instruction passes its condition code check, we can take
4719      * advantage of the clause in the ARM ARM that allows us to set
4720      * the COND field in the instruction to 0xE in all cases.
4721      * We could fish the actual condition out of the insn (ARM)
4722      * or the condexec bits (Thumb) but it isn't necessary.
4723      */
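    /*
     * As one concrete illustration (not exhaustive): an A32
     * "MRC p15, 0, r0, c1, c0, 0" (a SCTLR read) arrives here with
     * cpnum = 15, is64 = 0, opc1 = 0, crn = 1, crm = 0, opc2 = 0,
     * isread = true and rt = 0; ENCODE_CP_REG() folds those fields plus
     * the NS bit into the key used for the reginfo lookup above.
     */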
4724     switch (cpnum) {
4725     case 14:
4726         if (is64) {
4727             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4728                                          isread, false);
4729         } else {
4730             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4731                                         rt, isread, false);
4732         }
4733         break;
4734     case 15:
4735         if (is64) {
4736             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4737                                          isread, false);
4738         } else {
4739             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4740                                         rt, isread, false);
4741         }
4742         break;
4743     default:
4744         /*
4745          * ARMv8 defines that only coprocessors 14 and 15 exist,
4746          * so this can only happen if this is an ARMv7 or earlier CPU,
4747          * in which case the syndrome information won't actually be
4748          * guest visible.
4749          */
4750         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4751         syndrome = syn_uncategorized();
4752         break;
4753     }
4754 
4755     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4756         /*
4757          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4758          * over the UNDEF for "no such register" or the UNDEF for "access
4759          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4760          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4761          * access_check_cp_reg(), after the checks for whether the access is
4762          * configurably trapped to EL1.
4763          */
4764         uint32_t maskbit = is64 ? crm : crn;
4765 
4766         if (maskbit != 4 && maskbit != 14) {
4767             /* T4 and T14 are RES0 so never cause traps */
4768             TCGv_i32 t;
4769             DisasLabel over = gen_disas_label(s);
4770 
4771             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4772             tcg_gen_andi_i32(t, t, 1u << maskbit);
4773             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4774             tcg_temp_free_i32(t);
4775 
4776             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4777             set_disas_label(s, over);
4778         }
4779     }
4780 
4781     if (!ri) {
4782         /*
4783          * Unknown register; this might be a guest error or a QEMU
4784          * unimplemented feature.
4785          */
4786         if (is64) {
4787             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4788                           "64 bit system register cp:%d opc1: %d crm:%d "
4789                           "(%s)\n",
4790                           isread ? "read" : "write", cpnum, opc1, crm,
4791                           s->ns ? "non-secure" : "secure");
4792         } else {
4793             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4794                           "system register cp:%d opc1:%d crn:%d crm:%d "
4795                           "opc2:%d (%s)\n",
4796                           isread ? "read" : "write", cpnum, opc1, crn,
4797                           crm, opc2, s->ns ? "non-secure" : "secure");
4798         }
4799         unallocated_encoding(s);
4800         return;
4801     }
4802 
4803     /* Check access permissions */
4804     if (!cp_access_ok(s->current_el, ri, isread)) {
4805         unallocated_encoding(s);
4806         return;
4807     }
4808 
4809     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4810         (ri->fgt && s->fgt_active) ||
4811         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4812         /*
4813          * Emit code to perform further access permissions checks at
4814          * runtime; this may result in an exception.
4815          * Note that on XScale all cp0..cp13 registers do an access check
4816          * call in order to handle c15_cpar.
4817          */
4818         gen_set_condexec(s);
4819         gen_update_pc(s, 0);
4820         tcg_ri = tcg_temp_new_ptr();
4821         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4822                                        tcg_constant_i32(key),
4823                                        tcg_constant_i32(syndrome),
4824                                        tcg_constant_i32(isread));
4825     } else if (ri->type & ARM_CP_RAISES_EXC) {
4826         /*
4827          * The readfn or writefn might raise an exception;
4828          * synchronize the CPU state in case it does.
4829          */
4830         gen_set_condexec(s);
4831         gen_update_pc(s, 0);
4832     }
4833 
4834     /* Handle special cases first */
4835     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4836     case 0:
4837         break;
4838     case ARM_CP_NOP:
4839         goto exit;
4840     case ARM_CP_WFI:
4841         if (isread) {
4842             unallocated_encoding(s);
4843         } else {
4844             gen_update_pc(s, curr_insn_len(s));
4845             s->base.is_jmp = DISAS_WFI;
4846         }
4847         goto exit;
4848     default:
4849         g_assert_not_reached();
4850     }
4851 
4852     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4853         gen_io_start();
4854     }
4855 
4856     if (isread) {
4857         /* Read */
4858         if (is64) {
4859             TCGv_i64 tmp64;
4860             TCGv_i32 tmp;
4861             if (ri->type & ARM_CP_CONST) {
4862                 tmp64 = tcg_constant_i64(ri->resetvalue);
4863             } else if (ri->readfn) {
4864                 if (!tcg_ri) {
4865                     tcg_ri = gen_lookup_cp_reg(key);
4866                 }
4867                 tmp64 = tcg_temp_new_i64();
4868                 gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4869             } else {
4870                 tmp64 = tcg_temp_new_i64();
4871                 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4872             }
4873             tmp = tcg_temp_new_i32();
4874             tcg_gen_extrl_i64_i32(tmp, tmp64);
4875             store_reg(s, rt, tmp);
4876             tmp = tcg_temp_new_i32();
4877             tcg_gen_extrh_i64_i32(tmp, tmp64);
4878             tcg_temp_free_i64(tmp64);
4879             store_reg(s, rt2, tmp);
4880         } else {
4881             TCGv_i32 tmp;
4882             if (ri->type & ARM_CP_CONST) {
4883                 tmp = tcg_constant_i32(ri->resetvalue);
4884             } else if (ri->readfn) {
4885                 if (!tcg_ri) {
4886                     tcg_ri = gen_lookup_cp_reg(key);
4887                 }
4888                 tmp = tcg_temp_new_i32();
4889                 gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4890             } else {
4891                 tmp = load_cpu_offset(ri->fieldoffset);
4892             }
4893                 /* A destination register of r15 for a 32-bit load sets the
4894                  * condition codes from the high 4 bits of the loaded value.
4895                  */
4896                  */
4897                 gen_set_nzcv(tmp);
4898                 tcg_temp_free_i32(tmp);
4899             } else {
4900                 store_reg(s, rt, tmp);
4901             }
4902         }
4903     } else {
4904         /* Write */
4905         if (ri->type & ARM_CP_CONST) {
4906             /* If not forbidden by access permissions, treat as WI */
4907             goto exit;
4908         }
4909 
4910         if (is64) {
4911             TCGv_i32 tmplo, tmphi;
4912             TCGv_i64 tmp64 = tcg_temp_new_i64();
4913             tmplo = load_reg(s, rt);
4914             tmphi = load_reg(s, rt2);
4915             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4916             tcg_temp_free_i32(tmplo);
4917             tcg_temp_free_i32(tmphi);
4918             if (ri->writefn) {
4919                 if (!tcg_ri) {
4920                     tcg_ri = gen_lookup_cp_reg(key);
4921                 }
4922                 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4923             } else {
4924                 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4925             }
4926             tcg_temp_free_i64(tmp64);
4927         } else {
4928             TCGv_i32 tmp = load_reg(s, rt);
4929             if (ri->writefn) {
4930                 if (!tcg_ri) {
4931                     tcg_ri = gen_lookup_cp_reg(key);
4932                 }
4933                 gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4934                 tcg_temp_free_i32(tmp);
4935             } else {
4936                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4937             }
4938         }
4939     }
4940 
4941     /* I/O operations must end the TB here (whether read or write) */
4942     need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4943                     (ri->type & ARM_CP_IO));
4944 
4945     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4946         /*
4947          * A write to any coprocessor register that ends a TB
4948          * must rebuild the hflags for the next TB.
4949          */
4950         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4951         /*
4952          * We default to ending the TB on a coprocessor register write,
4953          * but allow this to be suppressed by the register definition
4954          * (usually only necessary to work around guest bugs).
4955          */
4956         need_exit_tb = true;
4957     }
4958     if (need_exit_tb) {
4959         gen_lookup_tb(s);
4960     }
4961 
4962  exit:
4963     if (tcg_ri) {
4964         tcg_temp_free_ptr(tcg_ri);
4965     }
4966 }
4967 
4968 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4969 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4970 {
4971     int cpnum = (insn >> 8) & 0xf;
4972 
4973     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4974         unallocated_encoding(s);
4975     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4976         if (disas_iwmmxt_insn(s, insn)) {
4977             unallocated_encoding(s);
4978         }
4979     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4980         if (disas_dsp_insn(s, insn)) {
4981             unallocated_encoding(s);
4982         }
4983     }
4984 }
4985 
4986 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4987 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4988 {
4989     TCGv_i32 tmp;
4990     tmp = tcg_temp_new_i32();
4991     tcg_gen_extrl_i64_i32(tmp, val);
4992     store_reg(s, rlow, tmp);
4993     tmp = tcg_temp_new_i32();
4994     tcg_gen_extrh_i64_i32(tmp, val);
4995     store_reg(s, rhigh, tmp);
4996 }
4997 
4998 /* Load and add a 64-bit value from a register pair.  */
4999 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
5000 {
5001     TCGv_i64 tmp;
5002     TCGv_i32 tmpl;
5003     TCGv_i32 tmph;
5004 
5005     /* Load the 64-bit value rhigh:rlow.  */
5006     tmpl = load_reg(s, rlow);
5007     tmph = load_reg(s, rhigh);
5008     tmp = tcg_temp_new_i64();
5009     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
5010     tcg_temp_free_i32(tmpl);
5011     tcg_temp_free_i32(tmph);
5012     tcg_gen_add_i64(val, val, tmp);
5013     tcg_temp_free_i64(tmp);
5014 }
5015 
5016 /* Set N and Z flags from hi|lo.  */
5017 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
5018 {
5019     tcg_gen_mov_i32(cpu_NF, hi);
5020     tcg_gen_or_i32(cpu_ZF, lo, hi);
5021 }
5022 
5023 /* Load/Store exclusive instructions are implemented by remembering
5024    the value/address loaded, and seeing if these are the same
5025    when the store is performed.  This should be sufficient to implement
5026    the architecturally mandated semantics, and avoids having to monitor
5027    regular stores.  The compare vs the remembered value is done during
5028    the cmpxchg operation, but we must compare the addresses manually.  */
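/*
 * A minimal sketch of the guest code pattern this models (an atomic
 * increment; illustrative only):
 *
 *   retry:
 *     LDREX   r1, [r0]        @ gen_load_exclusive: remember addr and value
 *     ADD     r1, r1, #1
 *     STREX   r2, r1, [r0]    @ gen_store_exclusive: cmpxchg against the
 *     CMP     r2, #0          @   remembered value; r2 == 0 on success
 *     BNE     retry
 */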
5029 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
5030                                TCGv_i32 addr, int size)
5031 {
5032     TCGv_i32 tmp = tcg_temp_new_i32();
5033     MemOp opc = size | MO_ALIGN | s->be_data;
5034 
5035     s->is_ldex = true;
5036 
5037     if (size == 3) {
5038         TCGv_i32 tmp2 = tcg_temp_new_i32();
5039         TCGv_i64 t64 = tcg_temp_new_i64();
5040 
5041         /*
5042          * For AArch32, architecturally the 32-bit word at the lowest
5043          * address is always Rt and the one at addr+4 is Rt2, even if
5044          * the CPU is big-endian. That means we don't want to do a
5045          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
5046          * architecturally 64-bit access, but instead do a 64-bit access
5047          * using MO_BE if appropriate and then split the two halves.
5048          */
5049         TCGv taddr = gen_aa32_addr(s, addr, opc);
5050 
5051         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
5052         tcg_temp_free(taddr);
5053         tcg_gen_mov_i64(cpu_exclusive_val, t64);
5054         if (s->be_data == MO_BE) {
5055             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
5056         } else {
5057             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
5058         }
5059         tcg_temp_free_i64(t64);
5060 
5061         store_reg(s, rt2, tmp2);
5062     } else {
5063         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
5064         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
5065     }
5066 
5067     store_reg(s, rt, tmp);
5068     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
5069 }
5070 
5071 static void gen_clrex(DisasContext *s)
5072 {
5073     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5074 }
5075 
5076 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
5077                                 TCGv_i32 addr, int size)
5078 {
5079     TCGv_i32 t0, t1, t2;
5080     TCGv_i64 extaddr;
5081     TCGv taddr;
5082     TCGLabel *done_label;
5083     TCGLabel *fail_label;
5084     MemOp opc = size | MO_ALIGN | s->be_data;
5085 
5086     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
5087          [addr] = {Rt};
5088          {Rd} = 0;
5089        } else {
5090          {Rd} = 1;
5091        } */
5092     fail_label = gen_new_label();
5093     done_label = gen_new_label();
5094     extaddr = tcg_temp_new_i64();
5095     tcg_gen_extu_i32_i64(extaddr, addr);
5096     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
5097     tcg_temp_free_i64(extaddr);
5098 
5099     taddr = gen_aa32_addr(s, addr, opc);
5100     t0 = tcg_temp_new_i32();
5101     t1 = load_reg(s, rt);
5102     if (size == 3) {
5103         TCGv_i64 o64 = tcg_temp_new_i64();
5104         TCGv_i64 n64 = tcg_temp_new_i64();
5105 
5106         t2 = load_reg(s, rt2);
5107 
5108         /*
5109          * For AArch32, architecturally the 32-bit word at the lowest
5110          * address is always Rt and the one at addr+4 is Rt2, even if
5111          * the CPU is big-endian. Since we're going to treat this as a
5112          * single 64-bit BE store, we need to put the two halves in the
5113          * opposite order for BE to LE, so that they end up in the right
5114          * places.  We don't want gen_aa32_st_i64, because that checks
5115          * SCTLR_B as if for an architectural 64-bit access.
5116          */
5117         if (s->be_data == MO_BE) {
5118             tcg_gen_concat_i32_i64(n64, t2, t1);
5119         } else {
5120             tcg_gen_concat_i32_i64(n64, t1, t2);
5121         }
5122         tcg_temp_free_i32(t2);
5123 
5124         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5125                                    get_mem_index(s), opc);
5126         tcg_temp_free_i64(n64);
5127 
5128         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5129         tcg_gen_extrl_i64_i32(t0, o64);
5130 
5131         tcg_temp_free_i64(o64);
5132     } else {
5133         t2 = tcg_temp_new_i32();
5134         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5135         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5136         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5137         tcg_temp_free_i32(t2);
5138     }
5139     tcg_temp_free_i32(t1);
5140     tcg_temp_free(taddr);
5141     tcg_gen_mov_i32(cpu_R[rd], t0);
5142     tcg_temp_free_i32(t0);
5143     tcg_gen_br(done_label);
5144 
5145     gen_set_label(fail_label);
5146     tcg_gen_movi_i32(cpu_R[rd], 1);
5147     gen_set_label(done_label);
5148     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5149 }
5150 
5151 /* gen_srs:
5152  * @env: CPUARMState
5153  * @s: DisasContext
5154  * @mode: mode field from insn (which stack to store to)
5155  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5156  * @writeback: true if writeback bit set
5157  *
5158  * Generate code for the SRS (Store Return State) insn.
5159  */
5160 static void gen_srs(DisasContext *s,
5161                     uint32_t mode, uint32_t amode, bool writeback)
5162 {
5163     int32_t offset;
5164     TCGv_i32 addr, tmp;
5165     bool undef = false;
5166 
5167     /* SRS is:
5168      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5169      *   and specified mode is monitor mode
5170      * - UNDEFINED in Hyp mode
5171      * - UNPREDICTABLE in User or System mode
5172      * - UNPREDICTABLE if the specified mode is:
5173      * -- not implemented
5174      * -- not a valid mode number
5175      * -- a mode that's at a higher exception level
5176      * -- Monitor, if we are Non-secure
5177      * For the UNPREDICTABLE cases we choose to UNDEF.
5178      */
5179     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5180         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
5181         return;
5182     }
5183 
5184     if (s->current_el == 0 || s->current_el == 2) {
5185         undef = true;
5186     }
5187 
5188     switch (mode) {
5189     case ARM_CPU_MODE_USR:
5190     case ARM_CPU_MODE_FIQ:
5191     case ARM_CPU_MODE_IRQ:
5192     case ARM_CPU_MODE_SVC:
5193     case ARM_CPU_MODE_ABT:
5194     case ARM_CPU_MODE_UND:
5195     case ARM_CPU_MODE_SYS:
5196         break;
5197     case ARM_CPU_MODE_HYP:
5198         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5199             undef = true;
5200         }
5201         break;
5202     case ARM_CPU_MODE_MON:
5203         /* No need to check specifically for "are we non-secure" because
5204          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5205          * so if this isn't EL3 then we must be non-secure.
5206          */
5207         if (s->current_el != 3) {
5208             undef = true;
5209         }
5210         break;
5211     default:
5212         undef = true;
5213     }
5214 
5215     if (undef) {
5216         unallocated_encoding(s);
5217         return;
5218     }
5219 
5220     addr = tcg_temp_new_i32();
5221     /* get_r13_banked() will raise an exception if called from System mode */
5222     gen_set_condexec(s);
5223     gen_update_pc(s, 0);
5224     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5225     switch (amode) {
5226     case 0: /* DA */
5227         offset = -4;
5228         break;
5229     case 1: /* IA */
5230         offset = 0;
5231         break;
5232     case 2: /* DB */
5233         offset = -8;
5234         break;
5235     case 3: /* IB */
5236         offset = 4;
5237         break;
5238     default:
5239         g_assert_not_reached();
5240     }
5241     tcg_gen_addi_i32(addr, addr, offset);
5242     tmp = load_reg(s, 14);
5243     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5244     tcg_temp_free_i32(tmp);
5245     tmp = load_cpu_field(spsr);
5246     tcg_gen_addi_i32(addr, addr, 4);
5247     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5248     tcg_temp_free_i32(tmp);
5249     if (writeback) {
5250         switch (amode) {
5251         case 0:
5252             offset = -8;
5253             break;
5254         case 1:
5255             offset = 4;
5256             break;
5257         case 2:
5258             offset = -4;
5259             break;
5260         case 3:
5261             offset = 0;
5262             break;
5263         default:
5264             g_assert_not_reached();
5265         }
5266         tcg_gen_addi_i32(addr, addr, offset);
5267         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5268     }
5269     tcg_temp_free_i32(addr);
5270     s->base.is_jmp = DISAS_UPDATE_EXIT;
5271 }
5272 
5273 /* Skip this instruction if the ARM condition is false */
5274 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5275 {
5276     arm_gen_condlabel(s);
5277     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5278 }
5279 
5280 
5281 /*
5282  * Constant expanders used by T16/T32 decode
5283  */
5284 
5285 /* Return only the rotation part of T32ExpandImm.  */
5286 static int t32_expandimm_rot(DisasContext *s, int x)
5287 {
5288     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5289 }
5290 
5291 /* Return the unrotated immediate from T32ExpandImm.  */
5292 static int t32_expandimm_imm(DisasContext *s, int x)
5293 {
5294     int imm = extract32(x, 0, 8);
5295 
5296     switch (extract32(x, 8, 4)) {
5297     case 0: /* XY */
5298         /* Nothing to do.  */
5299         break;
5300     case 1: /* 00XY00XY */
5301         imm *= 0x00010001;
5302         break;
5303     case 2: /* XY00XY00 */
5304         imm *= 0x01000100;
5305         break;
5306     case 3: /* XYXYXYXY */
5307         imm *= 0x01010101;
5308         break;
5309     default:
5310         /* Rotated constant.  */
5311         imm |= 0x80;
5312         break;
5313     }
5314     return imm;
5315 }
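/*
 * Two worked examples of the split between these two expanders
 * (for illustration; the users of the T32 immediate format recombine
 * them via ror32(imm, rot)):
 *   x = 0x1ab: bits [11:10] are zero and bits [9:8] select "00XY00XY",
 *              so the immediate is 0x00ab00ab and the rotation is 0.
 *   x = 0x4ff: bits [11:10] are non-zero, so the unrotated immediate is
 *              0xff (bit 7 forced to 1) and t32_expandimm_rot() returns
 *              bits [11:7] = 9; ror32(0xff, 9) = 0x7f800000 is the final
 *              T32ExpandImm result.
 */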
5316 
5317 static int t32_branch24(DisasContext *s, int x)
5318 {
5319     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5320     x ^= !(x < 0) * (3 << 21);
5321     /* Append the final zero.  */
5322     return x << 1;
5323 }
5324 
5325 static int t16_setflags(DisasContext *s)
5326 {
5327     return s->condexec_mask == 0;
5328 }
5329 
5330 static int t16_push_list(DisasContext *s, int x)
5331 {
5332     return (x & 0xff) | (x & 0x100) << (14 - 8);
5333 }
5334 
5335 static int t16_pop_list(DisasContext *s, int x)
5336 {
5337     return (x & 0xff) | (x & 0x100) << (15 - 8);
5338 }
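/*
 * For illustration: both expanders pass the low 8 register bits through
 * unchanged and relocate bit 8 of the T16 encoding, e.g. x = 0x10f
 * yields 0x400f for PUSH {r0-r3, lr} and 0x800f for POP {r0-r3, pc}.
 */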
5339 
5340 /*
5341  * Include the generated decoders.
5342  */
5343 
5344 #include "decode-a32.c.inc"
5345 #include "decode-a32-uncond.c.inc"
5346 #include "decode-t32.c.inc"
5347 #include "decode-t16.c.inc"
5348 
5349 static bool valid_cp(DisasContext *s, int cp)
5350 {
5351     /*
5352      * Return true if this coprocessor field indicates something
5353      * that's really a possible coprocessor.
5354      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5355      * and of those only cp14 and cp15 were used for registers.
5356      * cp10 and cp11 were used for VFP and Neon, whose decode is
5357      * dealt with elsewhere. With the advent of fp16, cp9 is also
5358      * now part of VFP.
5359      * For v8A and later, the encoding has been tightened so that
5360      * only cp14 and cp15 are valid, and other values aren't considered
5361      * to be in the coprocessor-instruction space at all. v8M still
5362      * permits coprocessors 0..7.
5363      * For XScale, we must not decode the XScale cp0, cp1 space as
5364      * a standard coprocessor insn, because we want to fall through to
5365      * the legacy disas_xscale_insn() decoder after decodetree is done.
5366      */
5367     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5368         return false;
5369     }
5370 
5371     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5372         !arm_dc_feature(s, ARM_FEATURE_M)) {
5373         return cp >= 14;
5374     }
5375     return cp < 8 || cp >= 14;
5376 }
5377 
5378 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5379 {
5380     if (!valid_cp(s, a->cp)) {
5381         return false;
5382     }
5383     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5384                    false, a->rt, 0);
5385     return true;
5386 }
5387 
5388 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5389 {
5390     if (!valid_cp(s, a->cp)) {
5391         return false;
5392     }
5393     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5394                    true, a->rt, 0);
5395     return true;
5396 }
5397 
5398 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5399 {
5400     if (!valid_cp(s, a->cp)) {
5401         return false;
5402     }
5403     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5404                    false, a->rt, a->rt2);
5405     return true;
5406 }
5407 
5408 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5409 {
5410     if (!valid_cp(s, a->cp)) {
5411         return false;
5412     }
5413     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5414                    true, a->rt, a->rt2);
5415     return true;
5416 }
5417 
5418 /* Helpers to swap operands for reverse-subtract.  */
5419 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5420 {
5421     tcg_gen_sub_i32(dst, b, a);
5422 }
5423 
5424 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5425 {
5426     gen_sub_CC(dst, b, a);
5427 }
5428 
5429 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5430 {
5431     gen_sub_carry(dest, b, a);
5432 }
5433 
5434 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5435 {
5436     gen_sbc_CC(dest, b, a);
5437 }
5438 
5439 /*
5440  * Helpers for the data processing routines.
5441  *
5442  * After the computation, store the result back.  This may be suppressed
5443  * altogether (STREG_NONE), stored to a register as usual (STREG_NORMAL),
5444  * subjected to a runtime check against the stack limits (STREG_SP_CHECK),
5445  * or used to perform an exception return (STREG_EXC_RET).
5446  *
5447  * Always return true, indicating success for a trans_* function.
5448  */
5449 typedef enum {
5450    STREG_NONE,
5451    STREG_NORMAL,
5452    STREG_SP_CHECK,
5453    STREG_EXC_RET,
5454 } StoreRegKind;
5455 
5456 static bool store_reg_kind(DisasContext *s, int rd,
5457                             TCGv_i32 val, StoreRegKind kind)
5458 {
5459     switch (kind) {
5460     case STREG_NONE:
5461         tcg_temp_free_i32(val);
5462         return true;
5463     case STREG_NORMAL:
5464         /* See ALUWritePC: Interworking only from a32 mode. */
5465         if (s->thumb) {
5466             store_reg(s, rd, val);
5467         } else {
5468             store_reg_bx(s, rd, val);
5469         }
5470         return true;
5471     case STREG_SP_CHECK:
5472         store_sp_checked(s, val);
5473         return true;
5474     case STREG_EXC_RET:
5475         gen_exception_return(s, val);
5476         return true;
5477     }
5478     g_assert_not_reached();
5479 }
5480 
5481 /*
5482  * Data Processing (register)
5483  *
5484  * Operate, with set flags, one register source,
5485  * one immediate shifted register source, and a destination.
5486  */
5487 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5488                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5489                          int logic_cc, StoreRegKind kind)
5490 {
5491     TCGv_i32 tmp1, tmp2;
5492 
5493     tmp2 = load_reg(s, a->rm);
5494     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5495     tmp1 = load_reg(s, a->rn);
5496 
5497     gen(tmp1, tmp1, tmp2);
5498     tcg_temp_free_i32(tmp2);
5499 
5500     if (logic_cc) {
5501         gen_logic_CC(tmp1);
5502     }
5503     return store_reg_kind(s, a->rd, tmp1, kind);
5504 }
5505 
5506 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5507                          void (*gen)(TCGv_i32, TCGv_i32),
5508                          int logic_cc, StoreRegKind kind)
5509 {
5510     TCGv_i32 tmp;
5511 
5512     tmp = load_reg(s, a->rm);
5513     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5514 
5515     gen(tmp, tmp);
5516     if (logic_cc) {
5517         gen_logic_CC(tmp);
5518     }
5519     return store_reg_kind(s, a->rd, tmp, kind);
5520 }
5521 
5522 /*
5523  * Data-processing (register-shifted register)
5524  *
5525  * Operate, with set flags, one register source,
5526  * one register shifted register source, and a destination.
5527  */
5528 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5529                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5530                          int logic_cc, StoreRegKind kind)
5531 {
5532     TCGv_i32 tmp1, tmp2;
5533 
5534     tmp1 = load_reg(s, a->rs);
5535     tmp2 = load_reg(s, a->rm);
5536     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5537     tmp1 = load_reg(s, a->rn);
5538 
5539     gen(tmp1, tmp1, tmp2);
5540     tcg_temp_free_i32(tmp2);
5541 
5542     if (logic_cc) {
5543         gen_logic_CC(tmp1);
5544     }
5545     return store_reg_kind(s, a->rd, tmp1, kind);
5546 }
5547 
5548 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5549                          void (*gen)(TCGv_i32, TCGv_i32),
5550                          int logic_cc, StoreRegKind kind)
5551 {
5552     TCGv_i32 tmp1, tmp2;
5553 
5554     tmp1 = load_reg(s, a->rs);
5555     tmp2 = load_reg(s, a->rm);
5556     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5557 
5558     gen(tmp2, tmp2);
5559     if (logic_cc) {
5560         gen_logic_CC(tmp2);
5561     }
5562     return store_reg_kind(s, a->rd, tmp2, kind);
5563 }
5564 
5565 /*
5566  * Data-processing (immediate)
5567  *
5568  * Operate, with set flags, one register source,
5569  * one rotated immediate, and a destination.
5570  *
5571  * Note that when logic_cc && a->rot, CF is set from the msb of the
5572  * rotated immediate; this is why the immediate must be passed in
5573  * its unrotated form.
5574  */
5575 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5576                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5577                          int logic_cc, StoreRegKind kind)
5578 {
5579     TCGv_i32 tmp1;
5580     uint32_t imm;
5581 
5582     imm = ror32(a->imm, a->rot);
5583     if (logic_cc && a->rot) {
5584         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5585     }
5586     tmp1 = load_reg(s, a->rn);
5587 
5588     gen(tmp1, tmp1, tcg_constant_i32(imm));
5589 
5590     if (logic_cc) {
5591         gen_logic_CC(tmp1);
5592     }
5593     return store_reg_kind(s, a->rd, tmp1, kind);
5594 }
5595 
5596 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5597                          void (*gen)(TCGv_i32, TCGv_i32),
5598                          int logic_cc, StoreRegKind kind)
5599 {
5600     TCGv_i32 tmp;
5601     uint32_t imm;
5602 
5603     imm = ror32(a->imm, a->rot);
5604     if (logic_cc && a->rot) {
5605         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5606     }
5607 
5608     tmp = tcg_temp_new_i32();
5609     gen(tmp, tcg_constant_i32(imm));
5610 
5611     if (logic_cc) {
5612         gen_logic_CC(tmp);
5613     }
5614     return store_reg_kind(s, a->rd, tmp, kind);
5615 }
5616 
5617 #define DO_ANY3(NAME, OP, L, K)                                         \
5618     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5619     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5620     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5621     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5622     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5623     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5624 
5625 #define DO_ANY2(NAME, OP, L, K)                                         \
5626     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5627     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5628     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5629     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5630     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5631     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5632 
5633 #define DO_CMP2(NAME, OP, L)                                            \
5634     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5635     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5636     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5637     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5638     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5639     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
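/*
 * For reference, a sketch of what the preprocessor produces: a line such
 * as DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL) below expands into
 * three trans_* functions (trans_AND_rrri, trans_AND_rrrr and
 * trans_AND_rri), one per operand form matched by the decodetree patterns.
 */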
5640 
5641 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5642 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5643 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5644 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5645 
5646 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5647 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5648 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5649 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5650 
5651 DO_CMP2(TST, tcg_gen_and_i32, true)
5652 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5653 DO_CMP2(CMN, gen_add_CC, false)
5654 DO_CMP2(CMP, gen_sub_CC, false)
5655 
5656 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5657         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5658 
5659 /*
5660  * Note that the StoreRegKind computation below may return out of the
5661  * middle of the function expanded by DO_ANY3, and that it modifies
5662  * a->s via that parameter before OP uses it.
5663  */
5664 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5665         ({
5666             StoreRegKind ret = STREG_NORMAL;
5667             if (a->rd == 15 && a->s) {
5668                 /*
5669                  * See ALUExceptionReturn:
5670                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5671                  * In Hyp mode, UNDEFINED.
5672                  */
5673                 if (IS_USER(s) || s->current_el == 2) {
5674                     unallocated_encoding(s);
5675                     return true;
5676                 }
5677                 /* There is no writeback of nzcv to PSTATE.  */
5678                 a->s = 0;
5679                 ret = STREG_EXC_RET;
5680             } else if (a->rd == 13 && a->rn == 13) {
5681                 ret = STREG_SP_CHECK;
5682             }
5683             ret;
5684         }))
5685 
5686 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5687         ({
5688             StoreRegKind ret = STREG_NORMAL;
5689             if (a->rd == 15 && a->s) {
5690                 /*
5691                  * See ALUExceptionReturn:
5692                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5693                  * In Hyp mode, UNDEFINED.
5694                  */
5695                 if (IS_USER(s) || s->current_el == 2) {
5696                     unallocated_encoding(s);
5697                     return true;
5698                 }
5699                 /* There is no writeback of nzcv to PSTATE.  */
5700                 a->s = 0;
5701                 ret = STREG_EXC_RET;
5702             } else if (a->rd == 13) {
5703                 ret = STREG_SP_CHECK;
5704             }
5705             ret;
5706         }))
5707 
5708 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5709 
5710 /*
5711  * ORN is only available with T32, so there is no register-shifted-register
5712  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5713  */
5714 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5715 {
5716     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5717 }
5718 
5719 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5720 {
5721     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5722 }
5723 
5724 #undef DO_ANY3
5725 #undef DO_ANY2
5726 #undef DO_CMP2
5727 
5728 static bool trans_ADR(DisasContext *s, arg_ri *a)
5729 {
5730     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5731     return true;
5732 }
5733 
5734 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5735 {
5736     if (!ENABLE_ARCH_6T2) {
5737         return false;
5738     }
5739 
5740     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5741     return true;
5742 }
5743 
5744 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5745 {
5746     TCGv_i32 tmp;
5747 
5748     if (!ENABLE_ARCH_6T2) {
5749         return false;
5750     }
5751 
5752     tmp = load_reg(s, a->rd);
5753     tcg_gen_ext16u_i32(tmp, tmp);
5754     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5755     store_reg(s, a->rd, tmp);
5756     return true;
5757 }
5758 
5759 /*
5760  * v8.1M MVE wide-shifts
5761  */
5762 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5763                           WideShiftImmFn *fn)
5764 {
5765     TCGv_i64 rda;
5766     TCGv_i32 rdalo, rdahi;
5767 
5768     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5769         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5770         return false;
5771     }
5772     if (a->rdahi == 15) {
5773         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5774         return false;
5775     }
5776     if (!dc_isar_feature(aa32_mve, s) ||
5777         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5778         a->rdahi == 13) {
5779         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5780         unallocated_encoding(s);
5781         return true;
5782     }
5783 
5784     if (a->shim == 0) {
5785         a->shim = 32;
5786     }
5787 
5788     rda = tcg_temp_new_i64();
5789     rdalo = load_reg(s, a->rdalo);
5790     rdahi = load_reg(s, a->rdahi);
5791     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5792 
5793     fn(rda, rda, a->shim);
5794 
5795     tcg_gen_extrl_i64_i32(rdalo, rda);
5796     tcg_gen_extrh_i64_i32(rdahi, rda);
5797     store_reg(s, a->rdalo, rdalo);
5798     store_reg(s, a->rdahi, rdahi);
5799     tcg_temp_free_i64(rda);
5800 
5801     return true;
5802 }
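/*
 * A worked example of the expansion above (illustrative): for
 * ASRL R0, R1, #4 the decode gives rdalo = 0, rdahi = 1, shim = 4;
 * R1:R0 is concatenated into a 64-bit value, trans_ASRL_ri's fn
 * (tcg_gen_sari_i64) shifts it right arithmetically by 4, and the two
 * halves are written back to R0 and R1.  An encoded shift amount of 0
 * is treated as 32, hence the a->shim == 0 fixup.
 */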
5803 
5804 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5805 {
5806     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5807 }
5808 
5809 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5810 {
5811     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5812 }
5813 
5814 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5815 {
5816     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5817 }
5818 
5819 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5820 {
5821     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5822 }
5823 
5824 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5825 {
5826     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5827 }
5828 
5829 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5830 {
5831     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5832 }
5833 
5834 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5835 {
5836     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5837 }
5838 
5839 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5840 {
5841     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5842 }
5843 
5844 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5845 {
5846     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5847 }
5848 
5849 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5850 {
5851     TCGv_i64 rda;
5852     TCGv_i32 rdalo, rdahi;
5853 
5854     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5855         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5856         return false;
5857     }
5858     if (a->rdahi == 15) {
5859         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5860         return false;
5861     }
5862     if (!dc_isar_feature(aa32_mve, s) ||
5863         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5864         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5865         a->rm == a->rdahi || a->rm == a->rdalo) {
5866         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5867         unallocated_encoding(s);
5868         return true;
5869     }
5870 
5871     rda = tcg_temp_new_i64();
5872     rdalo = load_reg(s, a->rdalo);
5873     rdahi = load_reg(s, a->rdahi);
5874     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5875 
5876     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5877     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5878 
5879     tcg_gen_extrl_i64_i32(rdalo, rda);
5880     tcg_gen_extrh_i64_i32(rdahi, rda);
5881     store_reg(s, a->rdalo, rdalo);
5882     store_reg(s, a->rdahi, rdahi);
5883     tcg_temp_free_i64(rda);
5884 
5885     return true;
5886 }
5887 
5888 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5889 {
5890     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5891 }
5892 
5893 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5894 {
5895     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5896 }
5897 
5898 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5899 {
5900     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5901 }
5902 
5903 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5904 {
5905     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5906 }
5907 
5908 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5909 {
5910     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5911 }
5912 
5913 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5914 {
5915     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5916 }
5917 
5918 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5919 {
5920     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5921         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5922         return false;
5923     }
5924     if (!dc_isar_feature(aa32_mve, s) ||
5925         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5926         a->rda == 13 || a->rda == 15) {
5927         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5928         unallocated_encoding(s);
5929         return true;
5930     }
5931 
5932     if (a->shim == 0) {
5933         a->shim = 32;
5934     }
5935     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5936 
5937     return true;
5938 }
5939 
5940 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5941 {
5942     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5943 }
5944 
5945 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5946 {
5947     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5948 }
5949 
5950 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5951 {
5952     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5953 }
5954 
5955 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5956 {
5957     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5958 }
5959 
5960 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5961 {
5962     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5963 }
5964 
5965 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5966 {
5967     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5968 }
5969 
5970 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5971 {
5972     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5973         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5974         return false;
5975     }
5976     if (!dc_isar_feature(aa32_mve, s) ||
5977         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5978         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5979         a->rm == a->rda) {
5980         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5981         unallocated_encoding(s);
5982         return true;
5983     }
5984 
5985     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5986     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5987     return true;
5988 }
5989 
5990 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5991 {
5992     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5993 }
5994 
5995 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5996 {
5997     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5998 }
5999 
6000 /*
6001  * Multiply and multiply accumulate
6002  */
6003 
6004 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
6005 {
6006     TCGv_i32 t1, t2;
6007 
6008     t1 = load_reg(s, a->rn);
6009     t2 = load_reg(s, a->rm);
6010     tcg_gen_mul_i32(t1, t1, t2);
6011     tcg_temp_free_i32(t2);
6012     if (add) {
6013         t2 = load_reg(s, a->ra);
6014         tcg_gen_add_i32(t1, t1, t2);
6015         tcg_temp_free_i32(t2);
6016     }
6017     if (a->s) {
6018         gen_logic_CC(t1);
6019     }
6020     store_reg(s, a->rd, t1);
6021     return true;
6022 }
6023 
6024 static bool trans_MUL(DisasContext *s, arg_MUL *a)
6025 {
6026     return op_mla(s, a, false);
6027 }
6028 
6029 static bool trans_MLA(DisasContext *s, arg_MLA *a)
6030 {
6031     return op_mla(s, a, true);
6032 }
6033 
6034 static bool trans_MLS(DisasContext *s, arg_MLS *a)
6035 {
6036     TCGv_i32 t1, t2;
6037 
6038     if (!ENABLE_ARCH_6T2) {
6039         return false;
6040     }
6041     t1 = load_reg(s, a->rn);
6042     t2 = load_reg(s, a->rm);
6043     tcg_gen_mul_i32(t1, t1, t2);
6044     tcg_temp_free_i32(t2);
6045     t2 = load_reg(s, a->ra);
6046     tcg_gen_sub_i32(t1, t2, t1);
6047     tcg_temp_free_i32(t2);
6048     store_reg(s, a->rd, t1);
6049     return true;
6050 }
6051 
6052 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
6053 {
6054     TCGv_i32 t0, t1, t2, t3;
6055 
6056     t0 = load_reg(s, a->rm);
6057     t1 = load_reg(s, a->rn);
6058     if (uns) {
6059         tcg_gen_mulu2_i32(t0, t1, t0, t1);
6060     } else {
6061         tcg_gen_muls2_i32(t0, t1, t0, t1);
6062     }
6063     if (add) {
6064         t2 = load_reg(s, a->ra);
6065         t3 = load_reg(s, a->rd);
6066         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
6067         tcg_temp_free_i32(t2);
6068         tcg_temp_free_i32(t3);
6069     }
6070     if (a->s) {
6071         gen_logicq_cc(t0, t1);
6072     }
6073     store_reg(s, a->ra, t0);
6074     store_reg(s, a->rd, t1);
6075     return true;
6076 }
6077 
6078 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
6079 {
6080     return op_mlal(s, a, true, false);
6081 }
6082 
6083 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
6084 {
6085     return op_mlal(s, a, false, false);
6086 }
6087 
6088 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
6089 {
6090     return op_mlal(s, a, true, true);
6091 }
6092 
6093 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
6094 {
6095     return op_mlal(s, a, false, true);
6096 }
6097 
6098 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
6099 {
6100     TCGv_i32 t0, t1, t2, zero;
6101 
6102     if (s->thumb
6103         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6104         : !ENABLE_ARCH_6) {
6105         return false;
6106     }
6107 
6108     t0 = load_reg(s, a->rm);
6109     t1 = load_reg(s, a->rn);
6110     tcg_gen_mulu2_i32(t0, t1, t0, t1);
6111     zero = tcg_constant_i32(0);
6112     t2 = load_reg(s, a->ra);
6113     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6114     tcg_temp_free_i32(t2);
6115     t2 = load_reg(s, a->rd);
6116     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6117     tcg_temp_free_i32(t2);
6118     store_reg(s, a->ra, t0);
6119     store_reg(s, a->rd, t1);
6120     return true;
6121 }
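/*
 * The UMAAL expansion above relies on the identity
 *   (2^32 - 1) * (2^32 - 1) + (2^32 - 1) + (2^32 - 1) == 2^64 - 1,
 * i.e. the 64-bit product plus both 32-bit addends can never carry out
 * of 64 bits, so two add2 steps with a zero high addend word suffice.
 */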
6122 
6123 /*
6124  * Saturating addition and subtraction
6125  */
6126 
6127 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
6128 {
6129     TCGv_i32 t0, t1;
6130 
6131     if (s->thumb
6132         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6133         : !ENABLE_ARCH_5TE) {
6134         return false;
6135     }
6136 
6137     t0 = load_reg(s, a->rm);
6138     t1 = load_reg(s, a->rn);
6139     if (doub) {
6140         gen_helper_add_saturate(t1, cpu_env, t1, t1);
6141     }
6142     if (add) {
6143         gen_helper_add_saturate(t0, cpu_env, t0, t1);
6144     } else {
6145         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
6146     }
6147     tcg_temp_free_i32(t1);
6148     store_reg(s, a->rd, t0);
6149     return true;
6150 }
6151 
6152 #define DO_QADDSUB(NAME, ADD, DOUB) \
6153 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
6154 {                                                        \
6155     return op_qaddsub(s, a, ADD, DOUB);                  \
6156 }
6157 
6158 DO_QADDSUB(QADD, true, false)
6159 DO_QADDSUB(QSUB, false, false)
6160 DO_QADDSUB(QDADD, true, true)
6161 DO_QADDSUB(QDSUB, false, true)
6162 
6163 #undef DO_QADDSUB
6164 
6165 /*
6166  * Halfword multiply and multiply accumulate
6167  */
6168 
6169 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
6170                        int add_long, bool nt, bool mt)
6171 {
6172     TCGv_i32 t0, t1, tl, th;
6173 
6174     if (s->thumb
6175         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6176         : !ENABLE_ARCH_5TE) {
6177         return false;
6178     }
6179 
6180     t0 = load_reg(s, a->rn);
6181     t1 = load_reg(s, a->rm);
6182     gen_mulxy(t0, t1, nt, mt);
6183     tcg_temp_free_i32(t1);
6184 
6185     switch (add_long) {
6186     case 0:
6187         store_reg(s, a->rd, t0);
6188         break;
6189     case 1:
6190         t1 = load_reg(s, a->ra);
6191         gen_helper_add_setq(t0, cpu_env, t0, t1);
6192         tcg_temp_free_i32(t1);
6193         store_reg(s, a->rd, t0);
6194         break;
6195     case 2:
6196         tl = load_reg(s, a->ra);
6197         th = load_reg(s, a->rd);
6198         /* Sign-extend the 32-bit product to 64 bits.  */
6199         t1 = tcg_temp_new_i32();
6200         tcg_gen_sari_i32(t1, t0, 31);
6201         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6202         tcg_temp_free_i32(t0);
6203         tcg_temp_free_i32(t1);
6204         store_reg(s, a->ra, tl);
6205         store_reg(s, a->rd, th);
6206         break;
6207     default:
6208         g_assert_not_reached();
6209     }
6210     return true;
6211 }
6212 
6213 #define DO_SMLAX(NAME, add, nt, mt) \
6214 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6215 {                                                          \
6216     return op_smlaxxx(s, a, add, nt, mt);                  \
6217 }
6218 
6219 DO_SMLAX(SMULBB, 0, 0, 0)
6220 DO_SMLAX(SMULBT, 0, 0, 1)
6221 DO_SMLAX(SMULTB, 0, 1, 0)
6222 DO_SMLAX(SMULTT, 0, 1, 1)
6223 
6224 DO_SMLAX(SMLABB, 1, 0, 0)
6225 DO_SMLAX(SMLABT, 1, 0, 1)
6226 DO_SMLAX(SMLATB, 1, 1, 0)
6227 DO_SMLAX(SMLATT, 1, 1, 1)
6228 
6229 DO_SMLAX(SMLALBB, 2, 0, 0)
6230 DO_SMLAX(SMLALBT, 2, 0, 1)
6231 DO_SMLAX(SMLALTB, 2, 1, 0)
6232 DO_SMLAX(SMLALTT, 2, 1, 1)
6233 
6234 #undef DO_SMLAX
6235 
6236 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6237 {
6238     TCGv_i32 t0, t1;
6239 
6240     if (!ENABLE_ARCH_5TE) {
6241         return false;
6242     }
6243 
6244     t0 = load_reg(s, a->rn);
6245     t1 = load_reg(s, a->rm);
6246     /*
6247      * Since the nominal result is product<47:16>, shift the 16-bit
6248      * input up by 16 bits, so that the result is at product<63:32>.
6249      */
6250     if (mt) {
6251         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6252     } else {
6253         tcg_gen_shli_i32(t1, t1, 16);
6254     }
6255     tcg_gen_muls2_i32(t0, t1, t0, t1);
6256     tcg_temp_free_i32(t0);
6257     if (add) {
6258         t0 = load_reg(s, a->ra);
6259         gen_helper_add_setq(t1, cpu_env, t1, t0);
6260         tcg_temp_free_i32(t0);
6261     }
6262     store_reg(s, a->rd, t1);
6263     return true;
6264 }
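/*
 * For example (illustrative): SMULWB Rd, Rn, Rm computes
 * (SInt(Rn) * SInt(Rm<15:0>))<47:16>.  Shifting the halfword operand up
 * by 16 first, as above, places that slice in the high half of the
 * 64-bit product, which is exactly what muls2 leaves in t1.
 */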
6265 
6266 #define DO_SMLAWX(NAME, add, mt) \
6267 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6268 {                                                          \
6269     return op_smlawx(s, a, add, mt);                       \
6270 }
6271 
6272 DO_SMLAWX(SMULWB, 0, 0)
6273 DO_SMLAWX(SMULWT, 0, 1)
6274 DO_SMLAWX(SMLAWB, 1, 0)
6275 DO_SMLAWX(SMLAWT, 1, 1)
6276 
6277 #undef DO_SMLAWX
6278 
6279 /*
6280  * MSR (immediate) and hints
6281  */
6282 
6283 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6284 {
6285     /*
6286      * When running single-threaded TCG code, use the helper to ensure that
6287      * the next round-robin scheduled vCPU gets a crack.  When running in
6288      * MTTCG we don't generate jumps to the helper as it won't affect the
6289      * scheduling of other vCPUs.
6290      */
6291     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6292         gen_update_pc(s, curr_insn_len(s));
6293         s->base.is_jmp = DISAS_YIELD;
6294     }
6295     return true;
6296 }
6297 
6298 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6299 {
6300     /*
6301      * When running single-threaded TCG code, use the helper to ensure that
6302      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6303      * just skip this instruction.  Currently the SEV/SEVL instructions,
6304      * which are *one* of many ways to wake the CPU from WFE, are not
6305      * implemented so we can't sleep like WFI does.
6306      */
6307     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6308         gen_update_pc(s, curr_insn_len(s));
6309         s->base.is_jmp = DISAS_WFE;
6310     }
6311     return true;
6312 }
6313 
6314 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6315 {
6316     /* For WFI, halt the vCPU until an IRQ. */
6317     gen_update_pc(s, curr_insn_len(s));
6318     s->base.is_jmp = DISAS_WFI;
6319     return true;
6320 }
6321 
6322 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6323 {
6324     /*
6325      * For M-profile, minimal-RAS ESB can be a NOP.
6326      * Without RAS, we must implement this as NOP.
6327      */
6328     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6329         /*
6330          * QEMU does not have a source of physical SErrors,
6331          * so we are only concerned with virtual SErrors.
6332          * The pseudocode in the ARM ARM for this case is
6333          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6334          *      AArch32.vESBOperation();
6335          * Most of the condition can be evaluated at translation time.
6336          * Test for EL2 present, and defer test for SEL2 to runtime.
6337          */
6338         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6339             gen_helper_vesb(cpu_env);
6340         }
6341     }
6342     return true;
6343 }
6344 
6345 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6346 {
6347     return true;
6348 }
6349 
6350 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6351 {
6352     uint32_t val = ror32(a->imm, a->rot * 2);
6353     uint32_t mask = msr_mask(s, a->mask, a->r);
6354 
6355     if (gen_set_psr_im(s, mask, a->r, val)) {
6356         unallocated_encoding(s);
6357     }
6358     return true;
6359 }
6360 
6361 /*
6362  * Cyclic Redundancy Check
6363  */
6364 
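/*
 * The crc32/crc32c helpers take the accumulator (Rn), the data value (Rm)
 * and the number of bytes to process (1 << sz); sub-word data is
 * zero-extended before the call.
 */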
6365 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6366 {
6367     TCGv_i32 t1, t2, t3;
6368 
6369     if (!dc_isar_feature(aa32_crc32, s)) {
6370         return false;
6371     }
6372 
6373     t1 = load_reg(s, a->rn);
6374     t2 = load_reg(s, a->rm);
6375     switch (sz) {
6376     case MO_8:
6377         gen_uxtb(t2);
6378         break;
6379     case MO_16:
6380         gen_uxth(t2);
6381         break;
6382     case MO_32:
6383         break;
6384     default:
6385         g_assert_not_reached();
6386     }
6387     t3 = tcg_constant_i32(1 << sz);
6388     if (c) {
6389         gen_helper_crc32c(t1, t1, t2, t3);
6390     } else {
6391         gen_helper_crc32(t1, t1, t2, t3);
6392     }
6393     tcg_temp_free_i32(t2);
6394     store_reg(s, a->rd, t1);
6395     return true;
6396 }
6397 
6398 #define DO_CRC32(NAME, c, sz) \
6399 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6400     { return op_crc32(s, a, c, sz); }
6401 
6402 DO_CRC32(CRC32B, false, MO_8)
6403 DO_CRC32(CRC32H, false, MO_16)
6404 DO_CRC32(CRC32W, false, MO_32)
6405 DO_CRC32(CRC32CB, true, MO_8)
6406 DO_CRC32(CRC32CH, true, MO_16)
6407 DO_CRC32(CRC32CW, true, MO_32)
6408 
6409 #undef DO_CRC32
6410 
6411 /*
6412  * Miscellaneous instructions
6413  */
6414 
6415 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6416 {
6417     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6418         return false;
6419     }
6420     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6421     return true;
6422 }
6423 
6424 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6425 {
6426     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6427         return false;
6428     }
6429     gen_msr_banked(s, a->r, a->sysm, a->rn);
6430     return true;
6431 }
6432 
6433 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6434 {
6435     TCGv_i32 tmp;
6436 
6437     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6438         return false;
6439     }
6440     if (a->r) {
6441         if (IS_USER(s)) {
6442             unallocated_encoding(s);
6443             return true;
6444         }
6445         tmp = load_cpu_field(spsr);
6446     } else {
6447         tmp = tcg_temp_new_i32();
6448         gen_helper_cpsr_read(tmp, cpu_env);
6449     }
6450     store_reg(s, a->rd, tmp);
6451     return true;
6452 }
6453 
6454 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6455 {
6456     TCGv_i32 tmp;
6457     uint32_t mask = msr_mask(s, a->mask, a->r);
6458 
6459     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6460         return false;
6461     }
6462     tmp = load_reg(s, a->rn);
6463     if (gen_set_psr(s, mask, a->r, tmp)) {
6464         unallocated_encoding(s);
6465     }
6466     return true;
6467 }
6468 
6469 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6470 {
6471     TCGv_i32 tmp;
6472 
6473     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6474         return false;
6475     }
6476     tmp = tcg_temp_new_i32();
6477     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6478     store_reg(s, a->rd, tmp);
6479     return true;
6480 }
6481 
6482 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6483 {
6484     TCGv_i32 addr, reg;
6485 
6486     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6487         return false;
6488     }
6489     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6490     reg = load_reg(s, a->rn);
6491     gen_helper_v7m_msr(cpu_env, addr, reg);
6492     tcg_temp_free_i32(reg);
6493     /* If we wrote to CONTROL, the EL might have changed */
6494     gen_rebuild_hflags(s, true);
6495     gen_lookup_tb(s);
6496     return true;
6497 }
6498 
6499 static bool trans_BX(DisasContext *s, arg_BX *a)
6500 {
6501     if (!ENABLE_ARCH_4T) {
6502         return false;
6503     }
6504     gen_bx_excret(s, load_reg(s, a->rm));
6505     return true;
6506 }
6507 
6508 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6509 {
6510     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6511         return false;
6512     }
6513     /*
6514      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6515      * TBFLAGS bit on a basically-never-happens case, so call a helper
6516      * function to check for the trap and raise the exception if needed
6517      * (passing it the register number for the syndrome value).
6518      * v8A doesn't have this HSTR bit.
6519      */
6520     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6521         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6522         s->current_el < 2 && s->ns) {
6523         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6524     }
6525     /* Trivial implementation equivalent to bx.  */
6526     gen_bx(s, load_reg(s, a->rm));
6527     return true;
6528 }
6529 
6530 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6531 {
6532     TCGv_i32 tmp;
6533 
6534     if (!ENABLE_ARCH_5) {
6535         return false;
6536     }
6537     tmp = load_reg(s, a->rm);
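    /*
     * LR is set to the address of the following insn, with bit 0 set when
     * in Thumb state so that a later BX LR returns in the correct state.
     */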
6538     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6539     gen_bx(s, tmp);
6540     return true;
6541 }
6542 
6543 /*
6544  * BXNS/BLXNS: only exist for v8M with the security extensions,
6545  * and always UNDEF if NonSecure.  We don't implement these in
6546  * the user-only mode either (in theory you can use them from
6547  * Secure User mode but they are too tied in to system emulation).
6548  */
6549 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6550 {
6551     if (!s->v8m_secure || IS_USER_ONLY) {
6552         unallocated_encoding(s);
6553     } else {
6554         gen_bxns(s, a->rm);
6555     }
6556     return true;
6557 }
6558 
6559 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6560 {
6561     if (!s->v8m_secure || IS_USER_ONLY) {
6562         unallocated_encoding(s);
6563     } else {
6564         gen_blxns(s, a->rm);
6565     }
6566     return true;
6567 }
6568 
6569 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6570 {
6571     TCGv_i32 tmp;
6572 
6573     if (!ENABLE_ARCH_5) {
6574         return false;
6575     }
6576     tmp = load_reg(s, a->rm);
6577     tcg_gen_clzi_i32(tmp, tmp, 32);
6578     store_reg(s, a->rd, tmp);
6579     return true;
6580 }
6581 
6582 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6583 {
6584     TCGv_i32 tmp;
6585 
6586     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6587         return false;
6588     }
6589     if (IS_USER(s)) {
6590         unallocated_encoding(s);
6591         return true;
6592     }
6593     if (s->current_el == 2) {
6594         /* ERET from Hyp uses ELR_Hyp, not LR */
6595         tmp = load_cpu_field(elr_el[2]);
6596     } else {
6597         tmp = load_reg(s, 14);
6598     }
6599     gen_exception_return(s, tmp);
6600     return true;
6601 }
6602 
6603 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6604 {
6605     gen_hlt(s, a->imm);
6606     return true;
6607 }
6608 
6609 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6610 {
6611     if (!ENABLE_ARCH_5) {
6612         return false;
6613     }
6614     /* BKPT is OK with ECI set and leaves it untouched */
6615     s->eci_handled = true;
6616     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6617         semihosting_enabled(s->current_el == 0) &&
6618         (a->imm == 0xab)) {
6619         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6620     } else {
6621         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6622     }
6623     return true;
6624 }
6625 
6626 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6627 {
6628     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6629         return false;
6630     }
6631     if (IS_USER(s)) {
6632         unallocated_encoding(s);
6633     } else {
6634         gen_hvc(s, a->imm);
6635     }
6636     return true;
6637 }
6638 
6639 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6640 {
6641     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6642         return false;
6643     }
6644     if (IS_USER(s)) {
6645         unallocated_encoding(s);
6646     } else {
6647         gen_smc(s);
6648     }
6649     return true;
6650 }
6651 
6652 static bool trans_SG(DisasContext *s, arg_SG *a)
6653 {
6654     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6655         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6656         return false;
6657     }
6658     /*
6659      * SG (v8M only)
6660      * The bulk of the behaviour for this instruction is implemented
6661      * in v7m_handle_execute_nsc(), which deals with the insn when
6662      * it is executed by a CPU in non-secure state from memory
6663      * which is Secure & NonSecure-Callable.
6664      * Here we only need to handle the remaining cases:
6665      *  * in NS memory (including the "security extension not
6666      *    implemented" case) : NOP
6667      *  * in S memory but CPU already secure (clear IT bits)
6668      * We know that the attribute for the memory this insn is
6669      * in must match the current CPU state, because otherwise
6670      * get_phys_addr_pmsav8 would have generated an exception.
6671      */
6672     if (s->v8m_secure) {
6673         /* Like the IT insn, we don't need to generate any code */
6674         s->condexec_cond = 0;
6675         s->condexec_mask = 0;
6676     }
6677     return true;
6678 }
6679 
6680 static bool trans_TT(DisasContext *s, arg_TT *a)
6681 {
6682     TCGv_i32 addr, tmp;
6683 
6684     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6685         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6686         return false;
6687     }
6688     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6689         /* We UNDEF for these UNPREDICTABLE cases */
6690         unallocated_encoding(s);
6691         return true;
6692     }
6693     if (a->A && !s->v8m_secure) {
6694         /* This case is UNDEFINED.  */
6695         unallocated_encoding(s);
6696         return true;
6697     }
6698 
6699     addr = load_reg(s, a->rn);
6700     tmp = tcg_temp_new_i32();
6701     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6702     tcg_temp_free_i32(addr);
6703     store_reg(s, a->rd, tmp);
6704     return true;
6705 }
6706 
6707 /*
6708  * Load/store register index
6709  */
6710 
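/*
 * make_issinfo() builds the ISS (instruction-specific syndrome) that
 * disas_set_da_iss() records for this access, so that a data abort taken
 * to a hypervisor can report the transfer register; the syndrome is only
 * valid for the non-writeback addressing forms.
 */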
6711 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6712 {
6713     ISSInfo ret;
6714 
6715     /* ISS not valid if writeback */
6716     if (p && !w) {
6717         ret = rd;
6718         if (curr_insn_len(s) == 2) {
6719             ret |= ISSIs16Bit;
6720         }
6721     } else {
6722         ret = ISSInvalid;
6723     }
6724     return ret;
6725 }
6726 
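/*
 * op_addr_rr_pre() returns the address for a register-offset access:
 * for pre-indexed forms (P set) the shifted Rm is added to or subtracted
 * from Rn (per U) before the access.  op_addr_rr_post() then applies any
 * post-index adjustment and performs base writeback when required.
 */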
6727 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6728 {
6729     TCGv_i32 addr = load_reg(s, a->rn);
6730 
6731     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6732         gen_helper_v8m_stackcheck(cpu_env, addr);
6733     }
6734 
6735     if (a->p) {
6736         TCGv_i32 ofs = load_reg(s, a->rm);
6737         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6738         if (a->u) {
6739             tcg_gen_add_i32(addr, addr, ofs);
6740         } else {
6741             tcg_gen_sub_i32(addr, addr, ofs);
6742         }
6743         tcg_temp_free_i32(ofs);
6744     }
6745     return addr;
6746 }
6747 
6748 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6749                             TCGv_i32 addr, int address_offset)
6750 {
6751     if (!a->p) {
6752         TCGv_i32 ofs = load_reg(s, a->rm);
6753         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6754         if (a->u) {
6755             tcg_gen_add_i32(addr, addr, ofs);
6756         } else {
6757             tcg_gen_sub_i32(addr, addr, ofs);
6758         }
6759         tcg_temp_free_i32(ofs);
6760     } else if (!a->w) {
6761         tcg_temp_free_i32(addr);
6762         return;
6763     }
6764     tcg_gen_addi_i32(addr, addr, address_offset);
6765     store_reg(s, a->rn, addr);
6766 }
6767 
6768 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6769                        MemOp mop, int mem_idx)
6770 {
6771     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6772     TCGv_i32 addr, tmp;
6773 
6774     addr = op_addr_rr_pre(s, a);
6775 
6776     tmp = tcg_temp_new_i32();
6777     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6778     disas_set_da_iss(s, mop, issinfo);
6779 
6780     /*
6781      * Perform base writeback before the loaded value to
6782      * ensure correct behavior with overlapping index registers.
6783      */
6784     op_addr_rr_post(s, a, addr, 0);
6785     store_reg_from_load(s, a->rt, tmp);
6786     return true;
6787 }
6788 
6789 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6790                         MemOp mop, int mem_idx)
6791 {
6792     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6793     TCGv_i32 addr, tmp;
6794 
6795     /*
6796      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6797      * is either UNPREDICTABLE or has defined behaviour
6798      */
6799     if (s->thumb && a->rn == 15) {
6800         return false;
6801     }
6802 
6803     addr = op_addr_rr_pre(s, a);
6804 
6805     tmp = load_reg(s, a->rt);
6806     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6807     disas_set_da_iss(s, mop, issinfo);
6808     tcg_temp_free_i32(tmp);
6809 
6810     op_addr_rr_post(s, a, addr, 0);
6811     return true;
6812 }
6813 
6814 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6815 {
6816     int mem_idx = get_mem_index(s);
6817     TCGv_i32 addr, tmp;
6818 
6819     if (!ENABLE_ARCH_5TE) {
6820         return false;
6821     }
6822     if (a->rt & 1) {
6823         unallocated_encoding(s);
6824         return true;
6825     }
6826     addr = op_addr_rr_pre(s, a);
6827 
6828     tmp = tcg_temp_new_i32();
6829     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6830     store_reg(s, a->rt, tmp);
6831 
6832     tcg_gen_addi_i32(addr, addr, 4);
6833 
6834     tmp = tcg_temp_new_i32();
6835     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6836     store_reg(s, a->rt + 1, tmp);
6837 
6838     /* LDRD w/ base writeback is undefined if the registers overlap.  */
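    /* The -4 undoes the addi above, so writeback uses the first word's address. */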
6839     op_addr_rr_post(s, a, addr, -4);
6840     return true;
6841 }
6842 
6843 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6844 {
6845     int mem_idx = get_mem_index(s);
6846     TCGv_i32 addr, tmp;
6847 
6848     if (!ENABLE_ARCH_5TE) {
6849         return false;
6850     }
6851     if (a->rt & 1) {
6852         unallocated_encoding(s);
6853         return true;
6854     }
6855     addr = op_addr_rr_pre(s, a);
6856 
6857     tmp = load_reg(s, a->rt);
6858     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6859     tcg_temp_free_i32(tmp);
6860 
6861     tcg_gen_addi_i32(addr, addr, 4);
6862 
6863     tmp = load_reg(s, a->rt + 1);
6864     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6865     tcg_temp_free_i32(tmp);
6866 
6867     op_addr_rr_post(s, a, addr, -4);
6868     return true;
6869 }
6870 
6871 /*
6872  * Load/store immediate index
6873  */
6874 
6875 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6876 {
6877     int ofs = a->imm;
6878 
6879     if (!a->u) {
6880         ofs = -ofs;
6881     }
6882 
6883     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6884         /*
6885          * Stackcheck. Here we know 'addr' is the current SP;
6886          * U is set if we're moving SP up, else down. It is
6887          * UNKNOWN whether the limit check triggers when SP starts
6888          * below the limit and ends up above it; we chose to do so.
6889          */
6890         if (!a->u) {
6891             TCGv_i32 newsp = tcg_temp_new_i32();
6892             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6893             gen_helper_v8m_stackcheck(cpu_env, newsp);
6894             tcg_temp_free_i32(newsp);
6895         } else {
6896             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6897         }
6898     }
6899 
6900     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6901 }
6902 
6903 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6904                             TCGv_i32 addr, int address_offset)
6905 {
6906     if (!a->p) {
6907         if (a->u) {
6908             address_offset += a->imm;
6909         } else {
6910             address_offset -= a->imm;
6911         }
6912     } else if (!a->w) {
6913         tcg_temp_free_i32(addr);
6914         return;
6915     }
6916     tcg_gen_addi_i32(addr, addr, address_offset);
6917     store_reg(s, a->rn, addr);
6918 }
6919 
6920 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6921                        MemOp mop, int mem_idx)
6922 {
6923     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6924     TCGv_i32 addr, tmp;
6925 
6926     addr = op_addr_ri_pre(s, a);
6927 
6928     tmp = tcg_temp_new_i32();
6929     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6930     disas_set_da_iss(s, mop, issinfo);
6931 
6932     /*
6933      * Perform base writeback before the loaded value to
6934      * ensure correct behavior with overlapping index registers.
6935      */
6936     op_addr_ri_post(s, a, addr, 0);
6937     store_reg_from_load(s, a->rt, tmp);
6938     return true;
6939 }
6940 
6941 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6942                         MemOp mop, int mem_idx)
6943 {
6944     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6945     TCGv_i32 addr, tmp;
6946 
6947     /*
6948      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6949      * is either UNPREDICTABLE or has defined behaviour
6950      */
6951     if (s->thumb && a->rn == 15) {
6952         return false;
6953     }
6954 
6955     addr = op_addr_ri_pre(s, a);
6956 
6957     tmp = load_reg(s, a->rt);
6958     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6959     disas_set_da_iss(s, mop, issinfo);
6960     tcg_temp_free_i32(tmp);
6961 
6962     op_addr_ri_post(s, a, addr, 0);
6963     return true;
6964 }
6965 
6966 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6967 {
6968     int mem_idx = get_mem_index(s);
6969     TCGv_i32 addr, tmp;
6970 
6971     addr = op_addr_ri_pre(s, a);
6972 
6973     tmp = tcg_temp_new_i32();
6974     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6975     store_reg(s, a->rt, tmp);
6976 
6977     tcg_gen_addi_i32(addr, addr, 4);
6978 
6979     tmp = tcg_temp_new_i32();
6980     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6981     store_reg(s, rt2, tmp);
6982 
6983     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6984     op_addr_ri_post(s, a, addr, -4);
6985     return true;
6986 }
6987 
6988 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6989 {
6990     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6991         return false;
6992     }
6993     return op_ldrd_ri(s, a, a->rt + 1);
6994 }
6995 
6996 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6997 {
6998     arg_ldst_ri b = {
6999         .u = a->u, .w = a->w, .p = a->p,
7000         .rn = a->rn, .rt = a->rt, .imm = a->imm
7001     };
7002     return op_ldrd_ri(s, &b, a->rt2);
7003 }
7004 
7005 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
7006 {
7007     int mem_idx = get_mem_index(s);
7008     TCGv_i32 addr, tmp;
7009 
7010     addr = op_addr_ri_pre(s, a);
7011 
7012     tmp = load_reg(s, a->rt);
7013     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7014     tcg_temp_free_i32(tmp);
7015 
7016     tcg_gen_addi_i32(addr, addr, 4);
7017 
7018     tmp = load_reg(s, rt2);
7019     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7020     tcg_temp_free_i32(tmp);
7021 
7022     op_addr_ri_post(s, a, addr, -4);
7023     return true;
7024 }
7025 
7026 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
7027 {
7028     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
7029         return false;
7030     }
7031     return op_strd_ri(s, a, a->rt + 1);
7032 }
7033 
7034 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
7035 {
7036     arg_ldst_ri b = {
7037         .u = a->u, .w = a->w, .p = a->p,
7038         .rn = a->rn, .rt = a->rt, .imm = a->imm
7039     };
7040     return op_strd_ri(s, &b, a->rt2);
7041 }
7042 
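/*
 * Each DO_LDST invocation expands to four trans functions: the immediate
 * and register-offset forms, plus the "T" unprivileged variants
 * (LDRT/STRT etc.) which use the user-mode memory index.
 */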
7043 #define DO_LDST(NAME, WHICH, MEMOP) \
7044 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
7045 {                                                                     \
7046     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
7047 }                                                                     \
7048 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
7049 {                                                                     \
7050     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
7051 }                                                                     \
7052 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
7053 {                                                                     \
7054     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
7055 }                                                                     \
7056 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
7057 {                                                                     \
7058     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
7059 }
7060 
7061 DO_LDST(LDR, load, MO_UL)
7062 DO_LDST(LDRB, load, MO_UB)
7063 DO_LDST(LDRH, load, MO_UW)
7064 DO_LDST(LDRSB, load, MO_SB)
7065 DO_LDST(LDRSH, load, MO_SW)
7066 
7067 DO_LDST(STR, store, MO_UL)
7068 DO_LDST(STRB, store, MO_UB)
7069 DO_LDST(STRH, store, MO_UW)
7070 
7071 #undef DO_LDST
7072 
7073 /*
7074  * Synchronization primitives
7075  */
7076 
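/*
 * SWP/SWPB are modelled as a single atomic exchange: the old memory value
 * ends up in Rt while the value from Rt2 is stored.
 */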
7077 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
7078 {
7079     TCGv_i32 addr, tmp;
7080     TCGv taddr;
7081 
7082     opc |= s->be_data;
7083     addr = load_reg(s, a->rn);
7084     taddr = gen_aa32_addr(s, addr, opc);
7085     tcg_temp_free_i32(addr);
7086 
7087     tmp = load_reg(s, a->rt2);
7088     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
7089     tcg_temp_free(taddr);
7090 
7091     store_reg(s, a->rt, tmp);
7092     return true;
7093 }
7094 
7095 static bool trans_SWP(DisasContext *s, arg_SWP *a)
7096 {
7097     return op_swp(s, a, MO_UL | MO_ALIGN);
7098 }
7099 
7100 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
7101 {
7102     return op_swp(s, a, MO_UB);
7103 }
7104 
7105 /*
7106  * Load/Store Exclusive and Load-Acquire/Store-Release
7107  */
7108 
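/*
 * gen_store_exclusive() leaves the status in Rd (0 on success, 1 on
 * failure); the 'rel' flag adds the store-release barrier needed for the
 * STLEX* forms.
 */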
7109 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
7110 {
7111     TCGv_i32 addr;
7112     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7113     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7114 
7115     /* We UNDEF for these UNPREDICTABLE cases.  */
7116     if (a->rd == 15 || a->rn == 15 || a->rt == 15
7117         || a->rd == a->rn || a->rd == a->rt
7118         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
7119         || (mop == MO_64
7120             && (a->rt2 == 15
7121                 || a->rd == a->rt2
7122                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7123         unallocated_encoding(s);
7124         return true;
7125     }
7126 
7127     if (rel) {
7128         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7129     }
7130 
7131     addr = tcg_temp_new_i32();
7132     load_reg_var(s, addr, a->rn);
7133     tcg_gen_addi_i32(addr, addr, a->imm);
7134 
7135     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
7136     tcg_temp_free_i32(addr);
7137     return true;
7138 }
7139 
7140 static bool trans_STREX(DisasContext *s, arg_STREX *a)
7141 {
7142     if (!ENABLE_ARCH_6) {
7143         return false;
7144     }
7145     return op_strex(s, a, MO_32, false);
7146 }
7147 
7148 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
7149 {
7150     if (!ENABLE_ARCH_6K) {
7151         return false;
7152     }
7153     /* We UNDEF for these UNPREDICTABLE cases.  */
7154     if (a->rt & 1) {
7155         unallocated_encoding(s);
7156         return true;
7157     }
7158     a->rt2 = a->rt + 1;
7159     return op_strex(s, a, MO_64, false);
7160 }
7161 
7162 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
7163 {
7164     return op_strex(s, a, MO_64, false);
7165 }
7166 
7167 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
7168 {
7169     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7170         return false;
7171     }
7172     return op_strex(s, a, MO_8, false);
7173 }
7174 
7175 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
7176 {
7177     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7178         return false;
7179     }
7180     return op_strex(s, a, MO_16, false);
7181 }
7182 
7183 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
7184 {
7185     if (!ENABLE_ARCH_8) {
7186         return false;
7187     }
7188     return op_strex(s, a, MO_32, true);
7189 }
7190 
7191 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
7192 {
7193     if (!ENABLE_ARCH_8) {
7194         return false;
7195     }
7196     /* We UNDEF for these UNPREDICTABLE cases.  */
7197     if (a->rt & 1) {
7198         unallocated_encoding(s);
7199         return true;
7200     }
7201     a->rt2 = a->rt + 1;
7202     return op_strex(s, a, MO_64, true);
7203 }
7204 
7205 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
7206 {
7207     if (!ENABLE_ARCH_8) {
7208         return false;
7209     }
7210     return op_strex(s, a, MO_64, true);
7211 }
7212 
7213 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7214 {
7215     if (!ENABLE_ARCH_8) {
7216         return false;
7217     }
7218     return op_strex(s, a, MO_8, true);
7219 }
7220 
7221 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7222 {
7223     if (!ENABLE_ARCH_8) {
7224         return false;
7225     }
7226     return op_strex(s, a, MO_16, true);
7227 }
7228 
7229 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7230 {
7231     TCGv_i32 addr, tmp;
7232 
7233     if (!ENABLE_ARCH_8) {
7234         return false;
7235     }
7236     /* We UNDEF for these UNPREDICTABLE cases.  */
7237     if (a->rn == 15 || a->rt == 15) {
7238         unallocated_encoding(s);
7239         return true;
7240     }
7241 
7242     addr = load_reg(s, a->rn);
7243     tmp = load_reg(s, a->rt);
7244     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7245     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7246     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7247 
7248     tcg_temp_free_i32(tmp);
7249     tcg_temp_free_i32(addr);
7250     return true;
7251 }
7252 
7253 static bool trans_STL(DisasContext *s, arg_STL *a)
7254 {
7255     return op_stl(s, a, MO_UL);
7256 }
7257 
7258 static bool trans_STLB(DisasContext *s, arg_STL *a)
7259 {
7260     return op_stl(s, a, MO_UB);
7261 }
7262 
7263 static bool trans_STLH(DisasContext *s, arg_STL *a)
7264 {
7265     return op_stl(s, a, MO_UW);
7266 }
7267 
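/*
 * For the acquire forms (LDAEX*), the TCG_BAR_LDAQ barrier after the
 * exclusive load orders it before all subsequent memory accesses.
 */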
7268 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7269 {
7270     TCGv_i32 addr;
7271     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7272     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7273 
7274     /* We UNDEF for these UNPREDICTABLE cases.  */
7275     if (a->rn == 15 || a->rt == 15
7276         || (!v8a && s->thumb && a->rt == 13)
7277         || (mop == MO_64
7278             && (a->rt2 == 15 || a->rt == a->rt2
7279                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7280         unallocated_encoding(s);
7281         return true;
7282     }
7283 
7284     addr = tcg_temp_new_i32();
7285     load_reg_var(s, addr, a->rn);
7286     tcg_gen_addi_i32(addr, addr, a->imm);
7287 
7288     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7289     tcg_temp_free_i32(addr);
7290 
7291     if (acq) {
7292         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7293     }
7294     return true;
7295 }
7296 
7297 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7298 {
7299     if (!ENABLE_ARCH_6) {
7300         return false;
7301     }
7302     return op_ldrex(s, a, MO_32, false);
7303 }
7304 
7305 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7306 {
7307     if (!ENABLE_ARCH_6K) {
7308         return false;
7309     }
7310     /* We UNDEF for these UNPREDICTABLE cases.  */
7311     if (a->rt & 1) {
7312         unallocated_encoding(s);
7313         return true;
7314     }
7315     a->rt2 = a->rt + 1;
7316     return op_ldrex(s, a, MO_64, false);
7317 }
7318 
7319 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7320 {
7321     return op_ldrex(s, a, MO_64, false);
7322 }
7323 
7324 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7325 {
7326     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7327         return false;
7328     }
7329     return op_ldrex(s, a, MO_8, false);
7330 }
7331 
7332 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7333 {
7334     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7335         return false;
7336     }
7337     return op_ldrex(s, a, MO_16, false);
7338 }
7339 
7340 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7341 {
7342     if (!ENABLE_ARCH_8) {
7343         return false;
7344     }
7345     return op_ldrex(s, a, MO_32, true);
7346 }
7347 
7348 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7349 {
7350     if (!ENABLE_ARCH_8) {
7351         return false;
7352     }
7353     /* We UNDEF for these UNPREDICTABLE cases.  */
7354     if (a->rt & 1) {
7355         unallocated_encoding(s);
7356         return true;
7357     }
7358     a->rt2 = a->rt + 1;
7359     return op_ldrex(s, a, MO_64, true);
7360 }
7361 
7362 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7363 {
7364     if (!ENABLE_ARCH_8) {
7365         return false;
7366     }
7367     return op_ldrex(s, a, MO_64, true);
7368 }
7369 
7370 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7371 {
7372     if (!ENABLE_ARCH_8) {
7373         return false;
7374     }
7375     return op_ldrex(s, a, MO_8, true);
7376 }
7377 
7378 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7379 {
7380     if (!ENABLE_ARCH_8) {
7381         return false;
7382     }
7383     return op_ldrex(s, a, MO_16, true);
7384 }
7385 
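/*
 * LDA/LDAB/LDAH are load-acquire without the exclusive monitor; the
 * barrier after the load below provides the acquire ordering.
 */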
7386 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7387 {
7388     TCGv_i32 addr, tmp;
7389 
7390     if (!ENABLE_ARCH_8) {
7391         return false;
7392     }
7393     /* We UNDEF for these UNPREDICTABLE cases.  */
7394     if (a->rn == 15 || a->rt == 15) {
7395         unallocated_encoding(s);
7396         return true;
7397     }
7398 
7399     addr = load_reg(s, a->rn);
7400     tmp = tcg_temp_new_i32();
7401     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7402     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7403     tcg_temp_free_i32(addr);
7404 
7405     store_reg(s, a->rt, tmp);
7406     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7407     return true;
7408 }
7409 
7410 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7411 {
7412     return op_lda(s, a, MO_UL);
7413 }
7414 
7415 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7416 {
7417     return op_lda(s, a, MO_UB);
7418 }
7419 
7420 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7421 {
7422     return op_lda(s, a, MO_UW);
7423 }
7424 
7425 /*
7426  * Media instructions
7427  */
7428 
7429 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7430 {
7431     TCGv_i32 t1, t2;
7432 
7433     if (!ENABLE_ARCH_6) {
7434         return false;
7435     }
7436 
7437     t1 = load_reg(s, a->rn);
7438     t2 = load_reg(s, a->rm);
7439     gen_helper_usad8(t1, t1, t2);
7440     tcg_temp_free_i32(t2);
7441     if (a->ra != 15) {
7442         t2 = load_reg(s, a->ra);
7443         tcg_gen_add_i32(t1, t1, t2);
7444         tcg_temp_free_i32(t2);
7445     }
7446     store_reg(s, a->rd, t1);
7447     return true;
7448 }
7449 
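/*
 * SBFX/UBFX extract a (widthm1 + 1)-bit field starting at bit lsb and
 * sign- or zero-extend it into Rd; a field extending past bit 31 is
 * UNPREDICTABLE and we choose to UNDEF.
 */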
7450 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7451 {
7452     TCGv_i32 tmp;
7453     int width = a->widthm1 + 1;
7454     int shift = a->lsb;
7455 
7456     if (!ENABLE_ARCH_6T2) {
7457         return false;
7458     }
7459     if (shift + width > 32) {
7460         /* UNPREDICTABLE; we choose to UNDEF */
7461         unallocated_encoding(s);
7462         return true;
7463     }
7464 
7465     tmp = load_reg(s, a->rn);
7466     if (u) {
7467         tcg_gen_extract_i32(tmp, tmp, shift, width);
7468     } else {
7469         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7470     }
7471     store_reg(s, a->rd, tmp);
7472     return true;
7473 }
7474 
7475 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7476 {
7477     return op_bfx(s, a, false);
7478 }
7479 
7480 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7481 {
7482     return op_bfx(s, a, true);
7483 }
7484 
7485 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7486 {
7487     TCGv_i32 tmp;
7488     int msb = a->msb, lsb = a->lsb;
7489     int width;
7490 
7491     if (!ENABLE_ARCH_6T2) {
7492         return false;
7493     }
7494     if (msb < lsb) {
7495         /* UNPREDICTABLE; we choose to UNDEF */
7496         unallocated_encoding(s);
7497         return true;
7498     }
7499 
7500     width = msb + 1 - lsb;
7501     if (a->rn == 15) {
7502         /* BFC */
7503         tmp = tcg_const_i32(0);
7504     } else {
7505         /* BFI */
7506         tmp = load_reg(s, a->rn);
7507     }
7508     if (width != 32) {
7509         TCGv_i32 tmp2 = load_reg(s, a->rd);
7510         tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7511         tcg_temp_free_i32(tmp2);
7512     }
7513     store_reg(s, a->rd, tmp);
7514     return true;
7515 }
7516 
7517 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7518 {
7519     unallocated_encoding(s);
7520     return true;
7521 }
7522 
7523 /*
7524  * Parallel addition and subtraction
7525  */
7526 
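/*
 * The "_ge" wrappers pass a pointer to the GE flags so the helper can set
 * them per byte or halfword (SADD16, UADD8, etc.); the saturating and
 * halving variants do not affect GE and use the plain wrapper.
 */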
7527 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7528                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7529 {
7530     TCGv_i32 t0, t1;
7531 
7532     if (s->thumb
7533         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7534         : !ENABLE_ARCH_6) {
7535         return false;
7536     }
7537 
7538     t0 = load_reg(s, a->rn);
7539     t1 = load_reg(s, a->rm);
7540 
7541     gen(t0, t0, t1);
7542 
7543     tcg_temp_free_i32(t1);
7544     store_reg(s, a->rd, t0);
7545     return true;
7546 }
7547 
7548 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7549                              void (*gen)(TCGv_i32, TCGv_i32,
7550                                          TCGv_i32, TCGv_ptr))
7551 {
7552     TCGv_i32 t0, t1;
7553     TCGv_ptr ge;
7554 
7555     if (s->thumb
7556         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7557         : !ENABLE_ARCH_6) {
7558         return false;
7559     }
7560 
7561     t0 = load_reg(s, a->rn);
7562     t1 = load_reg(s, a->rm);
7563 
7564     ge = tcg_temp_new_ptr();
7565     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7566     gen(t0, t0, t1, ge);
7567 
7568     tcg_temp_free_ptr(ge);
7569     tcg_temp_free_i32(t1);
7570     store_reg(s, a->rd, t0);
7571     return true;
7572 }
7573 
7574 #define DO_PAR_ADDSUB(NAME, helper) \
7575 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7576 {                                                       \
7577     return op_par_addsub(s, a, helper);                 \
7578 }
7579 
7580 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7581 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7582 {                                                       \
7583     return op_par_addsub_ge(s, a, helper);              \
7584 }
7585 
7586 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7587 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7588 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7589 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7590 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7591 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7592 
7593 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7594 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7595 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7596 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7597 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7598 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7599 
7600 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7601 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7602 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7603 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7604 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7605 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7606 
7607 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7608 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7609 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7610 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7611 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7612 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7613 
7614 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7615 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7616 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7617 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7618 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7619 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7620 
7621 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7622 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7623 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7624 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7625 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7626 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7627 
7628 #undef DO_PAR_ADDSUB
7629 #undef DO_PAR_ADDSUB_GE
7630 
7631 /*
7632  * Packing, unpacking, saturation, and reversal
7633  */
7634 
7635 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7636 {
7637     TCGv_i32 tn, tm;
7638     int shift = a->imm;
7639 
7640     if (s->thumb
7641         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7642         : !ENABLE_ARCH_6) {
7643         return false;
7644     }
7645 
7646     tn = load_reg(s, a->rn);
7647     tm = load_reg(s, a->rm);
7648     if (a->tb) {
7649         /* PKHTB */
7650         if (shift == 0) {
7651             shift = 31;
7652         }
7653         tcg_gen_sari_i32(tm, tm, shift);
7654         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7655     } else {
7656         /* PKHBT */
7657         tcg_gen_shli_i32(tm, tm, shift);
7658         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7659     }
7660     tcg_temp_free_i32(tm);
7661     store_reg(s, a->rd, tn);
7662     return true;
7663 }
7664 
7665 static bool op_sat(DisasContext *s, arg_sat *a,
7666                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7667 {
7668     TCGv_i32 tmp;
7669     int shift = a->imm;
7670 
7671     if (!ENABLE_ARCH_6) {
7672         return false;
7673     }
7674 
7675     tmp = load_reg(s, a->rn);
7676     if (a->sh) {
7677         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7678     } else {
7679         tcg_gen_shli_i32(tmp, tmp, shift);
7680     }
7681 
7682     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7683 
7684     store_reg(s, a->rd, tmp);
7685     return true;
7686 }
7687 
7688 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7689 {
7690     return op_sat(s, a, gen_helper_ssat);
7691 }
7692 
7693 static bool trans_USAT(DisasContext *s, arg_sat *a)
7694 {
7695     return op_sat(s, a, gen_helper_usat);
7696 }
7697 
7698 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7699 {
7700     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7701         return false;
7702     }
7703     return op_sat(s, a, gen_helper_ssat16);
7704 }
7705 
7706 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7707 {
7708     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7709         return false;
7710     }
7711     return op_sat(s, a, gen_helper_usat16);
7712 }
7713 
7714 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7715                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7716                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7717 {
7718     TCGv_i32 tmp;
7719 
7720     if (!ENABLE_ARCH_6) {
7721         return false;
7722     }
7723 
7724     tmp = load_reg(s, a->rm);
7725     /*
7726      * TODO: In many cases we could do a shift instead of a rotate.
7727      * Combined with a simple extend, that becomes an extract.
7728      */
7729     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7730     gen_extract(tmp, tmp);
7731 
7732     if (a->rn != 15) {
7733         TCGv_i32 tmp2 = load_reg(s, a->rn);
7734         gen_add(tmp, tmp, tmp2);
7735         tcg_temp_free_i32(tmp2);
7736     }
7737     store_reg(s, a->rd, tmp);
7738     return true;
7739 }
7740 
7741 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7742 {
7743     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7744 }
7745 
7746 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7747 {
7748     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7749 }
7750 
7751 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7752 {
7753     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7754         return false;
7755     }
7756     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7757 }
7758 
7759 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7760 {
7761     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7762 }
7763 
7764 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7765 {
7766     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7767 }
7768 
7769 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7770 {
7771     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7772         return false;
7773     }
7774     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7775 }
7776 
7777 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7778 {
7779     TCGv_i32 t1, t2, t3;
7780 
7781     if (s->thumb
7782         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7783         : !ENABLE_ARCH_6) {
7784         return false;
7785     }
7786 
7787     t1 = load_reg(s, a->rn);
7788     t2 = load_reg(s, a->rm);
7789     t3 = tcg_temp_new_i32();
7790     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7791     gen_helper_sel_flags(t1, t3, t1, t2);
7792     tcg_temp_free_i32(t3);
7793     tcg_temp_free_i32(t2);
7794     store_reg(s, a->rd, t1);
7795     return true;
7796 }
7797 
7798 static bool op_rr(DisasContext *s, arg_rr *a,
7799                   void (*gen)(TCGv_i32, TCGv_i32))
7800 {
7801     TCGv_i32 tmp;
7802 
7803     tmp = load_reg(s, a->rm);
7804     gen(tmp, tmp);
7805     store_reg(s, a->rd, tmp);
7806     return true;
7807 }
7808 
7809 static bool trans_REV(DisasContext *s, arg_rr *a)
7810 {
7811     if (!ENABLE_ARCH_6) {
7812         return false;
7813     }
7814     return op_rr(s, a, tcg_gen_bswap32_i32);
7815 }
7816 
7817 static bool trans_REV16(DisasContext *s, arg_rr *a)
7818 {
7819     if (!ENABLE_ARCH_6) {
7820         return false;
7821     }
7822     return op_rr(s, a, gen_rev16);
7823 }
7824 
7825 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7826 {
7827     if (!ENABLE_ARCH_6) {
7828         return false;
7829     }
7830     return op_rr(s, a, gen_revsh);
7831 }
7832 
7833 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7834 {
7835     if (!ENABLE_ARCH_6T2) {
7836         return false;
7837     }
7838     return op_rr(s, a, gen_helper_rbit);
7839 }
7840 
7841 /*
7842  * Signed multiply, signed and unsigned divide
7843  */
7844 
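/*
 * SMLAD/SMLSD: Rd = Rn[15:0]*Rm[15:0] +/- Rn[31:16]*Rm[31:16] + Ra, with
 * the 'X' forms swapping the two halfwords of Rm first (Ra == 15 encodes
 * the non-accumulating SMUAD/SMUSD).  Q is set if the overall signed
 * accumulation overflows 32 bits, which is why the general SMLAD case
 * below is evaluated at 64 bits.
 */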
7845 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7846 {
7847     TCGv_i32 t1, t2;
7848 
7849     if (!ENABLE_ARCH_6) {
7850         return false;
7851     }
7852 
7853     t1 = load_reg(s, a->rn);
7854     t2 = load_reg(s, a->rm);
7855     if (m_swap) {
7856         gen_swap_half(t2, t2);
7857     }
7858     gen_smul_dual(t1, t2);
7859 
7860     if (sub) {
7861         /*
7862          * This subtraction cannot overflow, so we can do a simple
7863          * 32-bit subtraction and then a possible 32-bit saturating
7864          * addition of Ra.
7865          */
7866         tcg_gen_sub_i32(t1, t1, t2);
7867         tcg_temp_free_i32(t2);
7868 
7869         if (a->ra != 15) {
7870             t2 = load_reg(s, a->ra);
7871             gen_helper_add_setq(t1, cpu_env, t1, t2);
7872             tcg_temp_free_i32(t2);
7873         }
7874     } else if (a->ra == 15) {
7875         /* Single saturation-checking addition */
7876         gen_helper_add_setq(t1, cpu_env, t1, t2);
7877         tcg_temp_free_i32(t2);
7878     } else {
7879         /*
7880          * We need to add the products and Ra together and then
7881          * determine whether the final result overflowed. Doing
7882          * this as two separate add-and-check-overflow steps incorrectly
7883          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7884          * Do all the arithmetic at 64-bits and then check for overflow.
7885          */
7886         TCGv_i64 p64, q64;
7887         TCGv_i32 t3, qf, one;
7888 
7889         p64 = tcg_temp_new_i64();
7890         q64 = tcg_temp_new_i64();
7891         tcg_gen_ext_i32_i64(p64, t1);
7892         tcg_gen_ext_i32_i64(q64, t2);
7893         tcg_gen_add_i64(p64, p64, q64);
7894         load_reg_var(s, t2, a->ra);
7895         tcg_gen_ext_i32_i64(q64, t2);
7896         tcg_gen_add_i64(p64, p64, q64);
7897         tcg_temp_free_i64(q64);
7898 
7899         tcg_gen_extr_i64_i32(t1, t2, p64);
7900         tcg_temp_free_i64(p64);
7901         /*
7902          * t1 is the low half of the result which goes into Rd.
7903          * We have overflow and must set Q if the high half (t2)
7904          * is different from the sign-extension of t1.
7905          */
7906         t3 = tcg_temp_new_i32();
7907         tcg_gen_sari_i32(t3, t1, 31);
7908         qf = load_cpu_field(QF);
7909         one = tcg_constant_i32(1);
7910         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7911         store_cpu_field(qf, QF);
7912         tcg_temp_free_i32(t3);
7913         tcg_temp_free_i32(t2);
7914     }
7915     store_reg(s, a->rd, t1);
7916     return true;
7917 }
7918 
7919 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7920 {
7921     return op_smlad(s, a, false, false);
7922 }
7923 
7924 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7925 {
7926     return op_smlad(s, a, true, false);
7927 }
7928 
7929 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7930 {
7931     return op_smlad(s, a, false, true);
7932 }
7933 
7934 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7935 {
7936     return op_smlad(s, a, true, true);
7937 }
7938 
7939 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7940 {
7941     TCGv_i32 t1, t2;
7942     TCGv_i64 l1, l2;
7943 
7944     if (!ENABLE_ARCH_6) {
7945         return false;
7946     }
7947 
7948     t1 = load_reg(s, a->rn);
7949     t2 = load_reg(s, a->rm);
7950     if (m_swap) {
7951         gen_swap_half(t2, t2);
7952     }
7953     gen_smul_dual(t1, t2);
7954 
7955     l1 = tcg_temp_new_i64();
7956     l2 = tcg_temp_new_i64();
7957     tcg_gen_ext_i32_i64(l1, t1);
7958     tcg_gen_ext_i32_i64(l2, t2);
7959     tcg_temp_free_i32(t1);
7960     tcg_temp_free_i32(t2);
7961 
7962     if (sub) {
7963         tcg_gen_sub_i64(l1, l1, l2);
7964     } else {
7965         tcg_gen_add_i64(l1, l1, l2);
7966     }
7967     tcg_temp_free_i64(l2);
7968 
7969     gen_addq(s, l1, a->ra, a->rd);
7970     gen_storeq_reg(s, a->ra, a->rd, l1);
7971     tcg_temp_free_i64(l1);
7972     return true;
7973 }
7974 
7975 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7976 {
7977     return op_smlald(s, a, false, false);
7978 }
7979 
7980 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7981 {
7982     return op_smlald(s, a, true, false);
7983 }
7984 
7985 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7986 {
7987     return op_smlald(s, a, false, true);
7988 }
7989 
7990 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7991 {
7992     return op_smlald(s, a, true, true);
7993 }
7994 
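/*
 * SMMLA: Rd = ((Ra << 32) + Rn*Rm)[63:32]; SMMLS subtracts the product
 * instead.  The 'R' (rounding) forms add 0x80000000 before taking the
 * high word, implemented below by adding the msb of the low word into
 * the high word.
 */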
7995 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7996 {
7997     TCGv_i32 t1, t2;
7998 
7999     if (s->thumb
8000         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8001         : !ENABLE_ARCH_6) {
8002         return false;
8003     }
8004 
8005     t1 = load_reg(s, a->rn);
8006     t2 = load_reg(s, a->rm);
8007     tcg_gen_muls2_i32(t2, t1, t1, t2);
8008 
8009     if (a->ra != 15) {
8010         TCGv_i32 t3 = load_reg(s, a->ra);
8011         if (sub) {
8012             /*
8013              * For SMMLS, we need a 64-bit subtract: a non-zero lowpart
8014              * of the subtracted product causes a borrow into the high
8015              * word, and we also need the correct result lowpart for rounding.
8016              */
8017             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
8018         } else {
8019             tcg_gen_add_i32(t1, t1, t3);
8020         }
8021         tcg_temp_free_i32(t3);
8022     }
8023     if (round) {
8024         /*
8025          * Adding 0x80000000 to the 64-bit quantity means that we have
8026          * carry in to the high word when the low word has the msb set.
8027          */
8028         tcg_gen_shri_i32(t2, t2, 31);
8029         tcg_gen_add_i32(t1, t1, t2);
8030     }
8031     tcg_temp_free_i32(t2);
8032     store_reg(s, a->rd, t1);
8033     return true;
8034 }
8035 
8036 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
8037 {
8038     return op_smmla(s, a, false, false);
8039 }
8040 
8041 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
8042 {
8043     return op_smmla(s, a, true, false);
8044 }
8045 
8046 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
8047 {
8048     return op_smmla(s, a, false, true);
8049 }
8050 
8051 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
8052 {
8053     return op_smmla(s, a, true, true);
8054 }
8055 
8056 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
8057 {
8058     TCGv_i32 t1, t2;
8059 
8060     if (s->thumb
8061         ? !dc_isar_feature(aa32_thumb_div, s)
8062         : !dc_isar_feature(aa32_arm_div, s)) {
8063         return false;
8064     }
8065 
8066     t1 = load_reg(s, a->rn);
8067     t2 = load_reg(s, a->rm);
8068     if (u) {
8069         gen_helper_udiv(t1, cpu_env, t1, t2);
8070     } else {
8071         gen_helper_sdiv(t1, cpu_env, t1, t2);
8072     }
8073     tcg_temp_free_i32(t2);
8074     store_reg(s, a->rd, t1);
8075     return true;
8076 }
8077 
8078 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
8079 {
8080     return op_div(s, a, false);
8081 }
8082 
8083 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
8084 {
8085     return op_div(s, a, true);
8086 }
8087 
8088 /*
8089  * Block data transfer
8090  */
8091 
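/*
 * op_addr_block_pre() computes the lowest address touched by the block
 * transfer for all four addressing modes (IA/IB/DA/DB); the words are then
 * accessed at increasing addresses and op_addr_block_post() performs any
 * base-register writeback.
 */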
8092 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
8093 {
8094     TCGv_i32 addr = load_reg(s, a->rn);
8095 
8096     if (a->b) {
8097         if (a->i) {
8098             /* pre increment */
8099             tcg_gen_addi_i32(addr, addr, 4);
8100         } else {
8101             /* pre decrement */
8102             tcg_gen_addi_i32(addr, addr, -(n * 4));
8103         }
8104     } else if (!a->i && n != 1) {
8105         /* post decrement */
8106         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8107     }
8108 
8109     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8110         /*
8111          * If the writeback is incrementing SP rather than
8112          * decrementing it, and the initial SP is below the
8113          * stack limit but the final written-back SP would
8114          * be above, then we must not perform any memory
8115          * accesses, but it is IMPDEF whether we generate
8116          * an exception. We choose to do so in this case.
8117          * At this point 'addr' is the lowest address, so
8118          * either the original SP (if incrementing) or our
8119          * final SP (if decrementing), so that's what we check.
8120          */
8121         gen_helper_v8m_stackcheck(cpu_env, addr);
8122     }
8123 
8124     return addr;
8125 }
8126 
8127 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
8128                                TCGv_i32 addr, int n)
8129 {
8130     if (a->w) {
8131         /* write back */
8132         if (!a->b) {
8133             if (a->i) {
8134                 /* post increment */
8135                 tcg_gen_addi_i32(addr, addr, 4);
8136             } else {
8137                 /* post decrement */
8138                 tcg_gen_addi_i32(addr, addr, -(n * 4));
8139             }
8140         } else if (!a->i && n != 1) {
8141             /* pre decrement */
8142             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8143         }
8144         store_reg(s, a->rn, addr);
8145     } else {
8146         tcg_temp_free_i32(addr);
8147     }
8148 }
8149 
8150 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
8151 {
8152     int i, j, n, list, mem_idx;
8153     bool user = a->u;
8154     TCGv_i32 addr, tmp;
8155 
8156     if (user) {
8157         /* STM (user) */
8158         if (IS_USER(s)) {
8159             /* Only usable in supervisor mode.  */
8160             unallocated_encoding(s);
8161             return true;
8162         }
8163     }
8164 
8165     list = a->list;
8166     n = ctpop16(list);
8167     if (n < min_n || a->rn == 15) {
8168         unallocated_encoding(s);
8169         return true;
8170     }
8171 
8172     s->eci_handled = true;
8173 
8174     addr = op_addr_block_pre(s, a, n);
8175     mem_idx = get_mem_index(s);
8176 
8177     for (i = j = 0; i < 16; i++) {
8178         if (!(list & (1 << i))) {
8179             continue;
8180         }
8181 
8182         if (user && i != 15) {
8183             tmp = tcg_temp_new_i32();
8184             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
8185         } else {
8186             tmp = load_reg(s, i);
8187         }
8188         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8189         tcg_temp_free_i32(tmp);
8190 
8191         /* No need to add after the last transfer.  */
8192         if (++j != n) {
8193             tcg_gen_addi_i32(addr, addr, 4);
8194         }
8195     }
8196 
8197     op_addr_block_post(s, a, addr, n);
8198     clear_eci_state(s);
8199     return true;
8200 }
8201 
8202 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
8203 {
8204     /* BitCount(list) < 1 is UNPREDICTABLE */
8205     return op_stm(s, a, 1);
8206 }
8207 
8208 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
8209 {
8210     /* Writeback register in register list is UNPREDICTABLE for T32.  */
8211     if (a->w && (a->list & (1 << a->rn))) {
8212         unallocated_encoding(s);
8213         return true;
8214     }
8215     /* BitCount(list) < 2 is UNPREDICTABLE */
8216     return op_stm(s, a, 2);
8217 }
8218 
8219 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
8220 {
8221     int i, j, n, list, mem_idx;
8222     bool loaded_base;
8223     bool user = a->u;
8224     bool exc_return = false;
8225     TCGv_i32 addr, tmp, loaded_var;
8226 
8227     if (user) {
8228         /* LDM (user), LDM (exception return) */
8229         if (IS_USER(s)) {
8230             /* Only usable in supervisor mode.  */
8231             unallocated_encoding(s);
8232             return true;
8233         }
8234         if (extract32(a->list, 15, 1)) {
8235             exc_return = true;
8236             user = false;
8237         } else {
8238             /* LDM (user) does not allow writeback.  */
8239             if (a->w) {
8240                 unallocated_encoding(s);
8241                 return true;
8242             }
8243         }
8244     }
8245 
8246     list = a->list;
8247     n = ctpop16(list);
8248     if (n < min_n || a->rn == 15) {
8249         unallocated_encoding(s);
8250         return true;
8251     }
8252 
8253     s->eci_handled = true;
8254 
8255     addr = op_addr_block_pre(s, a, n);
8256     mem_idx = get_mem_index(s);
8257     loaded_base = false;
8258     loaded_var = NULL;
8259 
8260     for (i = j = 0; i < 16; i++) {
8261         if (!(list & (1 << i))) {
8262             continue;
8263         }
8264 
8265         tmp = tcg_temp_new_i32();
8266         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8267         if (user) {
8268             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8269             tcg_temp_free_i32(tmp);
8270         } else if (i == a->rn) {
8271             loaded_var = tmp;
8272             loaded_base = true;
8273         } else if (i == 15 && exc_return) {
8274             store_pc_exc_ret(s, tmp);
8275         } else {
8276             store_reg_from_load(s, i, tmp);
8277         }
8278 
8279         /* No need to add after the last transfer.  */
8280         if (++j != n) {
8281             tcg_gen_addi_i32(addr, addr, 4);
8282         }
8283     }
8284 
8285     op_addr_block_post(s, a, addr, n);
8286 
8287     if (loaded_base) {
8288         /* Note that we reject base == pc above.  */
8289         store_reg(s, a->rn, loaded_var);
8290     }
8291 
8292     if (exc_return) {
8293         /* Restore CPSR from SPSR.  */
8294         tmp = load_cpu_field(spsr);
8295         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8296             gen_io_start();
8297         }
8298         gen_helper_cpsr_write_eret(cpu_env, tmp);
8299         tcg_temp_free_i32(tmp);
8300         /* Must exit to the main loop to check for unmasked IRQs */
8301         s->base.is_jmp = DISAS_EXIT;
8302     }
8303     clear_eci_state(s);
8304     return true;
8305 }
8306 
8307 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8308 {
8309     /*
8310      * Writeback register in register list is UNPREDICTABLE
8311      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8312      * an UNKNOWN value to the base register.
8313      */
8314     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8315         unallocated_encoding(s);
8316         return true;
8317     }
8318     /* BitCount(list) < 1 is UNPREDICTABLE */
8319     return do_ldm(s, a, 1);
8320 }
8321 
8322 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8323 {
8324     /* Writeback register in register list is UNPREDICTABLE for T32. */
8325     if (a->w && (a->list & (1 << a->rn))) {
8326         unallocated_encoding(s);
8327         return true;
8328     }
8329     /* BitCount(list) < 2 is UNPREDICTABLE */
8330     return do_ldm(s, a, 2);
8331 }
8332 
8333 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8334 {
8335     /* Writeback is conditional on the base register not being loaded.  */
8336     a->w = !(a->list & (1 << a->rn));
8337     /* BitCount(list) < 1 is UNPREDICTABLE */
8338     return do_ldm(s, a, 1);
8339 }
8340 
8341 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8342 {
8343     int i;
8344     TCGv_i32 zero;
8345 
8346     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8347         return false;
8348     }
8349 
8350     if (extract32(a->list, 13, 1)) {
8351         return false;
8352     }
8353 
8354     if (!a->list) {
8355         /* UNPREDICTABLE; we choose to UNDEF */
8356         return false;
8357     }
8358 
8359     s->eci_handled = true;
8360 
8361     zero = tcg_constant_i32(0);
8362     for (i = 0; i < 15; i++) {
8363         if (extract32(a->list, i, 1)) {
8364             /* Clear R[i] */
8365             tcg_gen_mov_i32(cpu_R[i], zero);
8366         }
8367     }
8368     if (extract32(a->list, 15, 1)) {
8369         /*
8370          * Clear APSR (by calling the MSR helper with the same argument
8371          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8372          */
8373         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8374     }
8375     clear_eci_state(s);
8376     return true;
8377 }
8378 
8379 /*
8380  * Branch, branch with link
8381  */
8382 
8383 static bool trans_B(DisasContext *s, arg_i *a)
8384 {
8385     gen_jmp(s, jmp_diff(s, a->imm));
8386     return true;
8387 }
8388 
8389 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8390 {
8391     /* This insn has its condition in the encoding, and must be outside an IT block.  */
8392     if (a->cond >= 0xe) {
8393         return false;
8394     }
8395     if (s->condexec_mask) {
8396         unallocated_encoding(s);
8397         return true;
8398     }
8399     arm_skip_unless(s, a->cond);
8400     gen_jmp(s, jmp_diff(s, a->imm));
8401     return true;
8402 }
8403 
8404 static bool trans_BL(DisasContext *s, arg_i *a)
8405 {
8406     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8407     gen_jmp(s, jmp_diff(s, a->imm));
8408     return true;
8409 }
8410 
8411 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8412 {
8413     /*
8414      * BLX <imm> would be useless on M-profile; the encoding space
8415      * is used for other insns from v8.1M onward, and UNDEFs before that.
8416      */
8417     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8418         return false;
8419     }
8420 
8421     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8422     if (s->thumb && (a->imm & 2)) {
8423         return false;
8424     }
8425     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8426     store_cpu_field_constant(!s->thumb, thumb);
8427     /* This jump is computed from an aligned PC: subtract off the low bits. */
8428     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8429     return true;
8430 }
8431 
8432 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8433 {
8434     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
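         /* Thumb-1 BL/BLX prefix: stash PC + (imm << 12) in LR for the suffix. */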
8435     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8436     return true;
8437 }
8438 
8439 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8440 {
8441     TCGv_i32 tmp = tcg_temp_new_i32();
8442 
8443     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
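         /*
          * Thumb-1 BL suffix: the prefix left PC + (hi offset << 12) in LR;
          * add the low offset (keeping the Thumb bit set) to form the target,
          * then put the return address (next insn | 1) into LR and branch.
          */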
8444     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8445     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8446     gen_bx(s, tmp);
8447     return true;
8448 }
8449 
8450 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8451 {
8452     TCGv_i32 tmp;
8453 
8454     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8455     if (!ENABLE_ARCH_5) {
8456         return false;
8457     }
8458     tmp = tcg_temp_new_i32();
8459     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8460     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8461     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8462     gen_bx(s, tmp);
8463     return true;
8464 }
8465 
8466 static bool trans_BF(DisasContext *s, arg_BF *a)
8467 {
8468     /*
8469      * M-profile branch future insns. The architecture permits an
8470      * implementation to implement these as NOPs (equivalent to
8471      * discarding the LO_BRANCH_INFO cache immediately), and we
8472      * take that IMPDEF option because for QEMU a "real" implementation
8473      * would be complicated and wouldn't execute any faster.
8474      */
8475     if (!dc_isar_feature(aa32_lob, s)) {
8476         return false;
8477     }
8478     if (a->boff == 0) {
8479         /* SEE "Related encodings" (loop insns) */
8480         return false;
8481     }
8482     /* Handle as NOP */
8483     return true;
8484 }
8485 
8486 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8487 {
8488     /* M-profile low-overhead loop start */
8489     TCGv_i32 tmp;
8490 
8491     if (!dc_isar_feature(aa32_lob, s)) {
8492         return false;
8493     }
8494     if (a->rn == 13 || a->rn == 15) {
8495         /*
8496          * For DLSTP rn == 15 is a related encoding (LCTP); the
8497          * other cases caught by this condition are all
8498          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8499          */
8500         return false;
8501     }
8502 
8503     if (a->size != 4) {
8504         /* DLSTP */
8505         if (!dc_isar_feature(aa32_mve, s)) {
8506             return false;
8507         }
8508         if (!vfp_access_check(s)) {
8509             return true;
8510         }
8511     }
8512 
8513     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8514     tmp = load_reg(s, a->rn);
8515     store_reg(s, 14, tmp);
8516     if (a->size != 4) {
8517         /* DLSTP: set FPSCR.LTPSIZE */
8518         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8519         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8520     }
8521     return true;
8522 }
8523 
8524 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8525 {
8526     /* M-profile low-overhead while-loop start */
8527     TCGv_i32 tmp;
8528     DisasLabel nextlabel;
8529 
8530     if (!dc_isar_feature(aa32_lob, s)) {
8531         return false;
8532     }
8533     if (a->rn == 13 || a->rn == 15) {
8534         /*
8535          * For WLSTP rn == 15 is a related encoding (LE); the
8536          * other cases caught by this condition are all
8537          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8538          */
8539         return false;
8540     }
8541     if (s->condexec_mask) {
8542         /*
8543          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8544          * we choose to UNDEF, because otherwise our use of
8545          * gen_goto_tb(1) would clash with the use of TB exit 1
8546          * in the dc->condjmp condition-failed codepath in
8547          * arm_tr_tb_stop() and we'd get an assertion.
8548          */
8549         return false;
8550     }
8551     if (a->size != 4) {
8552         /* WLSTP */
8553         if (!dc_isar_feature(aa32_mve, s)) {
8554             return false;
8555         }
8556         /*
8557          * We need to check that the FPU is enabled here, but mustn't
8558          * call vfp_access_check() to do that because we don't want to
8559          * do the lazy state preservation in the "loop count is zero" case.
8560          * Do the check-and-raise-exception by hand.
8561          */
8562         if (s->fp_excp_el) {
8563             gen_exception_insn_el(s, 0, EXCP_NOCP,
8564                                   syn_uncategorized(), s->fp_excp_el);
8565             return true;
8566         }
8567     }
8568 
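         /* If the loop count in rn is zero, branch straight past the loop body. */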
8569     nextlabel = gen_disas_label(s);
8570     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8571     tmp = load_reg(s, a->rn);
8572     store_reg(s, 14, tmp);
8573     if (a->size != 4) {
8574         /*
8575          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8576          * lazy state preservation, new FP context creation, etc,
8577          * that vfp_access_check() does. We know that the actual
8578          * access check will succeed (ie it won't generate code that
8579          * throws an exception) because we did that check by hand earlier.
8580          */
8581         bool ok = vfp_access_check(s);
8582         assert(ok);
8583         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8584         /*
8585          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8586          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8587          */
8588     }
8589     gen_jmp_tb(s, curr_insn_len(s), 1);
8590 
8591     set_disas_label(s, nextlabel);
8592     gen_jmp(s, jmp_diff(s, a->imm));
8593     return true;
8594 }
8595 
8596 static bool trans_LE(DisasContext *s, arg_LE *a)
8597 {
8598     /*
8599      * M-profile low-overhead loop end. The architecture permits an
8600      * implementation to discard the LO_BRANCH_INFO cache at any time,
8601      * and we take the IMPDEF option to never set it in the first place
8602      * (equivalent to always discarding it immediately), because for QEMU
8603      * a "real" implementation would be complicated and wouldn't execute
8604      * any faster.
8605      */
8606     TCGv_i32 tmp;
8607     DisasLabel loopend;
8608     bool fpu_active;
8609 
8610     if (!dc_isar_feature(aa32_lob, s)) {
8611         return false;
8612     }
8613     if (a->f && a->tp) {
8614         return false;
8615     }
8616     if (s->condexec_mask) {
8617         /*
8618          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8619          * we choose to UNDEF, because otherwise our use of
8620          * gen_goto_tb(1) would clash with the use of TB exit 1
8621          * in the dc->condjmp condition-failed codepath in
8622          * arm_tr_tb_stop() and we'd get an assertion.
8623          */
8624         return false;
8625     }
8626     if (a->tp) {
8627         /* LETP */
8628         if (!dc_isar_feature(aa32_mve, s)) {
8629             return false;
8630         }
8631         if (!vfp_access_check(s)) {
8632             s->eci_handled = true;
8633             return true;
8634         }
8635     }
8636 
8637     /* LE/LETP is OK with ECI set and leaves it untouched */
8638     s->eci_handled = true;
8639 
8640     /*
8641      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8642      * UsageFault exception for the LE insn in that case. Note that we
8643      * are not directly checking FPSCR.LTPSIZE but instead check the
8644      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8645      * not currently active (ie ActiveFPState() returns false). We
8646      * can identify not-active purely from our TB state flags, as the
8647      * FPU is active only if:
8648      *  the FPU is enabled
8649      *  AND lazy state preservation is not active
8650      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8651      *
8652      * Usually we don't need to care about this distinction between
8653      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8654      * will either take an exception or clear the conditions that make
8655      * the FPU not active. But LE is an unusual case of a non-FP insn
8656      * that looks at LTPSIZE.
8657      */
8658     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8659 
8660     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8661         /* Need to do a runtime check for LTPSIZE != 4 */
8662         DisasLabel skipexc = gen_disas_label(s);
8663         tmp = load_cpu_field(v7m.ltpsize);
8664         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8665         tcg_temp_free_i32(tmp);
8666         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8667         set_disas_label(s, skipexc);
8668     }
8669 
8670     if (a->f) {
8671         /* Loop-forever: just jump back to the loop start */
8672         gen_jmp(s, jmp_diff(s, -a->imm));
8673         return true;
8674     }
8675 
8676     /*
8677      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8678      * For LE, we know at this point that LTPSIZE must be 4 and the
8679      * loop decrement value is 1. For LETP we need to calculate the decrement
8680      * value from LTPSIZE.
8681      */
8682     loopend = gen_disas_label(s);
8683     if (!a->tp) {
8684         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8685         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8686     } else {
8687         /*
8688          * Decrement by 1 << (4 - LTPSIZE). decr is held in its own
8689          * temporary so that it stays live after the brcondi below.
8690          */
8691         TCGv_i32 decr = tcg_temp_new_i32();
8692         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8693         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8694         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8695         tcg_temp_free_i32(ltpsize);
8696 
8697         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8698 
8699         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8700         tcg_temp_free_i32(decr);
8701     }
8702     /* Jump back to the loop start */
8703     gen_jmp(s, jmp_diff(s, -a->imm));
8704 
8705     set_disas_label(s, loopend);
8706     if (a->tp) {
8707         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8708         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8709     }
8710     /* End TB, continuing to following insn */
8711     gen_jmp_tb(s, curr_insn_len(s), 1);
8712     return true;
8713 }
8714 
8715 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8716 {
8717     /*
8718      * M-profile Loop Clear with Tail Predication. Since our implementation
8719      * doesn't cache branch information, all we need to do is reset
8720      * FPSCR.LTPSIZE to 4.
8721      */
8722 
8723     if (!dc_isar_feature(aa32_lob, s) ||
8724         !dc_isar_feature(aa32_mve, s)) {
8725         return false;
8726     }
8727 
8728     if (!vfp_access_check(s)) {
8729         return true;
8730     }
8731 
8732     store_cpu_field_constant(4, v7m.ltpsize);
8733     return true;
8734 }
8735 
8736 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8737 {
8738     /*
8739      * M-profile Create Vector Tail Predicate. This insn is itself
8740      * predicated and is subject to beatwise execution.
8741      */
8742     TCGv_i32 rn_shifted, masklen;
8743 
8744     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8745         return false;
8746     }
8747 
8748     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8749         return true;
8750     }
8751 
8752     /*
8753      * We pre-calculate the mask length here to avoid needing
8754      * multiple helpers specialized for the element size.
8755      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8756      */
8757     rn_shifted = tcg_temp_new_i32();
8758     masklen = load_reg(s, a->rn);
8759     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8760     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8761                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8762                         rn_shifted, tcg_constant_i32(16));
8763     gen_helper_mve_vctp(cpu_env, masklen);
8764     tcg_temp_free_i32(masklen);
8765     tcg_temp_free_i32(rn_shifted);
8766     /* This insn updates predication bits */
8767     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8768     mve_update_eci(s);
8769     return true;
8770 }
8771 
8772 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8773 {
8774     TCGv_i32 addr, tmp;
8775 
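         /*
          * TBB/TBH: load an unsigned byte or halfword entry from the table
          * at rn + rm (rm scaled by the entry size), then branch forward by
          * twice that entry value from the architectural PC of this insn.
          */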
8776     tmp = load_reg(s, a->rm);
8777     if (half) {
8778         tcg_gen_add_i32(tmp, tmp, tmp);
8779     }
8780     addr = load_reg(s, a->rn);
8781     tcg_gen_add_i32(addr, addr, tmp);
8782 
8783     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8784 
8785     tcg_gen_add_i32(tmp, tmp, tmp);
8786     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8787     tcg_gen_add_i32(tmp, tmp, addr);
8788     tcg_temp_free_i32(addr);
8789     store_reg(s, 15, tmp);
8790     return true;
8791 }
8792 
8793 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8794 {
8795     return op_tbranch(s, a, false);
8796 }
8797 
8798 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8799 {
8800     return op_tbranch(s, a, true);
8801 }
8802 
8803 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8804 {
8805     TCGv_i32 tmp = load_reg(s, a->rn);
8806 
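         /*
          * CBZ/CBNZ: branch if the register is zero (CBZ) or non-zero (CBNZ),
          * otherwise fall through to the next insn.
          */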
8807     arm_gen_condlabel(s);
8808     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8809                         tmp, 0, s->condlabel.label);
8810     tcg_temp_free_i32(tmp);
8811     gen_jmp(s, jmp_diff(s, a->imm));
8812     return true;
8813 }
8814 
8815 /*
8816  * Supervisor call - both T32 & A32 come here so we need to check
8817  * which mode we are in when checking for semihosting.
8818  */
8819 
8820 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8821 {
8822     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8823 
8824     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8825         semihosting_enabled(s->current_el == 0) &&
8826         (a->imm == semihost_imm)) {
8827         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8828     } else {
8829         if (s->fgt_svc) {
8830             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8831             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8832         } else {
8833             gen_update_pc(s, curr_insn_len(s));
8834             s->svc_imm = a->imm;
8835             s->base.is_jmp = DISAS_SWI;
8836         }
8837     }
8838     return true;
8839 }
8840 
8841 /*
8842  * Unconditional system instructions
8843  */
8844 
8845 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8846 {
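         /*
          * Offsets applied to the base before loading {PC, CPSR} and for the
          * base writeback, indexed by the P/U addressing-mode bits.
          */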
8847     static const int8_t pre_offset[4] = {
8848         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8849     };
8850     static const int8_t post_offset[4] = {
8851         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8852     };
8853     TCGv_i32 addr, t1, t2;
8854 
8855     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8856         return false;
8857     }
8858     if (IS_USER(s)) {
8859         unallocated_encoding(s);
8860         return true;
8861     }
8862 
8863     addr = load_reg(s, a->rn);
8864     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8865 
8866     /* Load PC into t1 and CPSR into t2.  */
8867     t1 = tcg_temp_new_i32();
8868     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8869     tcg_gen_addi_i32(addr, addr, 4);
8870     t2 = tcg_temp_new_i32();
8871     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8872 
8873     if (a->w) {
8874         /* Base writeback.  */
8875         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8876         store_reg(s, a->rn, addr);
8877     } else {
8878         tcg_temp_free_i32(addr);
8879     }
8880     gen_rfe(s, t1, t2);
8881     return true;
8882 }
8883 
8884 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8885 {
8886     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8887         return false;
8888     }
8889     gen_srs(s, a->mode, a->pu, a->w);
8890     return true;
8891 }
8892 
8893 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8894 {
8895     uint32_t mask, val;
8896 
8897     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8898         return false;
8899     }
8900     if (IS_USER(s)) {
8901         /* Implemented as NOP in user mode.  */
8902         return true;
8903     }
8904     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8905 
8906     mask = val = 0;
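         /*
          * imod bit 1 means the selected A/I/F bits are changed; imod bit 0
          * means they are set (CPSID, interrupts masked) rather than
          * cleared (CPSIE).
          */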
8907     if (a->imod & 2) {
8908         if (a->A) {
8909             mask |= CPSR_A;
8910         }
8911         if (a->I) {
8912             mask |= CPSR_I;
8913         }
8914         if (a->F) {
8915             mask |= CPSR_F;
8916         }
8917         if (a->imod & 1) {
8918             val |= mask;
8919         }
8920     }
8921     if (a->M) {
8922         mask |= CPSR_M;
8923         val |= a->mode;
8924     }
8925     if (mask) {
8926         gen_set_psr_im(s, mask, 0, val);
8927     }
8928     return true;
8929 }
8930 
8931 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8932 {
8933     TCGv_i32 tmp, addr;
8934 
8935     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8936         return false;
8937     }
8938     if (IS_USER(s)) {
8939         /* Implemented as NOP in user mode.  */
8940         return true;
8941     }
8942 
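         /* a->im is written to each selected mask: 1 for CPSID, 0 for CPSIE. */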
8943     tmp = tcg_constant_i32(a->im);
8944     /* FAULTMASK */
8945     if (a->F) {
8946         addr = tcg_constant_i32(19);
8947         gen_helper_v7m_msr(cpu_env, addr, tmp);
8948     }
8949     /* PRIMASK */
8950     if (a->I) {
8951         addr = tcg_constant_i32(16);
8952         gen_helper_v7m_msr(cpu_env, addr, tmp);
8953     }
8954     gen_rebuild_hflags(s, false);
8955     gen_lookup_tb(s);
8956     return true;
8957 }
8958 
8959 /*
8960  * Clear-Exclusive, Barriers
8961  */
8962 
8963 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8964 {
8965     if (s->thumb
8966         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8967         : !ENABLE_ARCH_6K) {
8968         return false;
8969     }
8970     gen_clrex(s);
8971     return true;
8972 }
8973 
8974 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8975 {
8976     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8977         return false;
8978     }
8979     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8980     return true;
8981 }
8982 
8983 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8984 {
8985     return trans_DSB(s, NULL);
8986 }
8987 
8988 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8989 {
8990     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8991         return false;
8992     }
8993     /*
8994      * We need to break the TB after this insn to execute
8995      * self-modifying code correctly and also to take
8996      * any pending interrupts immediately.
8997      */
8998     s->base.is_jmp = DISAS_TOO_MANY;
8999     return true;
9000 }
9001 
9002 static bool trans_SB(DisasContext *s, arg_SB *a)
9003 {
9004     if (!dc_isar_feature(aa32_sb, s)) {
9005         return false;
9006     }
9007     /*
9008      * TODO: There is no speculation barrier opcode
9009      * for TCG; MB and end the TB instead.
9010      */
9011     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9012     s->base.is_jmp = DISAS_TOO_MANY;
9013     return true;
9014 }
9015 
9016 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
9017 {
9018     if (!ENABLE_ARCH_6) {
9019         return false;
9020     }
9021     if (a->E != (s->be_data == MO_BE)) {
9022         gen_helper_setend(cpu_env);
9023         s->base.is_jmp = DISAS_UPDATE_EXIT;
9024     }
9025     return true;
9026 }
9027 
9028 /*
9029  * Preload instructions
9030  * All are nops, contingent on the appropriate arch level.
9031  */
9032 
9033 static bool trans_PLD(DisasContext *s, arg_PLD *a)
9034 {
9035     return ENABLE_ARCH_5TE;
9036 }
9037 
9038 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
9039 {
9040     return arm_dc_feature(s, ARM_FEATURE_V7MP);
9041 }
9042 
9043 static bool trans_PLI(DisasContext *s, arg_PLD *a)
9044 {
9045     return ENABLE_ARCH_7;
9046 }
9047 
9048 /*
9049  * If-then
9050  */
9051 
9052 static bool trans_IT(DisasContext *s, arg_IT *a)
9053 {
9054     int cond_mask = a->cond_mask;
9055 
9056     /*
9057      * No actual code generated for this insn, just setup state.
9058      *
9059      * Combinations of firstcond and mask which set up an 0b1111
9060      * condition are UNPREDICTABLE; we take the CONSTRAINED
9061      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9062      * i.e. both meaning "execute always".
9063      */
9064     s->condexec_cond = (cond_mask >> 4) & 0xe;
9065     s->condexec_mask = cond_mask & 0x1f;
9066     return true;
9067 }
9068 
9069 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
9070 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
9071 {
9072     TCGv_i32 rn, rm, zero;
9073     DisasCompare c;
9074 
9075     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
9076         return false;
9077     }
9078 
9079     if (a->rm == 13) {
9080         /* SEE "Related encodings" (MVE shifts) */
9081         return false;
9082     }
9083 
9084     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
9085         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
9086         return false;
9087     }
9088 
9089     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
9090     zero = tcg_constant_i32(0);
9091     if (a->rn == 15) {
9092         rn = zero;
9093     } else {
9094         rn = load_reg(s, a->rn);
9095     }
9096     if (a->rm == 15) {
9097         rm = zero;
9098     } else {
9099         rm = load_reg(s, a->rm);
9100     }
9101 
9102     switch (a->op) {
9103     case 0: /* CSEL */
9104         break;
9105     case 1: /* CSINC */
9106         tcg_gen_addi_i32(rm, rm, 1);
9107         break;
9108     case 2: /* CSINV */
9109         tcg_gen_not_i32(rm, rm);
9110         break;
9111     case 3: /* CSNEG */
9112         tcg_gen_neg_i32(rm, rm);
9113         break;
9114     default:
9115         g_assert_not_reached();
9116     }
9117 
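         /* The result is rn if fcond passes, otherwise the transformed rm. */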
9118     arm_test_cc(&c, a->fcond);
9119     tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
9120 
9121     store_reg(s, a->rd, rn);
9122     tcg_temp_free_i32(rm);
9123 
9124     return true;
9125 }
9126 
9127 /*
9128  * Legacy decoder.
9129  */
9130 
9131 static void disas_arm_insn(DisasContext *s, unsigned int insn)
9132 {
9133     unsigned int cond = insn >> 28;
9134 
9135     /* M variants do not implement ARM mode; this must raise the INVSTATE
9136      * UsageFault exception.
9137      */
9138     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9139         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
9140         return;
9141     }
9142 
9143     if (s->pstate_il) {
9144         /*
9145          * Illegal execution state. This has priority over BTI
9146          * exceptions, but comes after instruction abort exceptions.
9147          */
9148         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
9149         return;
9150     }
9151 
9152     if (cond == 0xf) {
9153         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9154          * choose to UNDEF. In ARMv5 and above the space is used
9155          * for miscellaneous unconditional instructions.
9156          */
9157         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
9158             unallocated_encoding(s);
9159             return;
9160         }
9161 
9162         /* Unconditional instructions.  */
9163         /* TODO: Perhaps merge these into one decodetree output file.  */
9164         if (disas_a32_uncond(s, insn) ||
9165             disas_vfp_uncond(s, insn) ||
9166             disas_neon_dp(s, insn) ||
9167             disas_neon_ls(s, insn) ||
9168             disas_neon_shared(s, insn)) {
9169             return;
9170         }
9171         /* fall back to legacy decoder */
9172 
9173         if ((insn & 0x0e000f00) == 0x0c000100) {
9174             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9175                 /* iWMMXt register transfer.  */
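                     /* Bit 1 of the XScale CPAR gates access to coprocessor 1. */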
9176                 if (extract32(s->c15_cpar, 1, 1)) {
9177                     if (!disas_iwmmxt_insn(s, insn)) {
9178                         return;
9179                     }
9180                 }
9181             }
9182         }
9183         goto illegal_op;
9184     }
9185     if (cond != 0xe) {
9186         /* If the condition is not "always execute", generate a
9187            conditional jump to the next instruction. */
9188         arm_skip_unless(s, cond);
9189     }
9190 
9191     /* TODO: Perhaps merge these into one decodetree output file.  */
9192     if (disas_a32(s, insn) ||
9193         disas_vfp(s, insn)) {
9194         return;
9195     }
9196     /* fall back to legacy decoder */
9197     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
9198     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
9199         if (((insn & 0x0c000e00) == 0x0c000000)
9200             && ((insn & 0x03000000) != 0x03000000)) {
9201             /* Coprocessor insn, coprocessor 0 or 1 */
9202             disas_xscale_insn(s, insn);
9203             return;
9204         }
9205     }
9206 
9207 illegal_op:
9208     unallocated_encoding(s);
9209 }
9210 
9211 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9212 {
9213     /*
9214      * Return true if this is a 16 bit instruction. We must be precise
9215      * about this (matching the decode).
9216      */
9217     if ((insn >> 11) < 0x1d) {
9218         /* Definitely a 16-bit instruction */
9219         return true;
9220     }
9221 
9222     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9223      * first half of a 32-bit Thumb insn. Thumb-1 cores might
9224      * end up actually treating this as two 16-bit insns, though,
9225      * if it's half of a bl/blx pair that might span a page boundary.
9226      */
9227     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9228         arm_dc_feature(s, ARM_FEATURE_M)) {
9229         /* Thumb2 cores (including all M profile ones) always treat
9230          * 32-bit insns as 32-bit.
9231          */
9232         return false;
9233     }
9234 
9235     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9236         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9237          * is not on the next page; we merge this into a 32-bit
9238          * insn.
9239          */
9240         return false;
9241     }
9242     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9243      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9244      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9245      *  -- handle as single 16 bit insn
9246      */
9247     return true;
9248 }
9249 
9250 /* Translate a 32-bit thumb instruction. */
9251 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9252 {
9253     /*
9254      * Other Thumb1 architectures allow only the 32-bit BL/BLX formed
9255      * from the combined prefix and suffix halfwords.
9256      * combined BL/BLX prefix and suffix.
9257      */
9258     if (arm_dc_feature(s, ARM_FEATURE_M) &&
9259         !arm_dc_feature(s, ARM_FEATURE_V7)) {
9260         int i;
9261         bool found = false;
9262         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9263                                                0xf3b08040 /* dsb */,
9264                                                0xf3b08050 /* dmb */,
9265                                                0xf3b08060 /* isb */,
9266                                                0xf3e08000 /* mrs */,
9267                                                0xf000d000 /* bl */};
9268         static const uint32_t armv6m_mask[] = {0xffe0d000,
9269                                                0xfff0d0f0,
9270                                                0xfff0d0f0,
9271                                                0xfff0d0f0,
9272                                                0xffe0d000,
9273                                                0xf800d000};
9274 
9275         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9276             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9277                 found = true;
9278                 break;
9279             }
9280         }
9281         if (!found) {
9282             goto illegal_op;
9283         }
9284     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9285         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9286             unallocated_encoding(s);
9287             return;
9288         }
9289     }
9290 
9291     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9292         /*
9293          * NOCP takes precedence over any UNDEF for (almost) the
9294          * entire wide range of coprocessor-space encodings, so check
9295          * for it first before proceeding to actually decode eg VFP
9296          * insns. This decode also handles the few insns which are
9297          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9298          */
9299         if (disas_m_nocp(s, insn)) {
9300             return;
9301         }
9302     }
9303 
9304     if ((insn & 0xef000000) == 0xef000000) {
9305         /*
9306          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9307          * transform into
9308          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9309          */
9310         uint32_t a32_insn = (insn & 0xe2ffffff) |
9311             ((insn & (1 << 28)) >> 4) | (1 << 28);
9312 
9313         if (disas_neon_dp(s, a32_insn)) {
9314             return;
9315         }
9316     }
9317 
9318     if ((insn & 0xff100000) == 0xf9000000) {
9319         /*
9320          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9321          * transform into
9322          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9323          */
9324         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9325 
9326         if (disas_neon_ls(s, a32_insn)) {
9327             return;
9328         }
9329     }
9330 
9331     /*
9332      * TODO: Perhaps merge these into one decodetree output file.
9333      * Note disas_vfp is written for a32 with cond field in the
9334      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9335      */
9336     if (disas_t32(s, insn) ||
9337         disas_vfp_uncond(s, insn) ||
9338         disas_neon_shared(s, insn) ||
9339         disas_mve(s, insn) ||
9340         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9341         return;
9342     }
9343 
9344 illegal_op:
9345     unallocated_encoding(s);
9346 }
9347 
9348 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9349 {
9350     if (!disas_t16(s, insn)) {
9351         unallocated_encoding(s);
9352     }
9353 }
9354 
9355 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9356 {
9357     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9358      * (False positives are OK, false negatives are not.)
9359      * We know this is a Thumb insn, and our caller ensures we are
9360      * only called if dc->base.pc_next is less than 4 bytes from the page
9361      * boundary, so we cross the page if the first 16 bits indicate
9362      * that this is a 32 bit insn.
9363      */
9364     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9365 
9366     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9367 }
9368 
9369 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9370 {
9371     DisasContext *dc = container_of(dcbase, DisasContext, base);
9372     CPUARMState *env = cs->env_ptr;
9373     ARMCPU *cpu = env_archcpu(env);
9374     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9375     uint32_t condexec, core_mmu_idx;
9376 
9377     dc->isar = &cpu->isar;
9378     dc->condjmp = 0;
9379     dc->pc_save = dc->base.pc_first;
9380     dc->aarch64 = false;
9381     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9382     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9383     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9384     /*
9385      * The CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9386      * is always the IT bits. On M-profile, some of the reserved encodings
9387      * of IT are used instead to indicate either ICI or ECI, which
9388      * indicate partial progress of a restartable insn that was interrupted
9389      * partway through by an exception:
9390      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9391      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9392      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9393      * insn, behave normally".
9394      */
9395     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9396     dc->eci_handled = false;
9397     if (condexec & 0xf) {
9398         dc->condexec_mask = (condexec & 0xf) << 1;
9399         dc->condexec_cond = condexec >> 4;
9400     } else {
9401         if (arm_feature(env, ARM_FEATURE_M)) {
9402             dc->eci = condexec >> 4;
9403         }
9404     }
9405 
9406     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9407     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9408     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9409 #if !defined(CONFIG_USER_ONLY)
9410     dc->user = (dc->current_el == 0);
9411 #endif
9412     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9413     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9414     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9415     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9416     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9417 
9418     if (arm_feature(env, ARM_FEATURE_M)) {
9419         dc->vfp_enabled = 1;
9420         dc->be_data = MO_TE;
9421         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9422         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9423         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9424         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9425         dc->v7m_new_fp_ctxt_needed =
9426             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9427         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9428         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9429     } else {
9430         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9431         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9432         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9433         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9434         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9435             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9436         } else {
9437             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9438             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9439         }
9440         dc->sme_trap_nonstreaming =
9441             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9442     }
9443     dc->cp_regs = cpu->cp_regs;
9444     dc->features = env->features;
9445 
9446     /* Single step state. The code-generation logic here is:
9447      *  SS_ACTIVE == 0:
9448      *   generate code with no special handling for single-stepping (except
9449      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9450      *   this happens anyway because those changes are all system register or
9451      *   PSTATE writes).
9452      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9453      *   emit code for one insn
9454      *   emit code to clear PSTATE.SS
9455      *   emit code to generate software step exception for completed step
9456      *   end TB (as usual for having generated an exception)
9457      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9458      *   emit code to generate a software step exception
9459      *   end the TB
9460      */
9461     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9462     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9463     dc->is_ldex = false;
9464 
9465     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9466 
9467     /* If architectural single step is active, limit the TB to one insn.  */
9468     if (dc->ss_active) {
9469         dc->base.max_insns = 1;
9470     }
9471 
9472     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9473        to those left on the page.  */
9474     if (!dc->thumb) {
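             /* -(pc_first | TARGET_PAGE_MASK) is the number of bytes left in this page. */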
9475         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9476         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9477     }
9478 
9479     cpu_V0 = tcg_temp_new_i64();
9480     cpu_V1 = tcg_temp_new_i64();
9481     cpu_M0 = tcg_temp_new_i64();
9482 }
9483 
9484 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9485 {
9486     DisasContext *dc = container_of(dcbase, DisasContext, base);
9487 
9488     /* A note on handling of the condexec (IT) bits:
9489      *
9490      * We want to avoid the overhead of having to write the updated condexec
9491      * bits back to the CPUARMState for every instruction in an IT block. So:
9492      * (1) if the condexec bits are not already zero then we write
9493      * zero back into the CPUARMState now. This avoids complications trying
9494      * to do it at the end of the block. (For example if we don't do this
9495      * it's hard to identify whether we can safely skip writing condexec
9496      * at the end of the TB, which we definitely want to do for the case
9497      * where a TB doesn't do anything with the IT state at all.)
9498      * (2) if we are going to leave the TB then we call gen_set_condexec()
9499      * which will write the correct value into CPUARMState if zero is wrong.
9500      * This is done both for leaving the TB at the end, and for leaving
9501      * it because of an exception we know will happen, which is done in
9502      * gen_exception_insn(). The latter is necessary because we need to
9503      * leave the TB with the PC/IT state just prior to execution of the
9504      * instruction which caused the exception.
9505      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9506      * then the CPUARMState will be wrong and we need to reset it.
9507      * This is handled in the same way as restoration of the
9508      * PC in these situations; we save the value of the condexec bits
9509      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9510      * then uses this to restore them after an exception.
9511      *
9512      * Note that there are no instructions which can read the condexec
9513      * bits, and none which can write non-static values to them, so
9514      * we don't need to care about whether CPUARMState is correct in the
9515      * middle of a TB.
9516      */
9517 
9518     /* Reset the conditional execution bits immediately. This avoids
9519        complications trying to do it at the end of the block.  */
9520     if (dc->condexec_mask || dc->condexec_cond) {
9521         store_cpu_field_constant(0, condexec_bits);
9522     }
9523 }
9524 
9525 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9526 {
9527     DisasContext *dc = container_of(dcbase, DisasContext, base);
9528     /*
9529      * The ECI/ICI bits share PSR bits with the IT bits, so we
9530      * need to reconstitute the bits from the split-out DisasContext
9531      * fields here.
9532      */
9533     uint32_t condexec_bits;
9534     target_ulong pc_arg = dc->base.pc_next;
9535 
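         /*
          * With CF_PCREL the TB is position-independent, so only the
          * in-page offset of the PC is recorded here.
          */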
9536     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9537         pc_arg &= ~TARGET_PAGE_MASK;
9538     }
9539     if (dc->eci) {
9540         condexec_bits = dc->eci << 4;
9541     } else {
9542         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9543     }
9544     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9545     dc->insn_start = tcg_last_op();
9546 }
9547 
9548 static bool arm_check_kernelpage(DisasContext *dc)
9549 {
9550 #ifdef CONFIG_USER_ONLY
9551     /* Intercept jump to the magic kernel page.  */
9552     if (dc->base.pc_next >= 0xffff0000) {
9553         /* We always get here via a jump, so we know we are not in a
9554            conditional execution block.  */
9555         gen_exception_internal(EXCP_KERNEL_TRAP);
9556         dc->base.is_jmp = DISAS_NORETURN;
9557         return true;
9558     }
9559 #endif
9560     return false;
9561 }
9562 
9563 static bool arm_check_ss_active(DisasContext *dc)
9564 {
9565     if (dc->ss_active && !dc->pstate_ss) {
9566         /* Singlestep state is Active-pending.
9567          * If we're in this state at the start of a TB then either
9568          *  a) we just took an exception to an EL which is being debugged
9569          *     and this is the first insn in the exception handler
9570          *  b) debug exceptions were masked and we just unmasked them
9571          *     without changing EL (eg by clearing PSTATE.D)
9572          * In either case we're going to take a swstep exception in the
9573          * "did not step an insn" case, and so the syndrome ISV and EX
9574          * bits should be zero.
9575          */
9576         assert(dc->base.num_insns == 1);
9577         gen_swstep_exception(dc, 0, 0);
9578         dc->base.is_jmp = DISAS_NORETURN;
9579         return true;
9580     }
9581 
9582     return false;
9583 }
9584 
9585 static void arm_post_translate_insn(DisasContext *dc)
9586 {
9587     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9588         if (dc->pc_save != dc->condlabel.pc_save) {
9589             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9590         }
9591         gen_set_label(dc->condlabel.label);
9592         dc->condjmp = 0;
9593     }
9594 }
9595 
9596 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9597 {
9598     DisasContext *dc = container_of(dcbase, DisasContext, base);
9599     CPUARMState *env = cpu->env_ptr;
9600     uint32_t pc = dc->base.pc_next;
9601     unsigned int insn;
9602 
9603     /* Singlestep exceptions have the highest priority. */
9604     if (arm_check_ss_active(dc)) {
9605         dc->base.pc_next = pc + 4;
9606         return;
9607     }
9608 
9609     if (pc & 3) {
9610         /*
9611          * PC alignment fault.  This has priority over the instruction abort
9612          * that we would receive from a translation fault via arm_ldl_code
9613          * (or the execution of the kernelpage entrypoint). This should only
9614          * be possible after an indirect branch, at the start of the TB.
9615          */
9616         assert(dc->base.num_insns == 1);
9617         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9618         dc->base.is_jmp = DISAS_NORETURN;
9619         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9620         return;
9621     }
9622 
9623     if (arm_check_kernelpage(dc)) {
9624         dc->base.pc_next = pc + 4;
9625         return;
9626     }
9627 
9628     dc->pc_curr = pc;
9629     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9630     dc->insn = insn;
9631     dc->base.pc_next = pc + 4;
9632     disas_arm_insn(dc, insn);
9633 
9634     arm_post_translate_insn(dc);
9635 
9636     /* ARM is a fixed-length ISA.  We performed the cross-page check
9637        in init_disas_context by adjusting max_insns.  */
9638 }
9639 
9640 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9641 {
9642     /* Return true if this Thumb insn is always unconditional,
9643      * even inside an IT block. This is true of only a very few
9644      * instructions: BKPT, HLT, and SG.
9645      *
9646      * A larger class of instructions are UNPREDICTABLE if used
9647      * inside an IT block; we do not need to detect those here, because
9648      * what we do by default (perform the cc check and update the IT
9649      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9650      * choice for those situations.
9651      *
9652      * insn is either a 16-bit or a 32-bit instruction; the two are
9653      * distinguishable because for the 16-bit case the top 16 bits
9654      * are zeroes, and that isn't a valid 32-bit encoding.
9655      */
9656     if ((insn & 0xffffff00) == 0xbe00) {
9657         /* BKPT */
9658         return true;
9659     }
9660 
9661     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9662         !arm_dc_feature(s, ARM_FEATURE_M)) {
9663         /* HLT: v8A only. This is unconditional even when it is going to
9664          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9665          * For v7 cores this was a plain old undefined encoding and so
9666          * honours its cc check. (We might be using the encoding as
9667          * a semihosting trap, but we don't change the cc check behaviour
9668          * on that account, because a debugger connected to a real v7A
9669          * core and emulating semihosting traps by catching the UNDEF
9670          * exception would also only see cases where the cc check passed.
9671          * No guest code should be trying to do a HLT semihosting trap
9672          * in an IT block anyway.
9673          */
9674         return true;
9675     }
9676 
9677     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9678         arm_dc_feature(s, ARM_FEATURE_M)) {
9679         /* SG: v8M only */
9680         return true;
9681     }
9682 
9683     return false;
9684 }
9685 
9686 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9687 {
9688     DisasContext *dc = container_of(dcbase, DisasContext, base);
9689     CPUARMState *env = cpu->env_ptr;
9690     uint32_t pc = dc->base.pc_next;
9691     uint32_t insn;
9692     bool is_16bit;
9693     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9694     TCGOp *insn_eci_rewind = NULL;
9695     target_ulong insn_eci_pc_save = -1;
9696 
9697     /* A misaligned Thumb PC is architecturally impossible. */
9698     assert((dc->base.pc_next & 1) == 0);
9699 
9700     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9701         dc->base.pc_next = pc + 2;
9702         return;
9703     }
9704 
9705     dc->pc_curr = pc;
9706     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9707     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9708     pc += 2;
9709     if (!is_16bit) {
9710         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9711         insn = insn << 16 | insn2;
9712         pc += 2;
9713     }
9714     dc->base.pc_next = pc;
9715     dc->insn = insn;
9716 
9717     if (dc->pstate_il) {
9718         /*
9719          * Illegal execution state. This has priority over BTI
9720          * exceptions, but comes after instruction abort exceptions.
9721          */
9722         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9723         return;
9724     }
9725 
9726     if (dc->eci) {
9727         /*
9728          * For M-profile continuable instructions, ECI/ICI handling
9729          * falls into these cases:
9730          *  - interrupt-continuable instructions
9731          *     These are the various load/store multiple insns (both
9732          *     integer and fp). The ICI bits indicate the register
9733          *     where the load/store can resume. We make the IMPDEF
9734          *     choice to always do "instruction restart", ie ignore
9735          *     the ICI value and always execute the ldm/stm from the
9736          *     start. So all we need to do is zero PSR.ICI if the
9737          *     insn executes.
9738          *  - MVE instructions subject to beat-wise execution
9739          *     Here the ECI bits indicate which beats have already been
9740          *     executed, and we must honour this. Each insn of this
9741          *     type will handle it correctly. We will update PSR.ECI
9742          *     in the helper function for the insn (some ECI values
9743          *     mean that the following insn also has been partially
9744          *     executed).
9745          *  - Special cases which don't advance ECI
9746          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9747          *     bits untouched.
9748          *  - all other insns (the common case)
9749          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9750          *     We place a rewind-marker here. Insns in the previous
9751          *     three categories will set a flag in the DisasContext.
9752          *     If the flag isn't set after we call disas_thumb_insn()
9753          *     or disas_thumb2_insn() then we know we have a "some other
9754          *     insn" case. We will rewind to the marker (ie throwing away
9755          *     all the generated code) and instead emit "take exception".
9756          */
9757         insn_eci_rewind = tcg_last_op();
9758         insn_eci_pc_save = dc->pc_save;
9759     }
9760 
9761     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9762         uint32_t cond = dc->condexec_cond;
9763 
9764         /*
9765          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9766          * "always"; 0xf is not "never".
9767          */
9768         if (cond < 0x0e) {
9769             arm_skip_unless(dc, cond);
9770         }
9771     }
9772 
9773     if (is_16bit) {
9774         disas_thumb_insn(dc, insn);
9775     } else {
9776         disas_thumb2_insn(dc, insn);
9777     }
9778 
9779     /* Advance the Thumb condexec condition.  */
9780     if (dc->condexec_mask) {
9781         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9782                              ((dc->condexec_mask >> 4) & 1));
9783         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9784         if (dc->condexec_mask == 0) {
9785             dc->condexec_cond = 0;
9786         }
9787     }
9788 
9789     if (dc->eci && !dc->eci_handled) {
9790         /*
9791          * Insn wasn't valid for ECI/ICI at all: undo what we
9792          * just generated and instead emit an exception
9793          */
9794         tcg_remove_ops_after(insn_eci_rewind);
9795         dc->pc_save = insn_eci_pc_save;
9796         dc->condjmp = 0;
9797         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9798     }
9799 
9800     arm_post_translate_insn(dc);
9801 
9802     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9803      * will touch a new page.  This ensures that prefetch aborts occur at
9804      * the right place.
9805      *
9806      * We want to stop the TB if the next insn starts in a new page,
9807      * or if it spans between this page and the next. This means that
9808      * if we're looking at the last halfword in the page we need to
9809      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9810      * or a 32-bit Thumb insn (which won't).
9811      * This is to avoid generating a silly TB with a single 16-bit insn
9812      * in it at the end of this page (which would execute correctly
9813      * but isn't very efficient).
9814      */
9815     if (dc->base.is_jmp == DISAS_NEXT
9816         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9817             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9818                 && insn_crosses_page(env, dc)))) {
9819         dc->base.is_jmp = DISAS_TOO_MANY;
9820     }
9821 }
9822 
9823 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9824 {
9825     DisasContext *dc = container_of(dcbase, DisasContext, base);
9826 
9827     /* At this stage dc->condjmp will only be set when the skipped
9828        instruction was a conditional branch or trap, and the PC has
9829        already been written.  */
9830     gen_set_condexec(dc);
9831     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9832         /* Exception return branches need some special case code at the
9833          * end of the TB, which is complex enough that it has to
9834          * handle both the single-step-vs-normal and the condition-failed
9835          * insn codepaths itself.
9836          */
9837         gen_bx_excret_final_code(dc);
9838     } else if (unlikely(dc->ss_active)) {
9839         /* Unconditional and "condition passed" instruction codepath. */
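        /*
         * The exception-generating insns (SWI/HVC/SMC) complete by taking
         * their own exception, so we advance the single-step machinery
         * with gen_ss_advance() before raising it; for the other cases it
         * is the step itself that ends the insn, so we raise a
         * single-step exception here (first updating the PC for insns
         * that fell through normally).
         */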
9840         switch (dc->base.is_jmp) {
9841         case DISAS_SWI:
9842             gen_ss_advance(dc);
9843             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9844             break;
9845         case DISAS_HVC:
9846             gen_ss_advance(dc);
9847             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9848             break;
9849         case DISAS_SMC:
9850             gen_ss_advance(dc);
9851             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9852             break;
9853         case DISAS_NEXT:
9854         case DISAS_TOO_MANY:
9855         case DISAS_UPDATE_EXIT:
9856         case DISAS_UPDATE_NOCHAIN:
9857             gen_update_pc(dc, curr_insn_len(dc));
9858             /* fall through */
9859         default:
9860             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9861             gen_singlestep_exception(dc);
9862             break;
9863         case DISAS_NORETURN:
9864             break;
9865         }
9866     } else {
9867         /* While branches must always occur at the end of an IT block,
9868            there are a few other things that can cause us to terminate
9869            the TB in the middle of an IT block:
9870             - Exception-generating instructions (bkpt, swi, undefined).
9871             - Page boundaries.
9872             - Hardware watchpoints.
9873            Hardware breakpoints have already been handled and skip this code.
9874          */
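        /*
         * The cases below choose between the usual TB exit strategies:
         * gen_goto_tb() can chain directly to a known destination,
         * gen_goto_ptr() chains via a TB lookup on the current CPU state,
         * and tcg_gen_exit_tb(NULL, 0) returns to the main loop, which
         * will look up the next TB itself and recheck for interrupts.
         */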
9875         switch (dc->base.is_jmp) {
9876         case DISAS_NEXT:
9877         case DISAS_TOO_MANY:
9878             gen_goto_tb(dc, 1, curr_insn_len(dc));
9879             break;
9880         case DISAS_UPDATE_NOCHAIN:
9881             gen_update_pc(dc, curr_insn_len(dc));
9882             /* fall through */
9883         case DISAS_JUMP:
9884             gen_goto_ptr();
9885             break;
9886         case DISAS_UPDATE_EXIT:
9887             gen_update_pc(dc, curr_insn_len(dc));
9888             /* fall through */
9889         default:
9890             /* indicate that the hash table must be used to find the next TB */
9891             tcg_gen_exit_tb(NULL, 0);
9892             break;
9893         case DISAS_NORETURN:
9894             /* nothing more to generate */
9895             break;
9896         case DISAS_WFI:
9897             gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9898             /*
9899              * The helper doesn't necessarily throw an exception, but we
9900              * must go back to the main loop to check for interrupts anyway.
9901              */
9902             tcg_gen_exit_tb(NULL, 0);
9903             break;
9904         case DISAS_WFE:
9905             gen_helper_wfe(cpu_env);
9906             break;
9907         case DISAS_YIELD:
9908             gen_helper_yield(cpu_env);
9909             break;
9910         case DISAS_SWI:
9911             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9912             break;
9913         case DISAS_HVC:
9914             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9915             break;
9916         case DISAS_SMC:
9917             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9918             break;
9919         }
9920     }
9921 
9922     if (dc->condjmp) {
9923         /* "Condition failed" instruction codepath for the branch/trap insn */
9924         set_disas_label(dc, dc->condlabel);
9925         gen_set_condexec(dc);
9926         if (unlikely(dc->ss_active)) {
9927             gen_update_pc(dc, curr_insn_len(dc));
9928             gen_singlestep_exception(dc);
9929         } else {
9930             gen_goto_tb(dc, 1, curr_insn_len(dc));
9931         }
9932     }
9933 }
9934 
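/*
 * Log the guest code for this TB (e.g. for "-d in_asm"): the symbol, if
 * any, covering pc_first, followed by a disassembly of the TB's bytes.
 */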
9935 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9936                              CPUState *cpu, FILE *logfile)
9937 {
9938     DisasContext *dc = container_of(dcbase, DisasContext, base);
9939 
9940     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9941     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9942 }
9943 
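/*
 * Hooks for the generic translator_loop(): init_disas_context and tb_start
 * run once per TB, insn_start and translate_insn run for each guest insn
 * until translate_insn asks to stop, tb_stop emits the TB epilogue, and
 * disas_log is used only for debug logging.
 */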
9944 static const TranslatorOps arm_translator_ops = {
9945     .init_disas_context = arm_tr_init_disas_context,
9946     .tb_start           = arm_tr_tb_start,
9947     .insn_start         = arm_tr_insn_start,
9948     .translate_insn     = arm_tr_translate_insn,
9949     .tb_stop            = arm_tr_tb_stop,
9950     .disas_log          = arm_tr_disas_log,
9951 };
9952 
9953 static const TranslatorOps thumb_translator_ops = {
9954     .init_disas_context = arm_tr_init_disas_context,
9955     .tb_start           = arm_tr_tb_start,
9956     .insn_start         = arm_tr_insn_start,
9957     .translate_insn     = thumb_tr_translate_insn,
9958     .tb_stop            = arm_tr_tb_stop,
9959     .disas_log          = arm_tr_disas_log,
9960 };
9961 
9962 /* Generate intermediate code for translation block 'tb'.  */
9963 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9964                            target_ulong pc, void *host_pc)
9965 {
9966     DisasContext dc = { };
9967     const TranslatorOps *ops = &arm_translator_ops;
9968     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9969 
9970     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9971         ops = &thumb_translator_ops;
9972     }
9973 #ifdef TARGET_AARCH64
9974     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9975         ops = &aarch64_translator_ops;
9976     }
9977 #endif
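    /*
     * Note the ordering: for an AArch64 TB the AArch64 ops override the
     * Thumb selection above, since the AM32 flag bits are not meaningful
     * when the TB is in AArch64 state.
     */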
9978 
9979     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9980 }
9981