xref: /openbmc/qemu/target/arm/tcg/translate.c (revision 84156ff0)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "semihosting/semihost.h"
33 #include "exec/helper-proto.h"
34 #include "exec/helper-gen.h"
35 #include "exec/log.h"
36 #include "cpregs.h"
37 
38 
39 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
40 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
41 /* currently all emulated v5 cores are also v5TE, so don't bother */
42 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
43 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
44 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
45 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
46 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
47 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
48 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
49 
50 #include "translate.h"
51 #include "translate-a32.h"
52 
53 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
54 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
55 /* These are TCG globals which alias CPUARMState fields */
56 static TCGv_i32 cpu_R[16];
57 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
58 TCGv_i64 cpu_exclusive_addr;
59 TCGv_i64 cpu_exclusive_val;
60 
61 #include "exec/gen-icount.h"
62 
63 static const char * const regnames[] =
64     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
65       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
66 
67 
68 /* initialize TCG globals.  */
69 void arm_translate_init(void)
70 {
71     int i;
72 
73     for (i = 0; i < 16; i++) {
74         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
75                                           offsetof(CPUARMState, regs[i]),
76                                           regnames[i]);
77     }
78     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
79     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
80     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
81     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
82 
83     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
84         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
85     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
86         offsetof(CPUARMState, exclusive_val), "exclusive_val");
87 
88     a64_translate_init();
89 }
90 
91 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
92 {
93     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
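    /*
     * Worked examples (illustrative): cmode=12, op=0, imm=0x12 gives
     * (0x12 << 8) | 0xff = 0x12ff, which dup_const(MO_32) replicates to
     * 0x000012ff000012ff.  cmode=14, op=1 expands each of the 8 imm bits
     * to a full byte, so imm=0xa5 yields 0xff00ff0000ff00ff.
     */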
94     switch (cmode) {
95     case 0: case 1:
96         /* no-op */
97         break;
98     case 2: case 3:
99         imm <<= 8;
100         break;
101     case 4: case 5:
102         imm <<= 16;
103         break;
104     case 6: case 7:
105         imm <<= 24;
106         break;
107     case 8: case 9:
108         imm |= imm << 16;
109         break;
110     case 10: case 11:
111         imm = (imm << 8) | (imm << 24);
112         break;
113     case 12:
114         imm = (imm << 8) | 0xff;
115         break;
116     case 13:
117         imm = (imm << 16) | 0xffff;
118         break;
119     case 14:
120         if (op) {
121             /*
122              * This and cmode == 15 op == 1 are the only cases where
123              * the top and bottom 32 bits of the encoded constant differ.
124              */
125             uint64_t imm64 = 0;
126             int n;
127 
128             for (n = 0; n < 8; n++) {
129                 if (imm & (1 << n)) {
130                     imm64 |= (0xffULL << (n * 8));
131                 }
132             }
133             return imm64;
134         }
135         imm |= (imm << 8) | (imm << 16) | (imm << 24);
136         break;
137     case 15:
138         if (op) {
139             /* Reserved encoding for AArch32; valid for AArch64 */
140             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
141             if (imm & 0x80) {
142                 imm64 |= 0x8000000000000000ULL;
143             }
144             if (imm & 0x40) {
145                 imm64 |= 0x3fc0000000000000ULL;
146             } else {
147                 imm64 |= 0x4000000000000000ULL;
148             }
149             return imm64;
150         }
151         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
152             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
153         break;
154     }
155     if (op) {
156         imm = ~imm;
157     }
158     return dup_const(MO_32, imm);
159 }
160 
161 /* Generate a label used for skipping this instruction */
162 void arm_gen_condlabel(DisasContext *s)
163 {
164     if (!s->condjmp) {
165         s->condlabel = gen_disas_label(s);
166         s->condjmp = 1;
167     }
168 }
169 
170 /* Flags for the disas_set_da_iss info argument:
171  * lower bits hold the Rt register number, higher bits are flags.
172  */
173 typedef enum ISSInfo {
174     ISSNone = 0,
175     ISSRegMask = 0x1f,
176     ISSInvalid = (1 << 5),
177     ISSIsAcqRel = (1 << 6),
178     ISSIsWrite = (1 << 7),
179     ISSIs16Bit = (1 << 8),
180 } ISSInfo;
181 
182 /*
183  * Store var into env + offset to a member with size bytes.
184  * Free var after use.
185  */
186 void store_cpu_offset(TCGv_i32 var, int offset, int size)
187 {
188     switch (size) {
189     case 1:
190         tcg_gen_st8_i32(var, cpu_env, offset);
191         break;
192     case 4:
193         tcg_gen_st_i32(var, cpu_env, offset);
194         break;
195     default:
196         g_assert_not_reached();
197     }
198     tcg_temp_free_i32(var);
199 }
200 
201 /* Save the syndrome information for a Data Abort */
202 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
203 {
204     uint32_t syn;
205     int sas = memop & MO_SIZE;
206     bool sse = memop & MO_SIGN;
207     bool is_acqrel = issinfo & ISSIsAcqRel;
208     bool is_write = issinfo & ISSIsWrite;
209     bool is_16bit = issinfo & ISSIs16Bit;
210     int srt = issinfo & ISSRegMask;
211 
212     if (issinfo & ISSInvalid) {
213         /* Some callsites want to conditionally provide ISS info,
214          * eg "only if this was not a writeback"
215          */
216         return;
217     }
218 
219     if (srt == 15) {
220         /* For AArch32, insns where the src/dest is R15 never generate
221          * ISS information. Catching that here saves checking at all
222          * the call sites.
223          */
224         return;
225     }
226 
227     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
228                                   0, 0, 0, is_write, 0, is_16bit);
229     disas_set_insn_syndrome(s, syn);
230 }
231 
232 static inline int get_a32_user_mem_index(DisasContext *s)
233 {
234     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
235      * insns:
236      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
237      *  otherwise, access as if at PL0.
238      */
239     switch (s->mmu_idx) {
240     case ARMMMUIdx_E3:
241     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
242     case ARMMMUIdx_E10_0:
243     case ARMMMUIdx_E10_1:
244     case ARMMMUIdx_E10_1_PAN:
245         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
246     case ARMMMUIdx_MUser:
247     case ARMMMUIdx_MPriv:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
249     case ARMMMUIdx_MUserNegPri:
250     case ARMMMUIdx_MPrivNegPri:
251         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
252     case ARMMMUIdx_MSUser:
253     case ARMMMUIdx_MSPriv:
254         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
255     case ARMMMUIdx_MSUserNegPri:
256     case ARMMMUIdx_MSPrivNegPri:
257         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
258     default:
259         g_assert_not_reached();
260     }
261 }
262 
263 /* The pc_curr difference for an architectural jump. */
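/*
 * In AArch32 a read of the PC yields the address of the current instruction
 * plus 8 in ARM state or plus 4 in Thumb state, so branch offsets encoded
 * relative to the instruction need this fixed adjustment added to the
 * pc_curr-relative diff.
 */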
264 static target_long jmp_diff(DisasContext *s, target_long diff)
265 {
266     return diff + (s->thumb ? 4 : 8);
267 }
268 
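/*
 * Write PC + DIFF into VAR.  With CF_PCREL the translation block must be
 * position independent, so the value is built by adding a delta to the
 * run-time contents of cpu_R[15] (valid as of pc_save) rather than by
 * loading an absolute constant.
 */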
269 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
270 {
271     assert(s->pc_save != -1);
272     if (tb_cflags(s->base.tb) & CF_PCREL) {
273         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
274     } else {
275         tcg_gen_movi_i32(var, s->pc_curr + diff);
276     }
277 }
278 
279 /* Set a variable to the value of a CPU register.  */
280 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
281 {
282     if (reg == 15) {
283         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
284     } else {
285         tcg_gen_mov_i32(var, cpu_R[reg]);
286     }
287 }
288 
289 /*
290  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
291  * This is used for load/store for which use of PC implies (literal),
292  * or ADD that implies ADR.
293  */
294 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
295 {
296     TCGv_i32 tmp = tcg_temp_new_i32();
297 
298     if (reg == 15) {
299         /*
300          * This address is computed from an aligned PC:
301          * subtract off the low bits.
302          */
303         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
304     } else {
305         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
306     }
307     return tmp;
308 }
309 
310 /* Set a CPU register.  The source must be a temporary and will be
311    marked as dead.  */
312 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
313 {
314     if (reg == 15) {
315         /* In Thumb mode, we must ignore bit 0.
316          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
317          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
318          * We choose to ignore [1:0] in ARM mode for all architecture versions.
319          */
320         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
321         s->base.is_jmp = DISAS_JUMP;
322         s->pc_save = -1;
323     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
324         /* For M-profile SP bits [1:0] are always zero */
325         tcg_gen_andi_i32(var, var, ~3);
326     }
327     tcg_gen_mov_i32(cpu_R[reg], var);
328     tcg_temp_free_i32(var);
329 }
330 
331 /*
332  * Variant of store_reg which applies v8M stack-limit checks before updating
333  * SP. If the check fails this will result in an exception being taken.
334  * We disable the stack checks for CONFIG_USER_ONLY because we have
335  * no idea what the stack limits should be in that case.
336  * If stack checking is not being done this just acts like store_reg().
337  */
338 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
339 {
340 #ifndef CONFIG_USER_ONLY
341     if (s->v8m_stackcheck) {
342         gen_helper_v8m_stackcheck(cpu_env, var);
343     }
344 #endif
345     store_reg(s, 13, var);
346 }
347 
348 /* Value extensions.  */
349 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
350 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
351 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
352 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
353 
354 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
355 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
356 
357 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
358 {
359     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
360 }
361 
362 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
363 {
364     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
365 
366     if (new_el) {
367         if (m_profile) {
368             gen_helper_rebuild_hflags_m32_newel(cpu_env);
369         } else {
370             gen_helper_rebuild_hflags_a32_newel(cpu_env);
371         }
372     } else {
373         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
374         if (m_profile) {
375             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
376         } else {
377             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
378         }
379     }
380 }
381 
382 static void gen_exception_internal(int excp)
383 {
384     assert(excp_is_internal(excp));
385     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
386 }
387 
388 static void gen_singlestep_exception(DisasContext *s)
389 {
390     /* We just completed step of an insn. Move from Active-not-pending
391      * to Active-pending, and then also take the swstep exception.
392      * This corresponds to making the (IMPDEF) choice to prioritize
393      * swstep exceptions over asynchronous exceptions taken to an exception
394      * level where debug is disabled. This choice has the advantage that
395      * we do not need to maintain internal state corresponding to the
396      * ISV/EX syndrome bits between completion of the step and generation
397      * of the exception, and our syndrome information is always correct.
398      */
399     gen_ss_advance(s);
400     gen_swstep_exception(s, 1, s->is_ldex);
401     s->base.is_jmp = DISAS_NORETURN;
402 }
403 
404 void clear_eci_state(DisasContext *s)
405 {
406     /*
407      * Clear any ECI/ICI state: used when a load multiple/store
408      * multiple insn executes.
409      */
410     if (s->eci) {
411         store_cpu_field_constant(0, condexec_bits);
412         s->eci = 0;
413     }
414 }
415 
416 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
417 {
418     TCGv_i32 tmp1 = tcg_temp_new_i32();
419     TCGv_i32 tmp2 = tcg_temp_new_i32();
420     tcg_gen_ext16s_i32(tmp1, a);
421     tcg_gen_ext16s_i32(tmp2, b);
422     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
423     tcg_temp_free_i32(tmp2);
424     tcg_gen_sari_i32(a, a, 16);
425     tcg_gen_sari_i32(b, b, 16);
426     tcg_gen_mul_i32(b, b, a);
427     tcg_gen_mov_i32(a, tmp1);
428     tcg_temp_free_i32(tmp1);
429 }
430 
431 /* Byteswap each halfword.  */
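/* e.g. 0xaabbccdd -> 0xbbaaddcc; the 0x00ff00ff mask isolates one byte of
 * each halfword so the two bytes within each half can be exchanged. */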
432 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
433 {
434     TCGv_i32 tmp = tcg_temp_new_i32();
435     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
436     tcg_gen_shri_i32(tmp, var, 8);
437     tcg_gen_and_i32(tmp, tmp, mask);
438     tcg_gen_and_i32(var, var, mask);
439     tcg_gen_shli_i32(var, var, 8);
440     tcg_gen_or_i32(dest, var, tmp);
441     tcg_temp_free_i32(tmp);
442 }
443 
444 /* Byteswap low halfword and sign extend.  */
445 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
446 {
447     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
448 }
449 
450 /* Dual 16-bit add.  The result is placed in dest; t0 and t1 are clobbered:
451     tmp = (t0 ^ t1) & 0x8000;
452     t0 &= ~0x8000;
453     t1 &= ~0x8000;
454     dest = (t0 + t1) ^ tmp;
455  */
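/*
 * The XOR/AND dance suppresses the carry from bit 15 into bit 16 while an
 * ordinary 32-bit add is used: e.g. adding 0x8000 to 0x8000 yields 0x0000 in
 * the low halfword and does not carry into the high halfword (a plain 32-bit
 * add would produce 0x00010000).
 */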
456 
457 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 {
459     TCGv_i32 tmp = tcg_temp_new_i32();
460     tcg_gen_xor_i32(tmp, t0, t1);
461     tcg_gen_andi_i32(tmp, tmp, 0x8000);
462     tcg_gen_andi_i32(t0, t0, ~0x8000);
463     tcg_gen_andi_i32(t1, t1, ~0x8000);
464     tcg_gen_add_i32(t0, t0, t1);
465     tcg_gen_xor_i32(dest, t0, tmp);
466     tcg_temp_free_i32(tmp);
467 }
468 
469 /* Set N and Z flags from var.  */
470 static inline void gen_logic_CC(TCGv_i32 var)
471 {
472     tcg_gen_mov_i32(cpu_NF, var);
473     tcg_gen_mov_i32(cpu_ZF, var);
474 }
475 
476 /* dest = T0 + T1 + CF. */
477 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
478 {
479     tcg_gen_add_i32(dest, t0, t1);
480     tcg_gen_add_i32(dest, dest, cpu_CF);
481 }
482 
483 /* dest = T0 - T1 + CF - 1.  */
484 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
485 {
486     tcg_gen_sub_i32(dest, t0, t1);
487     tcg_gen_add_i32(dest, dest, cpu_CF);
488     tcg_gen_subi_i32(dest, dest, 1);
489 }
490 
491 /* dest = T0 + T1. Compute C, N, V and Z flags */
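/*
 * NF and CF come straight from a 32+32 -> 33 bit add via add2; ZF mirrors the
 * result because ZF is "nonzero means Z clear".  Overflow is computed as
 * VF = (result ^ t0) & ~(t0 ^ t1): set when the operands have the same sign
 * but the result's sign differs.
 */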
492 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
493 {
494     TCGv_i32 tmp = tcg_temp_new_i32();
495     tcg_gen_movi_i32(tmp, 0);
496     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
497     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
498     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
499     tcg_gen_xor_i32(tmp, t0, t1);
500     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
501     tcg_temp_free_i32(tmp);
502     tcg_gen_mov_i32(dest, cpu_NF);
503 }
504 
505 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
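/*
 * When the backend provides add2_i32 the carry-out is accumulated across two
 * add2 steps (t0 + CF, then + t1); otherwise the sum is formed in 64 bits and
 * extr splits it back into the 32-bit result (NF) and the carry-out (CF).
 */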
506 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
507 {
508     TCGv_i32 tmp = tcg_temp_new_i32();
509     if (TCG_TARGET_HAS_add2_i32) {
510         tcg_gen_movi_i32(tmp, 0);
511         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
512         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
513     } else {
514         TCGv_i64 q0 = tcg_temp_new_i64();
515         TCGv_i64 q1 = tcg_temp_new_i64();
516         tcg_gen_extu_i32_i64(q0, t0);
517         tcg_gen_extu_i32_i64(q1, t1);
518         tcg_gen_add_i64(q0, q0, q1);
519         tcg_gen_extu_i32_i64(q1, cpu_CF);
520         tcg_gen_add_i64(q0, q0, q1);
521         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
522         tcg_temp_free_i64(q0);
523         tcg_temp_free_i64(q1);
524     }
525     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
526     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
527     tcg_gen_xor_i32(tmp, t0, t1);
528     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
529     tcg_temp_free_i32(tmp);
530     tcg_gen_mov_i32(dest, cpu_NF);
531 }
532 
533 /* dest = T0 - T1. Compute C, N, V and Z flags */
534 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
535 {
536     TCGv_i32 tmp;
537     tcg_gen_sub_i32(cpu_NF, t0, t1);
538     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
539     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
540     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
541     tmp = tcg_temp_new_i32();
542     tcg_gen_xor_i32(tmp, t0, t1);
543     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
544     tcg_temp_free_i32(tmp);
545     tcg_gen_mov_i32(dest, cpu_NF);
546 }
547 
548 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
549 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
550 {
551     TCGv_i32 tmp = tcg_temp_new_i32();
552     tcg_gen_not_i32(tmp, t1);
553     gen_adc_CC(dest, t0, tmp);
554     tcg_temp_free_i32(tmp);
555 }
556 
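/*
 * Register-specified LSL/LSR: only the bottom byte of the shift register is
 * significant.  Counts 0..31 are done directly; for any count with bits [7:5]
 * set (i.e. 32..255) the architectural result is zero, which the movcond
 * below selects.
 */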
557 #define GEN_SHIFT(name)                                               \
558 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
559 {                                                                     \
560     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
561     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
562     TCGv_i32 zero = tcg_constant_i32(0);                              \
563     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
564     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
565     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
566     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
567     tcg_temp_free_i32(tmpd);                                          \
568     tcg_temp_free_i32(tmp1);                                          \
569 }
570 GEN_SHIFT(shl)
571 GEN_SHIFT(shr)
572 #undef GEN_SHIFT
573 
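/*
 * Register-specified ASR: counts of 32..255 all produce a result filled with
 * the sign bit, which equals an arithmetic shift by 31, so the count is
 * clamped with umin rather than special-cased (TCG shifts are only defined
 * for counts 0..31 on 32-bit values).
 */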
574 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
575 {
576     TCGv_i32 tmp1 = tcg_temp_new_i32();
577 
578     tcg_gen_andi_i32(tmp1, t1, 0xff);
579     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
580     tcg_gen_sar_i32(dest, t0, tmp1);
581     tcg_temp_free_i32(tmp1);
582 }
583 
584 static void shifter_out_im(TCGv_i32 var, int shift)
585 {
586     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
587 }
588 
589 /* Shift by immediate.  Includes special handling for shift == 0.  */
590 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
591                                     int shift, int flags)
592 {
593     switch (shiftop) {
594     case 0: /* LSL */
595         if (shift != 0) {
596             if (flags)
597                 shifter_out_im(var, 32 - shift);
598             tcg_gen_shli_i32(var, var, shift);
599         }
600         break;
601     case 1: /* LSR */
602         if (shift == 0) {
603             if (flags) {
604                 tcg_gen_shri_i32(cpu_CF, var, 31);
605             }
606             tcg_gen_movi_i32(var, 0);
607         } else {
608             if (flags)
609                 shifter_out_im(var, shift - 1);
610             tcg_gen_shri_i32(var, var, shift);
611         }
612         break;
613     case 2: /* ASR */
614         if (shift == 0)
615             shift = 32;
616         if (flags)
617             shifter_out_im(var, shift - 1);
618         if (shift == 32)
619             shift = 31;
620         tcg_gen_sari_i32(var, var, shift);
621         break;
622     case 3: /* ROR/RRX */
623         if (shift != 0) {
624             if (flags)
625                 shifter_out_im(var, shift - 1);
626             tcg_gen_rotri_i32(var, var, shift); break;
627         } else {
628             TCGv_i32 tmp = tcg_temp_new_i32();
629             tcg_gen_shli_i32(tmp, cpu_CF, 31);
630             if (flags)
631                 shifter_out_im(var, 0);
632             tcg_gen_shri_i32(var, var, 1);
633             tcg_gen_or_i32(var, var, tmp);
634             tcg_temp_free_i32(tmp);
635         }
636     }
637 }
638 
639 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
640                                      TCGv_i32 shift, int flags)
641 {
642     if (flags) {
643         switch (shiftop) {
644         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
645         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
646         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
647         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
648         }
649     } else {
650         switch (shiftop) {
651         case 0:
652             gen_shl(var, var, shift);
653             break;
654         case 1:
655             gen_shr(var, var, shift);
656             break;
657         case 2:
658             gen_sar(var, var, shift);
659             break;
660         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
661                 tcg_gen_rotr_i32(var, var, shift); break;
662         }
663     }
664     tcg_temp_free_i32(shift);
665 }
666 
667 /*
668  * Generate a conditional based on ARM condition code cc.
669  * This is common between ARM and AArch64 targets.
670  */
671 void arm_test_cc(DisasCompare *cmp, int cc)
672 {
673     TCGv_i32 value;
674     TCGCond cond;
675 
676     switch (cc) {
677     case 0: /* eq: Z */
678     case 1: /* ne: !Z */
679         cond = TCG_COND_EQ;
680         value = cpu_ZF;
681         break;
682 
683     case 2: /* cs: C */
684     case 3: /* cc: !C */
685         cond = TCG_COND_NE;
686         value = cpu_CF;
687         break;
688 
689     case 4: /* mi: N */
690     case 5: /* pl: !N */
691         cond = TCG_COND_LT;
692         value = cpu_NF;
693         break;
694 
695     case 6: /* vs: V */
696     case 7: /* vc: !V */
697         cond = TCG_COND_LT;
698         value = cpu_VF;
699         break;
700 
701     case 8: /* hi: C && !Z */
702     case 9: /* ls: !C || Z -> !(C && !Z) */
703         cond = TCG_COND_NE;
704         value = tcg_temp_new_i32();
705         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
706            ZF is non-zero for !Z; so AND the two subexpressions.  */
707         tcg_gen_neg_i32(value, cpu_CF);
708         tcg_gen_and_i32(value, value, cpu_ZF);
709         break;
710 
711     case 10: /* ge: N == V -> N ^ V == 0 */
712     case 11: /* lt: N != V -> N ^ V != 0 */
713         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
714         cond = TCG_COND_GE;
715         value = tcg_temp_new_i32();
716         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
717         break;
718 
719     case 12: /* gt: !Z && N == V */
720     case 13: /* le: Z || N != V */
721         cond = TCG_COND_NE;
722         value = tcg_temp_new_i32();
723         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
724          * the sign bit then AND with ZF to yield the result.  */
725         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
726         tcg_gen_sari_i32(value, value, 31);
727         tcg_gen_andc_i32(value, cpu_ZF, value);
728         break;
729 
730     case 14: /* always */
731     case 15: /* always */
732         /* Use the ALWAYS condition, which will fold early.
733          * It doesn't matter what we use for the value.  */
734         cond = TCG_COND_ALWAYS;
735         value = cpu_ZF;
736         goto no_invert;
737 
738     default:
739         fprintf(stderr, "Bad condition code 0x%x\n", cc);
740         abort();
741     }
742 
743     if (cc & 1) {
744         cond = tcg_invert_cond(cond);
745     }
746 
747  no_invert:
748     cmp->cond = cond;
749     cmp->value = value;
750 }
751 
752 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
753 {
754     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
755 }
756 
757 void arm_gen_test_cc(int cc, TCGLabel *label)
758 {
759     DisasCompare cmp;
760     arm_test_cc(&cmp, cc);
761     arm_jump_cc(&cmp, label);
762 }
763 
764 void gen_set_condexec(DisasContext *s)
765 {
766     if (s->condexec_mask) {
767         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
768 
769         store_cpu_field_constant(val, condexec_bits);
770     }
771 }
772 
773 void gen_update_pc(DisasContext *s, target_long diff)
774 {
775     gen_pc_plus_diff(s, cpu_R[15], diff);
776     s->pc_save = s->pc_curr + diff;
777 }
778 
779 /* Set PC and Thumb state from var.  var is marked as dead.  */
780 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
781 {
782     s->base.is_jmp = DISAS_JUMP;
783     tcg_gen_andi_i32(cpu_R[15], var, ~1);
784     tcg_gen_andi_i32(var, var, 1);
785     store_cpu_field(var, thumb);
786     s->pc_save = -1;
787 }
788 
789 /*
790  * Set PC and Thumb state from var. var is marked as dead.
791  * For M-profile CPUs, include logic to detect exception-return
792  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
793  * and BX reg, and no others, and happens only for code in Handler mode.
794  * The Security Extension also requires us to check for the FNC_RETURN
795  * which signals a function return from non-secure state; this can happen
796  * in both Handler and Thread mode.
797  * To avoid having to do multiple comparisons in inline generated code,
798  * we make the check we do here loose, so it will match for EXC_RETURN
799  * in Thread mode. For system emulation do_v7m_exception_exit() checks
800  * for these spurious cases and returns without doing anything (giving
801  * the same behaviour as for a branch to a non-magic address).
802  *
803  * In linux-user mode it is unclear what the right behaviour for an
804  * attempted FNC_RETURN should be, because in real hardware this will go
805  * directly to Secure code (ie not the Linux kernel) which will then treat
806  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
807  * attempt behave the way it would on a CPU without the security extension,
808  * which is to say "like a normal branch". That means we can simply treat
809  * all branches as normal with no magic address behaviour.
810  */
811 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
812 {
813     /* Generate the same code here as for a simple bx, but flag via
814      * s->base.is_jmp that we need to do the rest of the work later.
815      */
816     gen_bx(s, var);
817 #ifndef CONFIG_USER_ONLY
818     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
819         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
820         s->base.is_jmp = DISAS_BX_EXCRET;
821     }
822 #endif
823 }
824 
825 static inline void gen_bx_excret_final_code(DisasContext *s)
826 {
827     /* Generate the code to finish possible exception return and end the TB */
828     DisasLabel excret_label = gen_disas_label(s);
829     uint32_t min_magic;
830 
831     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
832         /* Covers FNC_RETURN and EXC_RETURN magic */
833         min_magic = FNC_RETURN_MIN_MAGIC;
834     } else {
835         /* EXC_RETURN magic only */
836         min_magic = EXC_RETURN_MIN_MAGIC;
837     }
838 
839     /* Is the new PC value in the magic range indicating exception return? */
840     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
841     /* No: end the TB as we would for a DISAS_JMP */
842     if (s->ss_active) {
843         gen_singlestep_exception(s);
844     } else {
845         tcg_gen_exit_tb(NULL, 0);
846     }
847     set_disas_label(s, excret_label);
848     /* Yes: this is an exception return.
849      * At this point in runtime env->regs[15] and env->thumb will hold
850      * the exception-return magic number, which do_v7m_exception_exit()
851      * will read. Nothing else will be able to see those values because
852      * the cpu-exec main loop guarantees that we will always go straight
853      * from raising the exception to the exception-handling code.
854      *
855      * gen_ss_advance(s) does nothing on M profile currently but
856      * calling it is conceptually the right thing as we have executed
857      * this instruction (compare SWI, HVC, SMC handling).
858      */
859     gen_ss_advance(s);
860     gen_exception_internal(EXCP_EXCEPTION_EXIT);
861 }
862 
863 static inline void gen_bxns(DisasContext *s, int rm)
864 {
865     TCGv_i32 var = load_reg(s, rm);
866 
867     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
868      * we need to sync state before calling it, but:
869      *  - we don't need to do gen_update_pc() because the bxns helper will
870      *    always set the PC itself
871      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
872      *    unless it's outside an IT block or the last insn in an IT block,
873      *    so we know that condexec == 0 (already set at the top of the TB)
874      *    is correct in the non-UNPREDICTABLE cases, and we can choose
875      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
876      */
877     gen_helper_v7m_bxns(cpu_env, var);
878     tcg_temp_free_i32(var);
879     s->base.is_jmp = DISAS_EXIT;
880 }
881 
882 static inline void gen_blxns(DisasContext *s, int rm)
883 {
884     TCGv_i32 var = load_reg(s, rm);
885 
886     /* We don't need to sync condexec state, for the same reason as bxns.
887      * We do however need to set the PC, because the blxns helper reads it.
888      * The blxns helper may throw an exception.
889      */
890     gen_update_pc(s, curr_insn_len(s));
891     gen_helper_v7m_blxns(cpu_env, var);
892     tcg_temp_free_i32(var);
893     s->base.is_jmp = DISAS_EXIT;
894 }
895 
896 /* Variant of store_reg which uses branch&exchange logic when storing
897    to r15 in ARM architecture v7 and above. The source must be a temporary
898    and will be marked as dead. */
899 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
900 {
901     if (reg == 15 && ENABLE_ARCH_7) {
902         gen_bx(s, var);
903     } else {
904         store_reg(s, reg, var);
905     }
906 }
907 
908 /* Variant of store_reg which uses branch&exchange logic when storing
909  * to r15 in ARM architecture v5T and above. This is used for storing
910  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
911  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
912 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
913 {
914     if (reg == 15 && ENABLE_ARCH_5) {
915         gen_bx_excret(s, var);
916     } else {
917         store_reg(s, reg, var);
918     }
919 }
920 
921 #ifdef CONFIG_USER_ONLY
922 #define IS_USER_ONLY 1
923 #else
924 #define IS_USER_ONLY 0
925 #endif
926 
927 MemOp pow2_align(unsigned i)
928 {
929     static const MemOp mop_align[] = {
930         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
931         /*
932          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
933          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
934          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
935          */
936         MO_ALIGN_16
937     };
938     g_assert(i < ARRAY_SIZE(mop_align));
939     return mop_align[i];
940 }
941 
942 /*
943  * Abstractions of "generate code to do a guest load/store for
944  * AArch32", where a vaddr is always 32 bits (and is zero
945  * extended if we're a 64-bit core) and data is also
946  * 32 bits unless specifically doing a 64 bit access.
947  * These functions work like tcg_gen_qemu_{ld,st}* except
948  * that the address argument is TCGv_i32 rather than TCGv.
949  */
950 
951 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
952 {
953     TCGv addr = tcg_temp_new();
954     tcg_gen_extu_i32_tl(addr, a32);
955 
956     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
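    /*
     * System-mode BE32 (SCTLR.B set) is modelled by keeping memory
     * word-invariant and XORing the low address bits of sub-word accesses:
     * a byte access is XORed with 3 and a halfword access with 2, e.g. a
     * byte load from address 0x1001 really reads offset 0x1002.
     */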
957     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
958         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
959     }
960     return addr;
961 }
962 
963 /*
964  * Internal routines are used for NEON cases where the endianness
965  * and/or alignment has already been taken into account and manipulated.
966  */
967 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
968                               TCGv_i32 a32, int index, MemOp opc)
969 {
970     TCGv addr = gen_aa32_addr(s, a32, opc);
971     tcg_gen_qemu_ld_i32(val, addr, index, opc);
972     tcg_temp_free(addr);
973 }
974 
975 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
976                               TCGv_i32 a32, int index, MemOp opc)
977 {
978     TCGv addr = gen_aa32_addr(s, a32, opc);
979     tcg_gen_qemu_st_i32(val, addr, index, opc);
980     tcg_temp_free(addr);
981 }
982 
983 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
984                               TCGv_i32 a32, int index, MemOp opc)
985 {
986     TCGv addr = gen_aa32_addr(s, a32, opc);
987 
988     tcg_gen_qemu_ld_i64(val, addr, index, opc);
989 
990     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
991     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
992         tcg_gen_rotri_i64(val, val, 32);
993     }
994     tcg_temp_free(addr);
995 }
996 
997 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
998                               TCGv_i32 a32, int index, MemOp opc)
999 {
1000     TCGv addr = gen_aa32_addr(s, a32, opc);
1001 
1002     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1003     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
1004         TCGv_i64 tmp = tcg_temp_new_i64();
1005         tcg_gen_rotri_i64(tmp, val, 32);
1006         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1007         tcg_temp_free_i64(tmp);
1008     } else {
1009         tcg_gen_qemu_st_i64(val, addr, index, opc);
1010     }
1011     tcg_temp_free(addr);
1012 }
1013 
1014 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1015                      int index, MemOp opc)
1016 {
1017     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1018 }
1019 
1020 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1021                      int index, MemOp opc)
1022 {
1023     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1024 }
1025 
1026 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1027                      int index, MemOp opc)
1028 {
1029     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1030 }
1031 
1032 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1033                      int index, MemOp opc)
1034 {
1035     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1036 }
1037 
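/*
 * These macros instantiate fixed-size wrappers such as gen_aa32_ld8u() and
 * gen_aa32_st32(), used by the legacy iwMMXt code below; they simply forward
 * to gen_aa32_ld_i32()/gen_aa32_st_i32() with the MemOp baked in.
 */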
1038 #define DO_GEN_LD(SUFF, OPC)                                            \
1039     static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1040                                          TCGv_i32 a32, int index)       \
1041     {                                                                   \
1042         gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1043     }
1044 
1045 #define DO_GEN_ST(SUFF, OPC)                                            \
1046     static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1047                                          TCGv_i32 a32, int index)       \
1048     {                                                                   \
1049         gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1050     }
1051 
1052 static inline void gen_hvc(DisasContext *s, int imm16)
1053 {
1054     /* The pre HVC helper handles cases when HVC gets trapped
1055      * as an undefined insn by runtime configuration (i.e. before
1056      * the insn really executes).
1057      */
1058     gen_update_pc(s, 0);
1059     gen_helper_pre_hvc(cpu_env);
1060     /* Otherwise we will treat this as a real exception which
1061      * happens after execution of the insn. (The distinction matters
1062      * for the PC value reported to the exception handler and also
1063      * for single stepping.)
1064      */
1065     s->svc_imm = imm16;
1066     gen_update_pc(s, curr_insn_len(s));
1067     s->base.is_jmp = DISAS_HVC;
1068 }
1069 
1070 static inline void gen_smc(DisasContext *s)
1071 {
1072     /* As with HVC, we may take an exception either before or after
1073      * the insn executes.
1074      */
1075     gen_update_pc(s, 0);
1076     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1077     gen_update_pc(s, curr_insn_len(s));
1078     s->base.is_jmp = DISAS_SMC;
1079 }
1080 
1081 static void gen_exception_internal_insn(DisasContext *s, int excp)
1082 {
1083     gen_set_condexec(s);
1084     gen_update_pc(s, 0);
1085     gen_exception_internal(excp);
1086     s->base.is_jmp = DISAS_NORETURN;
1087 }
1088 
1089 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1090 {
1091     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1092                                           tcg_constant_i32(syndrome), tcg_el);
1093 }
1094 
1095 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1096 {
1097     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1098 }
1099 
1100 static void gen_exception(int excp, uint32_t syndrome)
1101 {
1102     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1103                                        tcg_constant_i32(syndrome));
1104 }
1105 
1106 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1107                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1108 {
1109     if (s->aarch64) {
1110         gen_a64_update_pc(s, pc_diff);
1111     } else {
1112         gen_set_condexec(s);
1113         gen_update_pc(s, pc_diff);
1114     }
1115     gen_exception_el_v(excp, syn, tcg_el);
1116     s->base.is_jmp = DISAS_NORETURN;
1117 }
1118 
1119 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1120                            uint32_t syn, uint32_t target_el)
1121 {
1122     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1123                             tcg_constant_i32(target_el));
1124 }
1125 
1126 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1127                         int excp, uint32_t syn)
1128 {
1129     if (s->aarch64) {
1130         gen_a64_update_pc(s, pc_diff);
1131     } else {
1132         gen_set_condexec(s);
1133         gen_update_pc(s, pc_diff);
1134     }
1135     gen_exception(excp, syn);
1136     s->base.is_jmp = DISAS_NORETURN;
1137 }
1138 
1139 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1140 {
1141     gen_set_condexec(s);
1142     gen_update_pc(s, 0);
1143     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1144     s->base.is_jmp = DISAS_NORETURN;
1145 }
1146 
1147 void unallocated_encoding(DisasContext *s)
1148 {
1149     /* Unallocated and reserved encodings are uncategorized */
1150     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1151 }
1152 
1153 /* Force a TB lookup after an instruction that changes the CPU state.  */
1154 void gen_lookup_tb(DisasContext *s)
1155 {
1156     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1157     s->base.is_jmp = DISAS_EXIT;
1158 }
1159 
1160 static inline void gen_hlt(DisasContext *s, int imm)
1161 {
1162     /* HLT. This has two purposes.
1163      * Architecturally, it is an external halting debug instruction.
1164      * Since QEMU doesn't implement external debug, we treat this as
1165      * it is required to behave when halting debug is disabled: it will UNDEF.
1166      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1167      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1168      * must trigger semihosting even for ARMv7 and earlier, where
1169      * HLT was an undefined encoding.
1170      * In system mode, we don't allow userspace access to
1171      * semihosting, to provide some semblance of security
1172      * (and for consistency with our 32-bit semihosting).
1173      */
1174     if (semihosting_enabled(s->current_el == 0) &&
1175         (imm == (s->thumb ? 0x3c : 0xf000))) {
1176         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1177         return;
1178     }
1179 
1180     unallocated_encoding(s);
1181 }
1182 
1183 /*
1184  * Return the offset of a "full" NEON Dreg.
1185  */
1186 long neon_full_reg_offset(unsigned reg)
1187 {
1188     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1189 }
1190 
1191 /*
1192  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1193  * where 0 is the least significant end of the register.
1194  */
1195 long neon_element_offset(int reg, int element, MemOp memop)
1196 {
1197     int element_size = 1 << (memop & MO_SIZE);
1198     int ofs = element * element_size;
1199 #if HOST_BIG_ENDIAN
1200     /*
1201      * Calculate the offset assuming fully little-endian,
1202      * then XOR to account for the order of the 8-byte units.
1203      */
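    /*
     * e.g. on a big-endian host a 32-bit element 0 lives at byte offset 4 of
     * the 8-byte unit (0 ^ (8 - 4)), and a 16-bit element 0 at offset 6.
     */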
1204     if (element_size < 8) {
1205         ofs ^= 8 - element_size;
1206     }
1207 #endif
1208     return neon_full_reg_offset(reg) + ofs;
1209 }
1210 
1211 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1212 long vfp_reg_offset(bool dp, unsigned reg)
1213 {
1214     if (dp) {
1215         return neon_element_offset(reg, 0, MO_64);
1216     } else {
1217         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1218     }
1219 }
1220 
1221 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1222 {
1223     long off = neon_element_offset(reg, ele, memop);
1224 
1225     switch (memop) {
1226     case MO_SB:
1227         tcg_gen_ld8s_i32(dest, cpu_env, off);
1228         break;
1229     case MO_UB:
1230         tcg_gen_ld8u_i32(dest, cpu_env, off);
1231         break;
1232     case MO_SW:
1233         tcg_gen_ld16s_i32(dest, cpu_env, off);
1234         break;
1235     case MO_UW:
1236         tcg_gen_ld16u_i32(dest, cpu_env, off);
1237         break;
1238     case MO_UL:
1239     case MO_SL:
1240         tcg_gen_ld_i32(dest, cpu_env, off);
1241         break;
1242     default:
1243         g_assert_not_reached();
1244     }
1245 }
1246 
1247 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1248 {
1249     long off = neon_element_offset(reg, ele, memop);
1250 
1251     switch (memop) {
1252     case MO_SL:
1253         tcg_gen_ld32s_i64(dest, cpu_env, off);
1254         break;
1255     case MO_UL:
1256         tcg_gen_ld32u_i64(dest, cpu_env, off);
1257         break;
1258     case MO_UQ:
1259         tcg_gen_ld_i64(dest, cpu_env, off);
1260         break;
1261     default:
1262         g_assert_not_reached();
1263     }
1264 }
1265 
1266 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1267 {
1268     long off = neon_element_offset(reg, ele, memop);
1269 
1270     switch (memop) {
1271     case MO_8:
1272         tcg_gen_st8_i32(src, cpu_env, off);
1273         break;
1274     case MO_16:
1275         tcg_gen_st16_i32(src, cpu_env, off);
1276         break;
1277     case MO_32:
1278         tcg_gen_st_i32(src, cpu_env, off);
1279         break;
1280     default:
1281         g_assert_not_reached();
1282     }
1283 }
1284 
1285 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1286 {
1287     long off = neon_element_offset(reg, ele, memop);
1288 
1289     switch (memop) {
1290     case MO_32:
1291         tcg_gen_st32_i64(src, cpu_env, off);
1292         break;
1293     case MO_64:
1294         tcg_gen_st_i64(src, cpu_env, off);
1295         break;
1296     default:
1297         g_assert_not_reached();
1298     }
1299 }
1300 
1301 #define ARM_CP_RW_BIT   (1 << 20)
1302 
1303 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1304 {
1305     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1306 }
1307 
1308 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1309 {
1310     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1311 }
1312 
1313 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1314 {
1315     TCGv_i32 var = tcg_temp_new_i32();
1316     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1317     return var;
1318 }
1319 
1320 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1321 {
1322     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1323     tcg_temp_free_i32(var);
1324 }
1325 
1326 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1327 {
1328     iwmmxt_store_reg(cpu_M0, rn);
1329 }
1330 
1331 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1332 {
1333     iwmmxt_load_reg(cpu_M0, rn);
1334 }
1335 
1336 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1337 {
1338     iwmmxt_load_reg(cpu_V1, rn);
1339     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1340 }
1341 
1342 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1343 {
1344     iwmmxt_load_reg(cpu_V1, rn);
1345     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1346 }
1347 
1348 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1349 {
1350     iwmmxt_load_reg(cpu_V1, rn);
1351     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1352 }
1353 
1354 #define IWMMXT_OP(name) \
1355 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1356 { \
1357     iwmmxt_load_reg(cpu_V1, rn); \
1358     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1359 }
1360 
1361 #define IWMMXT_OP_ENV(name) \
1362 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1363 { \
1364     iwmmxt_load_reg(cpu_V1, rn); \
1365     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1366 }
1367 
1368 #define IWMMXT_OP_ENV_SIZE(name) \
1369 IWMMXT_OP_ENV(name##b) \
1370 IWMMXT_OP_ENV(name##w) \
1371 IWMMXT_OP_ENV(name##l)
1372 
1373 #define IWMMXT_OP_ENV1(name) \
1374 static inline void gen_op_iwmmxt_##name##_M0(void) \
1375 { \
1376     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1377 }
1378 
1379 IWMMXT_OP(maddsq)
1380 IWMMXT_OP(madduq)
1381 IWMMXT_OP(sadb)
1382 IWMMXT_OP(sadw)
1383 IWMMXT_OP(mulslw)
1384 IWMMXT_OP(mulshw)
1385 IWMMXT_OP(mululw)
1386 IWMMXT_OP(muluhw)
1387 IWMMXT_OP(macsw)
1388 IWMMXT_OP(macuw)
1389 
1390 IWMMXT_OP_ENV_SIZE(unpackl)
1391 IWMMXT_OP_ENV_SIZE(unpackh)
1392 
1393 IWMMXT_OP_ENV1(unpacklub)
1394 IWMMXT_OP_ENV1(unpackluw)
1395 IWMMXT_OP_ENV1(unpacklul)
1396 IWMMXT_OP_ENV1(unpackhub)
1397 IWMMXT_OP_ENV1(unpackhuw)
1398 IWMMXT_OP_ENV1(unpackhul)
1399 IWMMXT_OP_ENV1(unpacklsb)
1400 IWMMXT_OP_ENV1(unpacklsw)
1401 IWMMXT_OP_ENV1(unpacklsl)
1402 IWMMXT_OP_ENV1(unpackhsb)
1403 IWMMXT_OP_ENV1(unpackhsw)
1404 IWMMXT_OP_ENV1(unpackhsl)
1405 
1406 IWMMXT_OP_ENV_SIZE(cmpeq)
1407 IWMMXT_OP_ENV_SIZE(cmpgtu)
1408 IWMMXT_OP_ENV_SIZE(cmpgts)
1409 
1410 IWMMXT_OP_ENV_SIZE(mins)
1411 IWMMXT_OP_ENV_SIZE(minu)
1412 IWMMXT_OP_ENV_SIZE(maxs)
1413 IWMMXT_OP_ENV_SIZE(maxu)
1414 
1415 IWMMXT_OP_ENV_SIZE(subn)
1416 IWMMXT_OP_ENV_SIZE(addn)
1417 IWMMXT_OP_ENV_SIZE(subu)
1418 IWMMXT_OP_ENV_SIZE(addu)
1419 IWMMXT_OP_ENV_SIZE(subs)
1420 IWMMXT_OP_ENV_SIZE(adds)
1421 
1422 IWMMXT_OP_ENV(avgb0)
1423 IWMMXT_OP_ENV(avgb1)
1424 IWMMXT_OP_ENV(avgw0)
1425 IWMMXT_OP_ENV(avgw1)
1426 
1427 IWMMXT_OP_ENV(packuw)
1428 IWMMXT_OP_ENV(packul)
1429 IWMMXT_OP_ENV(packuq)
1430 IWMMXT_OP_ENV(packsw)
1431 IWMMXT_OP_ENV(packsl)
1432 IWMMXT_OP_ENV(packsq)
1433 
1434 static void gen_op_iwmmxt_set_mup(void)
1435 {
1436     TCGv_i32 tmp;
1437     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1438     tcg_gen_ori_i32(tmp, tmp, 2);
1439     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1440 }
1441 
1442 static void gen_op_iwmmxt_set_cup(void)
1443 {
1444     TCGv_i32 tmp;
1445     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1446     tcg_gen_ori_i32(tmp, tmp, 1);
1447     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1448 }
1449 
1450 static void gen_op_iwmmxt_setpsr_nz(void)
1451 {
1452     TCGv_i32 tmp = tcg_temp_new_i32();
1453     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1454     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1455 }
1456 
1457 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1458 {
1459     iwmmxt_load_reg(cpu_V1, rn);
1460     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1461     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1462 }
1463 
1464 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1465                                      TCGv_i32 dest)
1466 {
1467     int rd;
1468     uint32_t offset;
1469     TCGv_i32 tmp;
1470 
1471     rd = (insn >> 16) & 0xf;
1472     tmp = load_reg(s, rd);
1473 
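    /*
     * The offset is an 8-bit immediate, scaled by 4 when bit 8 of the insn
     * is set: (insn >> 7) & 2 yields a shift of either 0 or 2.
     */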
1474     offset = (insn & 0xff) << ((insn >> 7) & 2);
1475     if (insn & (1 << 24)) {
1476         /* Pre indexed */
1477         if (insn & (1 << 23))
1478             tcg_gen_addi_i32(tmp, tmp, offset);
1479         else
1480             tcg_gen_addi_i32(tmp, tmp, -offset);
1481         tcg_gen_mov_i32(dest, tmp);
1482         if (insn & (1 << 21))
1483             store_reg(s, rd, tmp);
1484         else
1485             tcg_temp_free_i32(tmp);
1486     } else if (insn & (1 << 21)) {
1487         /* Post indexed */
1488         tcg_gen_mov_i32(dest, tmp);
1489         if (insn & (1 << 23))
1490             tcg_gen_addi_i32(tmp, tmp, offset);
1491         else
1492             tcg_gen_addi_i32(tmp, tmp, -offset);
1493         store_reg(s, rd, tmp);
1494     } else if (!(insn & (1 << 23)))
1495         return 1;
1496     return 0;
1497 }
1498 
1499 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1500 {
1501     int rd = (insn >> 0) & 0xf;
1502     TCGv_i32 tmp;
1503 
1504     if (insn & (1 << 8)) {
1505         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1506             return 1;
1507         } else {
1508             tmp = iwmmxt_load_creg(rd);
1509         }
1510     } else {
1511         tmp = tcg_temp_new_i32();
1512         iwmmxt_load_reg(cpu_V0, rd);
1513         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1514     }
1515     tcg_gen_andi_i32(tmp, tmp, mask);
1516     tcg_gen_mov_i32(dest, tmp);
1517     tcg_temp_free_i32(tmp);
1518     return 0;
1519 }
1520 
1521 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1522    (i.e. an undefined instruction).  */
1523 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1524 {
1525     int rd, wrd;
1526     int rdhi, rdlo, rd0, rd1, i;
1527     TCGv_i32 addr;
1528     TCGv_i32 tmp, tmp2, tmp3;
1529 
1530     if ((insn & 0x0e000e00) == 0x0c000000) {
1531         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1532             wrd = insn & 0xf;
1533             rdlo = (insn >> 12) & 0xf;
1534             rdhi = (insn >> 16) & 0xf;
1535             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1536                 iwmmxt_load_reg(cpu_V0, wrd);
1537                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1538                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1539             } else {                                    /* TMCRR */
1540                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1541                 iwmmxt_store_reg(cpu_V0, wrd);
1542                 gen_op_iwmmxt_set_mup();
1543             }
1544             return 0;
1545         }
1546 
1547         wrd = (insn >> 12) & 0xf;
1548         addr = tcg_temp_new_i32();
1549         if (gen_iwmmxt_address(s, insn, addr)) {
1550             tcg_temp_free_i32(addr);
1551             return 1;
1552         }
1553         if (insn & ARM_CP_RW_BIT) {
1554             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1555                 tmp = tcg_temp_new_i32();
1556                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1557                 iwmmxt_store_creg(wrd, tmp);
1558             } else {
1559                 i = 1;
1560                 if (insn & (1 << 8)) {
1561                     if (insn & (1 << 22)) {             /* WLDRD */
1562                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1563                         i = 0;
1564                     } else {                            /* WLDRW wRd */
1565                         tmp = tcg_temp_new_i32();
1566                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1567                     }
1568                 } else {
1569                     tmp = tcg_temp_new_i32();
1570                     if (insn & (1 << 22)) {             /* WLDRH */
1571                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1572                     } else {                            /* WLDRB */
1573                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1574                     }
1575                 }
1576                 if (i) {
1577                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1578                     tcg_temp_free_i32(tmp);
1579                 }
1580                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1581             }
1582         } else {
1583             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1584                 tmp = iwmmxt_load_creg(wrd);
1585                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1586             } else {
1587                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1588                 tmp = tcg_temp_new_i32();
1589                 if (insn & (1 << 8)) {
1590                     if (insn & (1 << 22)) {             /* WSTRD */
1591                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1592                     } else {                            /* WSTRW wRd */
1593                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1594                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1595                     }
1596                 } else {
1597                     if (insn & (1 << 22)) {             /* WSTRH */
1598                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1599                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1600                     } else {                            /* WSTRB */
1601                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1602                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1603                     }
1604                 }
1605             }
1606             tcg_temp_free_i32(tmp);
1607         }
1608         tcg_temp_free_i32(addr);
1609         return 0;
1610     }
1611 
1612     if ((insn & 0x0f000000) != 0x0e000000)
1613         return 1;
1614 
1615     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1616     case 0x000:                                                 /* WOR */
1617         wrd = (insn >> 12) & 0xf;
1618         rd0 = (insn >> 0) & 0xf;
1619         rd1 = (insn >> 16) & 0xf;
1620         gen_op_iwmmxt_movq_M0_wRn(rd0);
1621         gen_op_iwmmxt_orq_M0_wRn(rd1);
1622         gen_op_iwmmxt_setpsr_nz();
1623         gen_op_iwmmxt_movq_wRn_M0(wrd);
1624         gen_op_iwmmxt_set_mup();
1625         gen_op_iwmmxt_set_cup();
1626         break;
1627     case 0x011:                                                 /* TMCR */
1628         if (insn & 0xf)
1629             return 1;
1630         rd = (insn >> 12) & 0xf;
1631         wrd = (insn >> 16) & 0xf;
1632         switch (wrd) {
1633         case ARM_IWMMXT_wCID:
1634         case ARM_IWMMXT_wCASF:
1635             break;
1636         case ARM_IWMMXT_wCon:
1637             gen_op_iwmmxt_set_cup();
1638             /* Fall through.  */
1639         case ARM_IWMMXT_wCSSF:
1640             tmp = iwmmxt_load_creg(wrd);
1641             tmp2 = load_reg(s, rd);
1642             tcg_gen_andc_i32(tmp, tmp, tmp2);
1643             tcg_temp_free_i32(tmp2);
1644             iwmmxt_store_creg(wrd, tmp);
1645             break;
1646         case ARM_IWMMXT_wCGR0:
1647         case ARM_IWMMXT_wCGR1:
1648         case ARM_IWMMXT_wCGR2:
1649         case ARM_IWMMXT_wCGR3:
1650             gen_op_iwmmxt_set_cup();
1651             tmp = load_reg(s, rd);
1652             iwmmxt_store_creg(wrd, tmp);
1653             break;
1654         default:
1655             return 1;
1656         }
1657         break;
1658     case 0x100:                                                 /* WXOR */
1659         wrd = (insn >> 12) & 0xf;
1660         rd0 = (insn >> 0) & 0xf;
1661         rd1 = (insn >> 16) & 0xf;
1662         gen_op_iwmmxt_movq_M0_wRn(rd0);
1663         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1664         gen_op_iwmmxt_setpsr_nz();
1665         gen_op_iwmmxt_movq_wRn_M0(wrd);
1666         gen_op_iwmmxt_set_mup();
1667         gen_op_iwmmxt_set_cup();
1668         break;
1669     case 0x111:                                                 /* TMRC */
1670         if (insn & 0xf)
1671             return 1;
1672         rd = (insn >> 12) & 0xf;
1673         wrd = (insn >> 16) & 0xf;
1674         tmp = iwmmxt_load_creg(wrd);
1675         store_reg(s, rd, tmp);
1676         break;
1677     case 0x300:                                                 /* WANDN */
1678         wrd = (insn >> 12) & 0xf;
1679         rd0 = (insn >> 0) & 0xf;
1680         rd1 = (insn >> 16) & 0xf;
1681         gen_op_iwmmxt_movq_M0_wRn(rd0);
1682         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1683         gen_op_iwmmxt_andq_M0_wRn(rd1);
1684         gen_op_iwmmxt_setpsr_nz();
1685         gen_op_iwmmxt_movq_wRn_M0(wrd);
1686         gen_op_iwmmxt_set_mup();
1687         gen_op_iwmmxt_set_cup();
1688         break;
1689     case 0x200:                                                 /* WAND */
1690         wrd = (insn >> 12) & 0xf;
1691         rd0 = (insn >> 0) & 0xf;
1692         rd1 = (insn >> 16) & 0xf;
1693         gen_op_iwmmxt_movq_M0_wRn(rd0);
1694         gen_op_iwmmxt_andq_M0_wRn(rd1);
1695         gen_op_iwmmxt_setpsr_nz();
1696         gen_op_iwmmxt_movq_wRn_M0(wrd);
1697         gen_op_iwmmxt_set_mup();
1698         gen_op_iwmmxt_set_cup();
1699         break;
1700     case 0x810: case 0xa10:                             /* WMADD */
1701         wrd = (insn >> 12) & 0xf;
1702         rd0 = (insn >> 0) & 0xf;
1703         rd1 = (insn >> 16) & 0xf;
1704         gen_op_iwmmxt_movq_M0_wRn(rd0);
1705         if (insn & (1 << 21))
1706             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1707         else
1708             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1709         gen_op_iwmmxt_movq_wRn_M0(wrd);
1710         gen_op_iwmmxt_set_mup();
1711         break;
1712     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1713         wrd = (insn >> 12) & 0xf;
1714         rd0 = (insn >> 16) & 0xf;
1715         rd1 = (insn >> 0) & 0xf;
1716         gen_op_iwmmxt_movq_M0_wRn(rd0);
1717         switch ((insn >> 22) & 3) {
1718         case 0:
1719             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1720             break;
1721         case 1:
1722             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1723             break;
1724         case 2:
1725             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1726             break;
1727         case 3:
1728             return 1;
1729         }
1730         gen_op_iwmmxt_movq_wRn_M0(wrd);
1731         gen_op_iwmmxt_set_mup();
1732         gen_op_iwmmxt_set_cup();
1733         break;
1734     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1735         wrd = (insn >> 12) & 0xf;
1736         rd0 = (insn >> 16) & 0xf;
1737         rd1 = (insn >> 0) & 0xf;
1738         gen_op_iwmmxt_movq_M0_wRn(rd0);
1739         switch ((insn >> 22) & 3) {
1740         case 0:
1741             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1742             break;
1743         case 1:
1744             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1745             break;
1746         case 2:
1747             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1748             break;
1749         case 3:
1750             return 1;
1751         }
1752         gen_op_iwmmxt_movq_wRn_M0(wrd);
1753         gen_op_iwmmxt_set_mup();
1754         gen_op_iwmmxt_set_cup();
1755         break;
1756     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1757         wrd = (insn >> 12) & 0xf;
1758         rd0 = (insn >> 16) & 0xf;
1759         rd1 = (insn >> 0) & 0xf;
1760         gen_op_iwmmxt_movq_M0_wRn(rd0);
1761         if (insn & (1 << 22))
1762             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1763         else
1764             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1765         if (!(insn & (1 << 20)))
1766             gen_op_iwmmxt_addl_M0_wRn(wrd);
1767         gen_op_iwmmxt_movq_wRn_M0(wrd);
1768         gen_op_iwmmxt_set_mup();
1769         break;
1770     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1771         wrd = (insn >> 12) & 0xf;
1772         rd0 = (insn >> 16) & 0xf;
1773         rd1 = (insn >> 0) & 0xf;
1774         gen_op_iwmmxt_movq_M0_wRn(rd0);
1775         if (insn & (1 << 21)) {
1776             if (insn & (1 << 20))
1777                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1778             else
1779                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1780         } else {
1781             if (insn & (1 << 20))
1782                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1783             else
1784                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1785         }
1786         gen_op_iwmmxt_movq_wRn_M0(wrd);
1787         gen_op_iwmmxt_set_mup();
1788         break;
1789     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1790         wrd = (insn >> 12) & 0xf;
1791         rd0 = (insn >> 16) & 0xf;
1792         rd1 = (insn >> 0) & 0xf;
1793         gen_op_iwmmxt_movq_M0_wRn(rd0);
1794         if (insn & (1 << 21))
1795             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1796         else
1797             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1798         if (!(insn & (1 << 20))) {
1799             iwmmxt_load_reg(cpu_V1, wrd);
1800             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1801         }
1802         gen_op_iwmmxt_movq_wRn_M0(wrd);
1803         gen_op_iwmmxt_set_mup();
1804         break;
1805     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1806         wrd = (insn >> 12) & 0xf;
1807         rd0 = (insn >> 16) & 0xf;
1808         rd1 = (insn >> 0) & 0xf;
1809         gen_op_iwmmxt_movq_M0_wRn(rd0);
1810         switch ((insn >> 22) & 3) {
1811         case 0:
1812             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1813             break;
1814         case 1:
1815             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1816             break;
1817         case 2:
1818             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1819             break;
1820         case 3:
1821             return 1;
1822         }
1823         gen_op_iwmmxt_movq_wRn_M0(wrd);
1824         gen_op_iwmmxt_set_mup();
1825         gen_op_iwmmxt_set_cup();
1826         break;
1827     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1828         wrd = (insn >> 12) & 0xf;
1829         rd0 = (insn >> 16) & 0xf;
1830         rd1 = (insn >> 0) & 0xf;
1831         gen_op_iwmmxt_movq_M0_wRn(rd0);
1832         if (insn & (1 << 22)) {
1833             if (insn & (1 << 20))
1834                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1835             else
1836                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1837         } else {
1838             if (insn & (1 << 20))
1839                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1840             else
1841                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1842         }
1843         gen_op_iwmmxt_movq_wRn_M0(wrd);
1844         gen_op_iwmmxt_set_mup();
1845         gen_op_iwmmxt_set_cup();
1846         break;
1847     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1848         wrd = (insn >> 12) & 0xf;
1849         rd0 = (insn >> 16) & 0xf;
1850         rd1 = (insn >> 0) & 0xf;
1851         gen_op_iwmmxt_movq_M0_wRn(rd0);
1852         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1853         tcg_gen_andi_i32(tmp, tmp, 7);
1854         iwmmxt_load_reg(cpu_V1, rd1);
1855         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1856         tcg_temp_free_i32(tmp);
1857         gen_op_iwmmxt_movq_wRn_M0(wrd);
1858         gen_op_iwmmxt_set_mup();
1859         break;
1860     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1861         if (((insn >> 6) & 3) == 3)
1862             return 1;
1863         rd = (insn >> 12) & 0xf;
1864         wrd = (insn >> 16) & 0xf;
1865         tmp = load_reg(s, rd);
1866         gen_op_iwmmxt_movq_M0_wRn(wrd);
1867         switch ((insn >> 6) & 3) {
1868         case 0:
1869             tmp2 = tcg_constant_i32(0xff);
1870             tmp3 = tcg_constant_i32((insn & 7) << 3);
1871             break;
1872         case 1:
1873             tmp2 = tcg_constant_i32(0xffff);
1874             tmp3 = tcg_constant_i32((insn & 3) << 4);
1875             break;
1876         case 2:
1877             tmp2 = tcg_constant_i32(0xffffffff);
1878             tmp3 = tcg_constant_i32((insn & 1) << 5);
1879             break;
1880         default:
1881             g_assert_not_reached();
1882         }
1883         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1884         tcg_temp_free_i32(tmp);
1885         gen_op_iwmmxt_movq_wRn_M0(wrd);
1886         gen_op_iwmmxt_set_mup();
1887         break;
1888     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1889         rd = (insn >> 12) & 0xf;
1890         wrd = (insn >> 16) & 0xf;
1891         if (rd == 15 || ((insn >> 22) & 3) == 3)
1892             return 1;
1893         gen_op_iwmmxt_movq_M0_wRn(wrd);
1894         tmp = tcg_temp_new_i32();
1895         switch ((insn >> 22) & 3) {
1896         case 0:
1897             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1898             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1899             if (insn & 8) {
1900                 tcg_gen_ext8s_i32(tmp, tmp);
1901             } else {
1902                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1903             }
1904             break;
1905         case 1:
1906             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1907             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1908             if (insn & 8) {
1909                 tcg_gen_ext16s_i32(tmp, tmp);
1910             } else {
1911                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1912             }
1913             break;
1914         case 2:
1915             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1916             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1917             break;
1918         }
1919         store_reg(s, rd, tmp);
1920         break;
1921     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1922         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1923             return 1;
1924         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1925         switch ((insn >> 22) & 3) {
1926         case 0:
1927             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1928             break;
1929         case 1:
1930             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1931             break;
1932         case 2:
1933             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1934             break;
1935         }
1936         tcg_gen_shli_i32(tmp, tmp, 28);
1937         gen_set_nzcv(tmp);
1938         tcg_temp_free_i32(tmp);
1939         break;
1940     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1941         if (((insn >> 6) & 3) == 3)
1942             return 1;
1943         rd = (insn >> 12) & 0xf;
1944         wrd = (insn >> 16) & 0xf;
1945         tmp = load_reg(s, rd);
1946         switch ((insn >> 6) & 3) {
1947         case 0:
1948             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1949             break;
1950         case 1:
1951             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1952             break;
1953         case 2:
1954             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1955             break;
1956         }
1957         tcg_temp_free_i32(tmp);
1958         gen_op_iwmmxt_movq_wRn_M0(wrd);
1959         gen_op_iwmmxt_set_mup();
1960         break;
1961     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1962         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1963             return 1;
1964         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1965         tmp2 = tcg_temp_new_i32();
1966         tcg_gen_mov_i32(tmp2, tmp);
1967         switch ((insn >> 22) & 3) {
1968         case 0:
1969             for (i = 0; i < 7; i++) {
1970                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1971                 tcg_gen_and_i32(tmp, tmp, tmp2);
1972             }
1973             break;
1974         case 1:
1975             for (i = 0; i < 3; i++) {
1976                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1977                 tcg_gen_and_i32(tmp, tmp, tmp2);
1978             }
1979             break;
1980         case 2:
1981             tcg_gen_shli_i32(tmp2, tmp2, 16);
1982             tcg_gen_and_i32(tmp, tmp, tmp2);
1983             break;
1984         }
1985         gen_set_nzcv(tmp);
1986         tcg_temp_free_i32(tmp2);
1987         tcg_temp_free_i32(tmp);
1988         break;
1989     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1990         wrd = (insn >> 12) & 0xf;
1991         rd0 = (insn >> 16) & 0xf;
1992         gen_op_iwmmxt_movq_M0_wRn(rd0);
1993         switch ((insn >> 22) & 3) {
1994         case 0:
1995             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1996             break;
1997         case 1:
1998             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1999             break;
2000         case 2:
2001             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2002             break;
2003         case 3:
2004             return 1;
2005         }
2006         gen_op_iwmmxt_movq_wRn_M0(wrd);
2007         gen_op_iwmmxt_set_mup();
2008         break;
2009     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2010         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2011             return 1;
2012         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2013         tmp2 = tcg_temp_new_i32();
2014         tcg_gen_mov_i32(tmp2, tmp);
2015         switch ((insn >> 22) & 3) {
2016         case 0:
2017             for (i = 0; i < 7; i++) {
2018                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2019                 tcg_gen_or_i32(tmp, tmp, tmp2);
2020             }
2021             break;
2022         case 1:
2023             for (i = 0; i < 3; i++) {
2024                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2025                 tcg_gen_or_i32(tmp, tmp, tmp2);
2026             }
2027             break;
2028         case 2:
2029             tcg_gen_shli_i32(tmp2, tmp2, 16);
2030             tcg_gen_or_i32(tmp, tmp, tmp2);
2031             break;
2032         }
2033         gen_set_nzcv(tmp);
2034         tcg_temp_free_i32(tmp2);
2035         tcg_temp_free_i32(tmp);
2036         break;
2037     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2038         rd = (insn >> 12) & 0xf;
2039         rd0 = (insn >> 16) & 0xf;
2040         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2041             return 1;
2042         gen_op_iwmmxt_movq_M0_wRn(rd0);
2043         tmp = tcg_temp_new_i32();
2044         switch ((insn >> 22) & 3) {
2045         case 0:
2046             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2047             break;
2048         case 1:
2049             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2050             break;
2051         case 2:
2052             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2053             break;
2054         }
2055         store_reg(s, rd, tmp);
2056         break;
2057     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2058     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2059         wrd = (insn >> 12) & 0xf;
2060         rd0 = (insn >> 16) & 0xf;
2061         rd1 = (insn >> 0) & 0xf;
2062         gen_op_iwmmxt_movq_M0_wRn(rd0);
2063         switch ((insn >> 22) & 3) {
2064         case 0:
2065             if (insn & (1 << 21))
2066                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2067             else
2068                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2069             break;
2070         case 1:
2071             if (insn & (1 << 21))
2072                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2073             else
2074                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2075             break;
2076         case 2:
2077             if (insn & (1 << 21))
2078                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2079             else
2080                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2081             break;
2082         case 3:
2083             return 1;
2084         }
2085         gen_op_iwmmxt_movq_wRn_M0(wrd);
2086         gen_op_iwmmxt_set_mup();
2087         gen_op_iwmmxt_set_cup();
2088         break;
2089     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2090     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2091         wrd = (insn >> 12) & 0xf;
2092         rd0 = (insn >> 16) & 0xf;
2093         gen_op_iwmmxt_movq_M0_wRn(rd0);
2094         switch ((insn >> 22) & 3) {
2095         case 0:
2096             if (insn & (1 << 21))
2097                 gen_op_iwmmxt_unpacklsb_M0();
2098             else
2099                 gen_op_iwmmxt_unpacklub_M0();
2100             break;
2101         case 1:
2102             if (insn & (1 << 21))
2103                 gen_op_iwmmxt_unpacklsw_M0();
2104             else
2105                 gen_op_iwmmxt_unpackluw_M0();
2106             break;
2107         case 2:
2108             if (insn & (1 << 21))
2109                 gen_op_iwmmxt_unpacklsl_M0();
2110             else
2111                 gen_op_iwmmxt_unpacklul_M0();
2112             break;
2113         case 3:
2114             return 1;
2115         }
2116         gen_op_iwmmxt_movq_wRn_M0(wrd);
2117         gen_op_iwmmxt_set_mup();
2118         gen_op_iwmmxt_set_cup();
2119         break;
2120     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2121     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2122         wrd = (insn >> 12) & 0xf;
2123         rd0 = (insn >> 16) & 0xf;
2124         gen_op_iwmmxt_movq_M0_wRn(rd0);
2125         switch ((insn >> 22) & 3) {
2126         case 0:
2127             if (insn & (1 << 21))
2128                 gen_op_iwmmxt_unpackhsb_M0();
2129             else
2130                 gen_op_iwmmxt_unpackhub_M0();
2131             break;
2132         case 1:
2133             if (insn & (1 << 21))
2134                 gen_op_iwmmxt_unpackhsw_M0();
2135             else
2136                 gen_op_iwmmxt_unpackhuw_M0();
2137             break;
2138         case 2:
2139             if (insn & (1 << 21))
2140                 gen_op_iwmmxt_unpackhsl_M0();
2141             else
2142                 gen_op_iwmmxt_unpackhul_M0();
2143             break;
2144         case 3:
2145             return 1;
2146         }
2147         gen_op_iwmmxt_movq_wRn_M0(wrd);
2148         gen_op_iwmmxt_set_mup();
2149         gen_op_iwmmxt_set_cup();
2150         break;
2151     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2152     case 0x214: case 0x614: case 0xa14: case 0xe14:
2153         if (((insn >> 22) & 3) == 0)
2154             return 1;
2155         wrd = (insn >> 12) & 0xf;
2156         rd0 = (insn >> 16) & 0xf;
2157         gen_op_iwmmxt_movq_M0_wRn(rd0);
2158         tmp = tcg_temp_new_i32();
2159         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2160             tcg_temp_free_i32(tmp);
2161             return 1;
2162         }
2163         switch ((insn >> 22) & 3) {
2164         case 1:
2165             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2166             break;
2167         case 2:
2168             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2169             break;
2170         case 3:
2171             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2172             break;
2173         }
2174         tcg_temp_free_i32(tmp);
2175         gen_op_iwmmxt_movq_wRn_M0(wrd);
2176         gen_op_iwmmxt_set_mup();
2177         gen_op_iwmmxt_set_cup();
2178         break;
2179     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2180     case 0x014: case 0x414: case 0x814: case 0xc14:
2181         if (((insn >> 22) & 3) == 0)
2182             return 1;
2183         wrd = (insn >> 12) & 0xf;
2184         rd0 = (insn >> 16) & 0xf;
2185         gen_op_iwmmxt_movq_M0_wRn(rd0);
2186         tmp = tcg_temp_new_i32();
2187         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2188             tcg_temp_free_i32(tmp);
2189             return 1;
2190         }
2191         switch ((insn >> 22) & 3) {
2192         case 1:
2193             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2194             break;
2195         case 2:
2196             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2197             break;
2198         case 3:
2199             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2200             break;
2201         }
2202         tcg_temp_free_i32(tmp);
2203         gen_op_iwmmxt_movq_wRn_M0(wrd);
2204         gen_op_iwmmxt_set_mup();
2205         gen_op_iwmmxt_set_cup();
2206         break;
2207     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2208     case 0x114: case 0x514: case 0x914: case 0xd14:
2209         if (((insn >> 22) & 3) == 0)
2210             return 1;
2211         wrd = (insn >> 12) & 0xf;
2212         rd0 = (insn >> 16) & 0xf;
2213         gen_op_iwmmxt_movq_M0_wRn(rd0);
2214         tmp = tcg_temp_new_i32();
2215         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2216             tcg_temp_free_i32(tmp);
2217             return 1;
2218         }
2219         switch ((insn >> 22) & 3) {
2220         case 1:
2221             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2222             break;
2223         case 2:
2224             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2225             break;
2226         case 3:
2227             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2228             break;
2229         }
2230         tcg_temp_free_i32(tmp);
2231         gen_op_iwmmxt_movq_wRn_M0(wrd);
2232         gen_op_iwmmxt_set_mup();
2233         gen_op_iwmmxt_set_cup();
2234         break;
2235     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2236     case 0x314: case 0x714: case 0xb14: case 0xf14:
2237         if (((insn >> 22) & 3) == 0)
2238             return 1;
2239         wrd = (insn >> 12) & 0xf;
2240         rd0 = (insn >> 16) & 0xf;
2241         gen_op_iwmmxt_movq_M0_wRn(rd0);
2242         tmp = tcg_temp_new_i32();
2243         switch ((insn >> 22) & 3) {
2244         case 1:
2245             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2246                 tcg_temp_free_i32(tmp);
2247                 return 1;
2248             }
2249             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2250             break;
2251         case 2:
2252             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2253                 tcg_temp_free_i32(tmp);
2254                 return 1;
2255             }
2256             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2257             break;
2258         case 3:
2259             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2260                 tcg_temp_free_i32(tmp);
2261                 return 1;
2262             }
2263             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2264             break;
2265         }
2266         tcg_temp_free_i32(tmp);
2267         gen_op_iwmmxt_movq_wRn_M0(wrd);
2268         gen_op_iwmmxt_set_mup();
2269         gen_op_iwmmxt_set_cup();
2270         break;
2271     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2272     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2273         wrd = (insn >> 12) & 0xf;
2274         rd0 = (insn >> 16) & 0xf;
2275         rd1 = (insn >> 0) & 0xf;
2276         gen_op_iwmmxt_movq_M0_wRn(rd0);
2277         switch ((insn >> 22) & 3) {
2278         case 0:
2279             if (insn & (1 << 21))
2280                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2281             else
2282                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2283             break;
2284         case 1:
2285             if (insn & (1 << 21))
2286                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2287             else
2288                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2289             break;
2290         case 2:
2291             if (insn & (1 << 21))
2292                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2293             else
2294                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2295             break;
2296         case 3:
2297             return 1;
2298         }
2299         gen_op_iwmmxt_movq_wRn_M0(wrd);
2300         gen_op_iwmmxt_set_mup();
2301         break;
2302     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2303     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2304         wrd = (insn >> 12) & 0xf;
2305         rd0 = (insn >> 16) & 0xf;
2306         rd1 = (insn >> 0) & 0xf;
2307         gen_op_iwmmxt_movq_M0_wRn(rd0);
2308         switch ((insn >> 22) & 3) {
2309         case 0:
2310             if (insn & (1 << 21))
2311                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2312             else
2313                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2314             break;
2315         case 1:
2316             if (insn & (1 << 21))
2317                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2318             else
2319                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2320             break;
2321         case 2:
2322             if (insn & (1 << 21))
2323                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2324             else
2325                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2326             break;
2327         case 3:
2328             return 1;
2329         }
2330         gen_op_iwmmxt_movq_wRn_M0(wrd);
2331         gen_op_iwmmxt_set_mup();
2332         break;
2333     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2334     case 0x402: case 0x502: case 0x602: case 0x702:
2335         wrd = (insn >> 12) & 0xf;
2336         rd0 = (insn >> 16) & 0xf;
2337         rd1 = (insn >> 0) & 0xf;
2338         gen_op_iwmmxt_movq_M0_wRn(rd0);
2339         iwmmxt_load_reg(cpu_V1, rd1);
2340         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2341                                 tcg_constant_i32((insn >> 20) & 3));
2342         gen_op_iwmmxt_movq_wRn_M0(wrd);
2343         gen_op_iwmmxt_set_mup();
2344         break;
2345     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2346     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2347     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2348     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2349         wrd = (insn >> 12) & 0xf;
2350         rd0 = (insn >> 16) & 0xf;
2351         rd1 = (insn >> 0) & 0xf;
2352         gen_op_iwmmxt_movq_M0_wRn(rd0);
2353         switch ((insn >> 20) & 0xf) {
2354         case 0x0:
2355             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2356             break;
2357         case 0x1:
2358             gen_op_iwmmxt_subub_M0_wRn(rd1);
2359             break;
2360         case 0x3:
2361             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2362             break;
2363         case 0x4:
2364             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2365             break;
2366         case 0x5:
2367             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2368             break;
2369         case 0x7:
2370             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2371             break;
2372         case 0x8:
2373             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2374             break;
2375         case 0x9:
2376             gen_op_iwmmxt_subul_M0_wRn(rd1);
2377             break;
2378         case 0xb:
2379             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2380             break;
2381         default:
2382             return 1;
2383         }
2384         gen_op_iwmmxt_movq_wRn_M0(wrd);
2385         gen_op_iwmmxt_set_mup();
2386         gen_op_iwmmxt_set_cup();
2387         break;
2388     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2389     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2390     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2391     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2392         wrd = (insn >> 12) & 0xf;
2393         rd0 = (insn >> 16) & 0xf;
2394         gen_op_iwmmxt_movq_M0_wRn(rd0);
2395         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2396         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2397         gen_op_iwmmxt_movq_wRn_M0(wrd);
2398         gen_op_iwmmxt_set_mup();
2399         gen_op_iwmmxt_set_cup();
2400         break;
2401     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2402     case 0x418: case 0x518: case 0x618: case 0x718:
2403     case 0x818: case 0x918: case 0xa18: case 0xb18:
2404     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2405         wrd = (insn >> 12) & 0xf;
2406         rd0 = (insn >> 16) & 0xf;
2407         rd1 = (insn >> 0) & 0xf;
2408         gen_op_iwmmxt_movq_M0_wRn(rd0);
2409         switch ((insn >> 20) & 0xf) {
2410         case 0x0:
2411             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2412             break;
2413         case 0x1:
2414             gen_op_iwmmxt_addub_M0_wRn(rd1);
2415             break;
2416         case 0x3:
2417             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2418             break;
2419         case 0x4:
2420             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2421             break;
2422         case 0x5:
2423             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2424             break;
2425         case 0x7:
2426             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2427             break;
2428         case 0x8:
2429             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2430             break;
2431         case 0x9:
2432             gen_op_iwmmxt_addul_M0_wRn(rd1);
2433             break;
2434         case 0xb:
2435             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2436             break;
2437         default:
2438             return 1;
2439         }
2440         gen_op_iwmmxt_movq_wRn_M0(wrd);
2441         gen_op_iwmmxt_set_mup();
2442         gen_op_iwmmxt_set_cup();
2443         break;
2444     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2445     case 0x408: case 0x508: case 0x608: case 0x708:
2446     case 0x808: case 0x908: case 0xa08: case 0xb08:
2447     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2448         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2449             return 1;
2450         wrd = (insn >> 12) & 0xf;
2451         rd0 = (insn >> 16) & 0xf;
2452         rd1 = (insn >> 0) & 0xf;
2453         gen_op_iwmmxt_movq_M0_wRn(rd0);
2454         switch ((insn >> 22) & 3) {
2455         case 1:
2456             if (insn & (1 << 21))
2457                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2458             else
2459                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2460             break;
2461         case 2:
2462             if (insn & (1 << 21))
2463                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2464             else
2465                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2466             break;
2467         case 3:
2468             if (insn & (1 << 21))
2469                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2470             else
2471                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2472             break;
2473         }
2474         gen_op_iwmmxt_movq_wRn_M0(wrd);
2475         gen_op_iwmmxt_set_mup();
2476         gen_op_iwmmxt_set_cup();
2477         break;
2478     case 0x201: case 0x203: case 0x205: case 0x207:
2479     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2480     case 0x211: case 0x213: case 0x215: case 0x217:
2481     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2482         wrd = (insn >> 5) & 0xf;
2483         rd0 = (insn >> 12) & 0xf;
2484         rd1 = (insn >> 0) & 0xf;
2485         if (rd0 == 0xf || rd1 == 0xf)
2486             return 1;
2487         gen_op_iwmmxt_movq_M0_wRn(wrd);
2488         tmp = load_reg(s, rd0);
2489         tmp2 = load_reg(s, rd1);
2490         switch ((insn >> 16) & 0xf) {
2491         case 0x0:                                       /* TMIA */
2492             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2493             break;
2494         case 0x8:                                       /* TMIAPH */
2495             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2496             break;
2497         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2498             if (insn & (1 << 16))
2499                 tcg_gen_shri_i32(tmp, tmp, 16);
2500             if (insn & (1 << 17))
2501                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2502             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2503             break;
2504         default:
2505             tcg_temp_free_i32(tmp2);
2506             tcg_temp_free_i32(tmp);
2507             return 1;
2508         }
2509         tcg_temp_free_i32(tmp2);
2510         tcg_temp_free_i32(tmp);
2511         gen_op_iwmmxt_movq_wRn_M0(wrd);
2512         gen_op_iwmmxt_set_mup();
2513         break;
2514     default:
2515         return 1;
2516     }
2517 
2518     return 0;
2519 }
2520 
2521 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2522    (i.e. an undefined instruction).  */
2523 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2524 {
2525     int acc, rd0, rd1, rdhi, rdlo;
2526     TCGv_i32 tmp, tmp2;
2527 
2528     if ((insn & 0x0ff00f10) == 0x0e200010) {
2529         /* Multiply with Internal Accumulate Format */
2530         rd0 = (insn >> 12) & 0xf;
2531         rd1 = insn & 0xf;
2532         acc = (insn >> 5) & 7;
2533 
2534         if (acc != 0)
2535             return 1;
2536 
2537         tmp = load_reg(s, rd0);
2538         tmp2 = load_reg(s, rd1);
2539         switch ((insn >> 16) & 0xf) {
2540         case 0x0:                                       /* MIA */
2541             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2542             break;
2543         case 0x8:                                       /* MIAPH */
2544             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2545             break;
2546         case 0xc:                                       /* MIABB */
2547         case 0xd:                                       /* MIABT */
2548         case 0xe:                                       /* MIATB */
2549         case 0xf:                                       /* MIATT */
2550             if (insn & (1 << 16))
2551                 tcg_gen_shri_i32(tmp, tmp, 16);
2552             if (insn & (1 << 17))
2553                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2554             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2555             break;
2556         default:
2557             return 1;
2558         }
2559         tcg_temp_free_i32(tmp2);
2560         tcg_temp_free_i32(tmp);
2561 
2562         gen_op_iwmmxt_movq_wRn_M0(acc);
2563         return 0;
2564     }
2565 
2566     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2567         /* Internal Accumulator Access Format */
2568         rdhi = (insn >> 16) & 0xf;
2569         rdlo = (insn >> 12) & 0xf;
2570         acc = insn & 7;
2571 
2572         if (acc != 0)
2573             return 1;
2574 
2575         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2576             iwmmxt_load_reg(cpu_V0, acc);
2577             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2578             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2579             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2580         } else {                                        /* MAR */
2581             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2582             iwmmxt_store_reg(cpu_V0, acc);
2583         }
2584         return 0;
2585     }
2586 
2587     return 1;
2588 }
2589 
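/*
 * Emit an indirect jump through the TB lookup helper: execution continues
 * at a matching translated block if one is found, otherwise control
 * returns to the main loop.
 */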
2590 static void gen_goto_ptr(void)
2591 {
2592     tcg_gen_lookup_and_goto_ptr();
2593 }
2594 
2595 /* This will end the TB but doesn't guarantee we'll return to
2596  * cpu_loop_exec. Any live exit_requests will be processed as we
2597  * enter the next TB.
2598  */
2599 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2600 {
2601     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2602         /*
2603          * For pcrel, the pc must always be up-to-date on entry to
2604          * the linked TB, so that it can use simple additions for all
2605          * further adjustments.  For !pcrel, the linked TB is compiled
2606          * to know its full virtual address, so we can delay the
2607          * update to pc to the unlinked path.  A long chain of links
2608          * can thus avoid many updates to the PC.
2609          */
2610         if (tb_cflags(s->base.tb) & CF_PCREL) {
2611             gen_update_pc(s, diff);
2612             tcg_gen_goto_tb(n);
2613         } else {
2614             tcg_gen_goto_tb(n);
2615             gen_update_pc(s, diff);
2616         }
2617         tcg_gen_exit_tb(s->base.tb, n);
2618     } else {
2619         gen_update_pc(s, diff);
2620         gen_goto_ptr();
2621     }
2622     s->base.is_jmp = DISAS_NORETURN;
2623 }
2624 
2625 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2626 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2627 {
2628     if (unlikely(s->ss_active)) {
2629         /* An indirect jump so that we still trigger the debug exception.  */
2630         gen_update_pc(s, diff);
2631         s->base.is_jmp = DISAS_JUMP;
2632         return;
2633     }
2634     switch (s->base.is_jmp) {
2635     case DISAS_NEXT:
2636     case DISAS_TOO_MANY:
2637     case DISAS_NORETURN:
2638         /*
2639          * The normal case: just go to the destination TB.
2640          * NB: NORETURN happens if we generate code like
2641          *    gen_brcondi(l);
2642          *    gen_jmp();
2643          *    gen_set_label(l);
2644          *    gen_jmp();
2645          * on the second call to gen_jmp().
2646          */
2647         gen_goto_tb(s, tbno, diff);
2648         break;
2649     case DISAS_UPDATE_NOCHAIN:
2650     case DISAS_UPDATE_EXIT:
2651         /*
2652          * We already decided we're leaving the TB for some other reason.
2653          * Avoid using goto_tb so we really do exit back to the main loop
2654          * and don't chain to another TB.
2655          */
2656         gen_update_pc(s, diff);
2657         gen_goto_ptr();
2658         s->base.is_jmp = DISAS_NORETURN;
2659         break;
2660     default:
2661         /*
2662          * We shouldn't be emitting code for a jump and also have
2663          * is_jmp set to one of the special cases like DISAS_SWI.
2664          */
2665         g_assert_not_reached();
2666     }
2667 }
2668 
2669 static inline void gen_jmp(DisasContext *s, target_long diff)
2670 {
2671     gen_jmp_tb(s, diff, 0);
2672 }
2673 
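/*
 * Signed 16x16->32 multiply for the "xy" multiplies: x and y select the
 * top (nonzero) or bottom halfword of t0 and t1 respectively.  The product
 * is left in t0; both inputs are clobbered.
 */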
2674 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2675 {
2676     if (x)
2677         tcg_gen_sari_i32(t0, t0, 16);
2678     else
2679         gen_sxth(t0);
2680     if (y)
2681         tcg_gen_sari_i32(t1, t1, 16);
2682     else
2683         gen_sxth(t1);
2684     tcg_gen_mul_i32(t0, t0, t1);
2685 }
2686 
2687 /* Return the mask of PSR bits set by a MSR instruction.  */
2688 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2689 {
2690     uint32_t mask = 0;
2691 
2692     if (flags & (1 << 0)) {
2693         mask |= 0xff;
2694     }
2695     if (flags & (1 << 1)) {
2696         mask |= 0xff00;
2697     }
2698     if (flags & (1 << 2)) {
2699         mask |= 0xff0000;
2700     }
2701     if (flags & (1 << 3)) {
2702         mask |= 0xff000000;
2703     }
2704 
2705     /* Mask out undefined and reserved bits.  */
2706     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2707 
2708     /* Mask out execution state.  */
2709     if (!spsr) {
2710         mask &= ~CPSR_EXEC;
2711     }
2712 
2713     /* Mask out privileged bits.  */
2714     if (IS_USER(s)) {
2715         mask &= CPSR_USER;
2716     }
2717     return mask;
2718 }
2719 
2720 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2721 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2722 {
2723     TCGv_i32 tmp;
2724     if (spsr) {
2725         /* ??? This is also undefined in system mode.  */
2726         if (IS_USER(s))
2727             return 1;
2728 
2729         tmp = load_cpu_field(spsr);
2730         tcg_gen_andi_i32(tmp, tmp, ~mask);
2731         tcg_gen_andi_i32(t0, t0, mask);
2732         tcg_gen_or_i32(tmp, tmp, t0);
2733         store_cpu_field(tmp, spsr);
2734     } else {
2735         gen_set_cpsr(t0, mask);
2736     }
2737     tcg_temp_free_i32(t0);
2738     gen_lookup_tb(s);
2739     return 0;
2740 }
2741 
2742 /* Returns nonzero if access to the PSR is not permitted.  */
2743 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2744 {
2745     TCGv_i32 tmp;
2746     tmp = tcg_temp_new_i32();
2747     tcg_gen_movi_i32(tmp, val);
2748     return gen_set_psr(s, mask, spsr, tmp);
2749 }
2750 
2751 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2752                                      int *tgtmode, int *regno)
2753 {
2754     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2755      * the target mode and register number, and identify the various
2756      * unpredictable cases.
2757      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2758      *  + executed in user mode
2759      *  + using R15 as the src/dest register
2760      *  + accessing an unimplemented register
2761      *  + accessing a register that's inaccessible at current PL/security state*
2762      *  + accessing a register that you could access with a different insn
2763      * We choose to UNDEF in all these cases.
2764      * Since we don't know which of the various AArch32 modes we are in
2765      * we have to defer some checks to runtime.
2766      * Accesses to Monitor mode registers from Secure EL1 (which implies
2767      * that EL3 is AArch64) must trap to Secure EL2, if enabled, else EL3.
2768      *
2769      * If the access checks fail this function will emit code to take
2770      * an exception and return false. Otherwise it will return true,
2771      * and set *tgtmode and *regno appropriately.
2772      */
2773     /* These instructions are present only in ARMv8, or in ARMv7 with the
2774      * Virtualization Extensions.
2775      */
2776     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2777         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2778         goto undef;
2779     }
2780 
2781     if (IS_USER(s) || rn == 15) {
2782         goto undef;
2783     }
2784 
2785     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2786      * of registers into (r, sysm).
2787      */
2788     if (r) {
2789         /* SPSRs for other modes */
2790         switch (sysm) {
2791         case 0xe: /* SPSR_fiq */
2792             *tgtmode = ARM_CPU_MODE_FIQ;
2793             break;
2794         case 0x10: /* SPSR_irq */
2795             *tgtmode = ARM_CPU_MODE_IRQ;
2796             break;
2797         case 0x12: /* SPSR_svc */
2798             *tgtmode = ARM_CPU_MODE_SVC;
2799             break;
2800         case 0x14: /* SPSR_abt */
2801             *tgtmode = ARM_CPU_MODE_ABT;
2802             break;
2803         case 0x16: /* SPSR_und */
2804             *tgtmode = ARM_CPU_MODE_UND;
2805             break;
2806         case 0x1c: /* SPSR_mon */
2807             *tgtmode = ARM_CPU_MODE_MON;
2808             break;
2809         case 0x1e: /* SPSR_hyp */
2810             *tgtmode = ARM_CPU_MODE_HYP;
2811             break;
2812         default: /* unallocated */
2813             goto undef;
2814         }
2815         /* We arbitrarily assign SPSR a register number of 16. */
2816         *regno = 16;
2817     } else {
2818         /* general purpose registers for other modes */
2819         switch (sysm) {
2820         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2821             *tgtmode = ARM_CPU_MODE_USR;
2822             *regno = sysm + 8;
2823             break;
2824         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2825             *tgtmode = ARM_CPU_MODE_FIQ;
2826             *regno = sysm;
2827             break;
2828         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2829             *tgtmode = ARM_CPU_MODE_IRQ;
2830             *regno = sysm & 1 ? 13 : 14;
2831             break;
2832         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2833             *tgtmode = ARM_CPU_MODE_SVC;
2834             *regno = sysm & 1 ? 13 : 14;
2835             break;
2836         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2837             *tgtmode = ARM_CPU_MODE_ABT;
2838             *regno = sysm & 1 ? 13 : 14;
2839             break;
2840         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2841             *tgtmode = ARM_CPU_MODE_UND;
2842             *regno = sysm & 1 ? 13 : 14;
2843             break;
2844         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2845             *tgtmode = ARM_CPU_MODE_MON;
2846             *regno = sysm & 1 ? 13 : 14;
2847             break;
2848         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2849             *tgtmode = ARM_CPU_MODE_HYP;
2850             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2851             *regno = sysm & 1 ? 13 : 17;
2852             break;
2853         default: /* unallocated */
2854             goto undef;
2855         }
2856     }
2857 
2858     /* Catch the 'accessing inaccessible register' cases we can detect
2859      * at translate time.
2860      */
2861     switch (*tgtmode) {
2862     case ARM_CPU_MODE_MON:
2863         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2864             goto undef;
2865         }
2866         if (s->current_el == 1) {
2867             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2868              * then accesses to Mon registers trap to Secure EL2, if it exists,
2869              * otherwise EL3.
2870              */
2871             TCGv_i32 tcg_el;
2872 
2873             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2874                 dc_isar_feature(aa64_sel2, s)) {
2875                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2876                 tcg_el = load_cpu_field(cp15.scr_el3);
2877                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2878                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2879             } else {
2880                 tcg_el = tcg_constant_i32(3);
2881             }
2882 
2883             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2884                                     syn_uncategorized(), tcg_el);
2885             tcg_temp_free_i32(tcg_el);
2886             return false;
2887         }
2888         break;
2889     case ARM_CPU_MODE_HYP:
2890         /*
2891          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2892          * (and so we can forbid accesses from EL2 or below). elr_hyp
2893          * can be accessed also from Hyp mode, so forbid accesses from
2894          * EL0 or EL1.
2895          */
2896         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2897             (s->current_el < 3 && *regno != 17)) {
2898             goto undef;
2899         }
2900         break;
2901     default:
2902         break;
2903     }
2904 
2905     return true;
2906 
2907 undef:
2908     /* If we get here then some access check did not pass */
2909     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2910     return false;
2911 }
2912 
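/* MSR (banked): write general-purpose register rn to the banked
 * register selected by (r, sysm).
 */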
2913 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2914 {
2915     TCGv_i32 tcg_reg;
2916     int tgtmode = 0, regno = 0;
2917 
2918     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2919         return;
2920     }
2921 
2922     /* Sync state because msr_banked() can raise exceptions */
2923     gen_set_condexec(s);
2924     gen_update_pc(s, 0);
2925     tcg_reg = load_reg(s, rn);
2926     gen_helper_msr_banked(cpu_env, tcg_reg,
2927                           tcg_constant_i32(tgtmode),
2928                           tcg_constant_i32(regno));
2929     tcg_temp_free_i32(tcg_reg);
2930     s->base.is_jmp = DISAS_UPDATE_EXIT;
2931 }
2932 
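/* MRS (banked): read the banked register selected by (r, sysm) into
 * general-purpose register rn.
 */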
2933 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2934 {
2935     TCGv_i32 tcg_reg;
2936     int tgtmode = 0, regno = 0;
2937 
2938     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2939         return;
2940     }
2941 
2942     /* Sync state because mrs_banked() can raise exceptions */
2943     gen_set_condexec(s);
2944     gen_update_pc(s, 0);
2945     tcg_reg = tcg_temp_new_i32();
2946     gen_helper_mrs_banked(tcg_reg, cpu_env,
2947                           tcg_constant_i32(tgtmode),
2948                           tcg_constant_i32(regno));
2949     store_reg(s, rn, tcg_reg);
2950     s->base.is_jmp = DISAS_UPDATE_EXIT;
2951 }
2952 
2953 /* Store value to PC as for an exception return (i.e. don't
2954  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2955  * will do the masking based on the new value of the Thumb bit.
2956  */
2957 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2958 {
2959     tcg_gen_mov_i32(cpu_R[15], pc);
2960     tcg_temp_free_i32(pc);
2961 }
2962 
2963 /* Generate a v6 exception return.  Marks both values as dead.  */
2964 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2965 {
2966     store_pc_exc_ret(s, pc);
2967     /* The cpsr_write_eret helper will mask the low bits of PC
2968      * appropriately depending on the new Thumb bit, so it must
2969      * be called after storing the new PC.
2970      */
2971     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2972         gen_io_start();
2973     }
2974     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2975     tcg_temp_free_i32(cpsr);
2976     /* Must exit loop to check un-masked IRQs */
2977     s->base.is_jmp = DISAS_EXIT;
2978 }
2979 
2980 /* Generate an old-style exception return. Marks pc as dead. */
2981 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2982 {
2983     gen_rfe(s, pc, load_cpu_field(spsr));
2984 }
2985 
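/* Expand a 3-operand gvec operation whose helper also needs a pointer
 * to the QC (cumulative saturation) flag.
 */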
2986 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2987                             uint32_t opr_sz, uint32_t max_sz,
2988                             gen_helper_gvec_3_ptr *fn)
2989 {
2990     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2991 
2992     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2993     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2994                        opr_sz, max_sz, 0, fn);
2995     tcg_temp_free_ptr(qc_ptr);
2996 }
2997 
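/* SQRDMLAH (and SQRDMLSH below): only 16-bit and 32-bit element sizes
 * are defined, hence the fns[vece - 1] indexing.
 */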
2998 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2999                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3000 {
3001     static gen_helper_gvec_3_ptr * const fns[2] = {
3002         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3003     };
3004     tcg_debug_assert(vece >= 1 && vece <= 2);
3005     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3006 }
3007 
3008 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3009                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3010 {
3011     static gen_helper_gvec_3_ptr * const fns[2] = {
3012         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3013     };
3014     tcg_debug_assert(vece >= 1 && vece <= 2);
3015     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3016 }
3017 
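/* Vector compare against zero: each destination element becomes all ones
 * if the comparison of the source element with zero holds, else zero.
 */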
3018 #define GEN_CMP0(NAME, COND)                                            \
3019     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
3020     {                                                                   \
3021         tcg_gen_setcondi_i32(COND, d, a, 0);                            \
3022         tcg_gen_neg_i32(d, d);                                          \
3023     }                                                                   \
3024     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
3025     {                                                                   \
3026         tcg_gen_setcondi_i64(COND, d, a, 0);                            \
3027         tcg_gen_neg_i64(d, d);                                          \
3028     }                                                                   \
3029     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3030     {                                                                   \
3031         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
3032         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
3033     }                                                                   \
3034     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
3035                             uint32_t opr_sz, uint32_t max_sz)           \
3036     {                                                                   \
3037         const GVecGen2 op[4] = {                                        \
3038             { .fno = gen_helper_gvec_##NAME##0_b,                       \
3039               .fniv = gen_##NAME##0_vec,                                \
3040               .opt_opc = vecop_list_cmp,                                \
3041               .vece = MO_8 },                                           \
3042             { .fno = gen_helper_gvec_##NAME##0_h,                       \
3043               .fniv = gen_##NAME##0_vec,                                \
3044               .opt_opc = vecop_list_cmp,                                \
3045               .vece = MO_16 },                                          \
3046             { .fni4 = gen_##NAME##0_i32,                                \
3047               .fniv = gen_##NAME##0_vec,                                \
3048               .opt_opc = vecop_list_cmp,                                \
3049               .vece = MO_32 },                                          \
3050             { .fni8 = gen_##NAME##0_i64,                                \
3051               .fniv = gen_##NAME##0_vec,                                \
3052               .opt_opc = vecop_list_cmp,                                \
3053               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
3054               .vece = MO_64 },                                          \
3055         };                                                              \
3056         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
3057     }
3058 
3059 static const TCGOpcode vecop_list_cmp[] = {
3060     INDEX_op_cmp_vec, 0
3061 };
3062 
3063 GEN_CMP0(ceq, TCG_COND_EQ)
3064 GEN_CMP0(cle, TCG_COND_LE)
3065 GEN_CMP0(cge, TCG_COND_GE)
3066 GEN_CMP0(clt, TCG_COND_LT)
3067 GEN_CMP0(cgt, TCG_COND_GT)
3068 
3069 #undef GEN_CMP0
3070 
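/* Signed shift right and accumulate (SSRA): d[i] += a[i] >> shift,
 * using an arithmetic shift.
 */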
3071 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3072 {
3073     tcg_gen_vec_sar8i_i64(a, a, shift);
3074     tcg_gen_vec_add8_i64(d, d, a);
3075 }
3076 
3077 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3078 {
3079     tcg_gen_vec_sar16i_i64(a, a, shift);
3080     tcg_gen_vec_add16_i64(d, d, a);
3081 }
3082 
3083 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3084 {
3085     tcg_gen_sari_i32(a, a, shift);
3086     tcg_gen_add_i32(d, d, a);
3087 }
3088 
3089 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3090 {
3091     tcg_gen_sari_i64(a, a, shift);
3092     tcg_gen_add_i64(d, d, a);
3093 }
3094 
3095 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3096 {
3097     tcg_gen_sari_vec(vece, a, a, sh);
3098     tcg_gen_add_vec(vece, d, d, a);
3099 }
3100 
3101 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3102                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3103 {
3104     static const TCGOpcode vecop_list[] = {
3105         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3106     };
3107     static const GVecGen2i ops[4] = {
3108         { .fni8 = gen_ssra8_i64,
3109           .fniv = gen_ssra_vec,
3110           .fno = gen_helper_gvec_ssra_b,
3111           .load_dest = true,
3112           .opt_opc = vecop_list,
3113           .vece = MO_8 },
3114         { .fni8 = gen_ssra16_i64,
3115           .fniv = gen_ssra_vec,
3116           .fno = gen_helper_gvec_ssra_h,
3117           .load_dest = true,
3118           .opt_opc = vecop_list,
3119           .vece = MO_16 },
3120         { .fni4 = gen_ssra32_i32,
3121           .fniv = gen_ssra_vec,
3122           .fno = gen_helper_gvec_ssra_s,
3123           .load_dest = true,
3124           .opt_opc = vecop_list,
3125           .vece = MO_32 },
3126         { .fni8 = gen_ssra64_i64,
3127           .fniv = gen_ssra_vec,
3128           .fno = gen_helper_gvec_ssra_d,
3129           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3130           .opt_opc = vecop_list,
3131           .load_dest = true,
3132           .vece = MO_64 },
3133     };
3134 
3135     /* tszimm encoding produces immediates in the range [1..esize]. */
3136     tcg_debug_assert(shift > 0);
3137     tcg_debug_assert(shift <= (8 << vece));
3138 
3139     /*
3140      * Shifts larger than the element size are architecturally valid.
3141      * A signed shift of that size yields all sign bits, hence the clamp below.
3142      */
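    /* E.g. at MO_8 a shift of 8 is clamped to 7, so each lane adds 0 or -1. */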
3143     shift = MIN(shift, (8 << vece) - 1);
3144     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3145 }
3146 
3147 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3148 {
3149     tcg_gen_vec_shr8i_i64(a, a, shift);
3150     tcg_gen_vec_add8_i64(d, d, a);
3151 }
3152 
3153 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3154 {
3155     tcg_gen_vec_shr16i_i64(a, a, shift);
3156     tcg_gen_vec_add16_i64(d, d, a);
3157 }
3158 
3159 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3160 {
3161     tcg_gen_shri_i32(a, a, shift);
3162     tcg_gen_add_i32(d, d, a);
3163 }
3164 
3165 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3166 {
3167     tcg_gen_shri_i64(a, a, shift);
3168     tcg_gen_add_i64(d, d, a);
3169 }
3170 
3171 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3172 {
3173     tcg_gen_shri_vec(vece, a, a, sh);
3174     tcg_gen_add_vec(vece, d, d, a);
3175 }
3176 
3177 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3178                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3179 {
3180     static const TCGOpcode vecop_list[] = {
3181         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3182     };
3183     static const GVecGen2i ops[4] = {
3184         { .fni8 = gen_usra8_i64,
3185           .fniv = gen_usra_vec,
3186           .fno = gen_helper_gvec_usra_b,
3187           .load_dest = true,
3188           .opt_opc = vecop_list,
3189           .vece = MO_8, },
3190         { .fni8 = gen_usra16_i64,
3191           .fniv = gen_usra_vec,
3192           .fno = gen_helper_gvec_usra_h,
3193           .load_dest = true,
3194           .opt_opc = vecop_list,
3195           .vece = MO_16, },
3196         { .fni4 = gen_usra32_i32,
3197           .fniv = gen_usra_vec,
3198           .fno = gen_helper_gvec_usra_s,
3199           .load_dest = true,
3200           .opt_opc = vecop_list,
3201           .vece = MO_32, },
3202         { .fni8 = gen_usra64_i64,
3203           .fniv = gen_usra_vec,
3204           .fno = gen_helper_gvec_usra_d,
3205           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3206           .load_dest = true,
3207           .opt_opc = vecop_list,
3208           .vece = MO_64, },
3209     };
3210 
3211     /* tszimm encoding produces immediates in the range [1..esize]. */
3212     tcg_debug_assert(shift > 0);
3213     tcg_debug_assert(shift <= (8 << vece));
3214 
3215     /*
3216      * Shifts larger than the element size are architecturally valid.
3217      * An unsigned shift of that size yields zero, so accumulating it is a nop.
3218      */
3219     if (shift < (8 << vece)) {
3220         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3221     } else {
3222         /* Nop, but we do need to clear the tail. */
3223         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3224     }
3225 }
3226 
3227 /*
3228  * Shift one less than the requested amount, and the low bit is
3229  * the rounding bit.  For the 8 and 16-bit operations, because we
3230  * mask off all but that low bit, bits shifted across lane boundaries
3231  * are discarded, so a normal integer shift suffices.
3232  */
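/*
 * Each lane thus computes (a >> sh) + ((a >> (sh - 1)) & 1), which equals
 * the rounded result (a + (1 << (sh - 1))) >> sh without needing a wider
 * intermediate.  E.g. for an 8-bit lane with sh == 3 and a == 22, the
 * rounding bit is (22 >> 2) & 1 == 1, so the result is (22 >> 3) + 1 == 3,
 * matching (22 + 4) >> 3.
 */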
3233 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3234 {
3235     TCGv_i64 t = tcg_temp_new_i64();
3236 
3237     tcg_gen_shri_i64(t, a, sh - 1);
3238     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3239     tcg_gen_vec_sar8i_i64(d, a, sh);
3240     tcg_gen_vec_add8_i64(d, d, t);
3241     tcg_temp_free_i64(t);
3242 }
3243 
3244 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3245 {
3246     TCGv_i64 t = tcg_temp_new_i64();
3247 
3248     tcg_gen_shri_i64(t, a, sh - 1);
3249     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3250     tcg_gen_vec_sar16i_i64(d, a, sh);
3251     tcg_gen_vec_add16_i64(d, d, t);
3252     tcg_temp_free_i64(t);
3253 }
3254 
3255 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3256 {
3257     TCGv_i32 t;
3258 
3259     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3260     if (sh == 32) {
3261         tcg_gen_movi_i32(d, 0);
3262         return;
3263     }
3264     t = tcg_temp_new_i32();
3265     tcg_gen_extract_i32(t, a, sh - 1, 1);
3266     tcg_gen_sari_i32(d, a, sh);
3267     tcg_gen_add_i32(d, d, t);
3268     tcg_temp_free_i32(t);
3269 }
3270 
3271 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3272 {
3273     TCGv_i64 t = tcg_temp_new_i64();
3274 
3275     tcg_gen_extract_i64(t, a, sh - 1, 1);
3276     tcg_gen_sari_i64(d, a, sh);
3277     tcg_gen_add_i64(d, d, t);
3278     tcg_temp_free_i64(t);
3279 }
3280 
3281 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3282 {
3283     TCGv_vec t = tcg_temp_new_vec_matching(d);
3284     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3285 
3286     tcg_gen_shri_vec(vece, t, a, sh - 1);
3287     tcg_gen_dupi_vec(vece, ones, 1);
3288     tcg_gen_and_vec(vece, t, t, ones);
3289     tcg_gen_sari_vec(vece, d, a, sh);
3290     tcg_gen_add_vec(vece, d, d, t);
3291 
3292     tcg_temp_free_vec(t);
3293     tcg_temp_free_vec(ones);
3294 }
3295 
3296 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3297                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3298 {
3299     static const TCGOpcode vecop_list[] = {
3300         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3301     };
3302     static const GVecGen2i ops[4] = {
3303         { .fni8 = gen_srshr8_i64,
3304           .fniv = gen_srshr_vec,
3305           .fno = gen_helper_gvec_srshr_b,
3306           .opt_opc = vecop_list,
3307           .vece = MO_8 },
3308         { .fni8 = gen_srshr16_i64,
3309           .fniv = gen_srshr_vec,
3310           .fno = gen_helper_gvec_srshr_h,
3311           .opt_opc = vecop_list,
3312           .vece = MO_16 },
3313         { .fni4 = gen_srshr32_i32,
3314           .fniv = gen_srshr_vec,
3315           .fno = gen_helper_gvec_srshr_s,
3316           .opt_opc = vecop_list,
3317           .vece = MO_32 },
3318         { .fni8 = gen_srshr64_i64,
3319           .fniv = gen_srshr_vec,
3320           .fno = gen_helper_gvec_srshr_d,
3321           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3322           .opt_opc = vecop_list,
3323           .vece = MO_64 },
3324     };
3325 
3326     /* tszimm encoding produces immediates in the range [1..esize] */
3327     tcg_debug_assert(shift > 0);
3328     tcg_debug_assert(shift <= (8 << vece));
3329 
3330     if (shift == (8 << vece)) {
3331         /*
3332          * Shifts larger than the element size are architecturally valid.
3333          * Signed results in all sign bits.  With rounding, this produces
3334          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3335          * I.e. always zero.
3336          */
3337         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3338     } else {
3339         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3340     }
3341 }
3342 
3343 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3344 {
3345     TCGv_i64 t = tcg_temp_new_i64();
3346 
3347     gen_srshr8_i64(t, a, sh);
3348     tcg_gen_vec_add8_i64(d, d, t);
3349     tcg_temp_free_i64(t);
3350 }
3351 
3352 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3353 {
3354     TCGv_i64 t = tcg_temp_new_i64();
3355 
3356     gen_srshr16_i64(t, a, sh);
3357     tcg_gen_vec_add16_i64(d, d, t);
3358     tcg_temp_free_i64(t);
3359 }
3360 
3361 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3362 {
3363     TCGv_i32 t = tcg_temp_new_i32();
3364 
3365     gen_srshr32_i32(t, a, sh);
3366     tcg_gen_add_i32(d, d, t);
3367     tcg_temp_free_i32(t);
3368 }
3369 
3370 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3371 {
3372     TCGv_i64 t = tcg_temp_new_i64();
3373 
3374     gen_srshr64_i64(t, a, sh);
3375     tcg_gen_add_i64(d, d, t);
3376     tcg_temp_free_i64(t);
3377 }
3378 
3379 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3380 {
3381     TCGv_vec t = tcg_temp_new_vec_matching(d);
3382 
3383     gen_srshr_vec(vece, t, a, sh);
3384     tcg_gen_add_vec(vece, d, d, t);
3385     tcg_temp_free_vec(t);
3386 }
3387 
3388 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3389                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3390 {
3391     static const TCGOpcode vecop_list[] = {
3392         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3393     };
3394     static const GVecGen2i ops[4] = {
3395         { .fni8 = gen_srsra8_i64,
3396           .fniv = gen_srsra_vec,
3397           .fno = gen_helper_gvec_srsra_b,
3398           .opt_opc = vecop_list,
3399           .load_dest = true,
3400           .vece = MO_8 },
3401         { .fni8 = gen_srsra16_i64,
3402           .fniv = gen_srsra_vec,
3403           .fno = gen_helper_gvec_srsra_h,
3404           .opt_opc = vecop_list,
3405           .load_dest = true,
3406           .vece = MO_16 },
3407         { .fni4 = gen_srsra32_i32,
3408           .fniv = gen_srsra_vec,
3409           .fno = gen_helper_gvec_srsra_s,
3410           .opt_opc = vecop_list,
3411           .load_dest = true,
3412           .vece = MO_32 },
3413         { .fni8 = gen_srsra64_i64,
3414           .fniv = gen_srsra_vec,
3415           .fno = gen_helper_gvec_srsra_d,
3416           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3417           .opt_opc = vecop_list,
3418           .load_dest = true,
3419           .vece = MO_64 },
3420     };
3421 
3422     /* tszimm encoding produces immediates in the range [1..esize] */
3423     tcg_debug_assert(shift > 0);
3424     tcg_debug_assert(shift <= (8 << vece));
3425 
3426     /*
3427      * Shifts larger than the element size are architecturally valid.
3428      * Signed results in all sign bits.  With rounding, this produces
3429      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3430      * I.e. always zero.  With accumulation, this leaves D unchanged.
3431      */
3432     if (shift == (8 << vece)) {
3433         /* Nop, but we do need to clear the tail. */
3434         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3435     } else {
3436         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3437     }
3438 }
3439 
3440 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3441 {
3442     TCGv_i64 t = tcg_temp_new_i64();
3443 
3444     tcg_gen_shri_i64(t, a, sh - 1);
3445     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3446     tcg_gen_vec_shr8i_i64(d, a, sh);
3447     tcg_gen_vec_add8_i64(d, d, t);
3448     tcg_temp_free_i64(t);
3449 }
3450 
3451 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3452 {
3453     TCGv_i64 t = tcg_temp_new_i64();
3454 
3455     tcg_gen_shri_i64(t, a, sh - 1);
3456     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3457     tcg_gen_vec_shr16i_i64(d, a, sh);
3458     tcg_gen_vec_add16_i64(d, d, t);
3459     tcg_temp_free_i64(t);
3460 }
3461 
3462 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3463 {
3464     TCGv_i32 t;
3465 
3466     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3467     if (sh == 32) {
3468         tcg_gen_extract_i32(d, a, sh - 1, 1);
3469         return;
3470     }
3471     t = tcg_temp_new_i32();
3472     tcg_gen_extract_i32(t, a, sh - 1, 1);
3473     tcg_gen_shri_i32(d, a, sh);
3474     tcg_gen_add_i32(d, d, t);
3475     tcg_temp_free_i32(t);
3476 }
3477 
3478 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3479 {
3480     TCGv_i64 t = tcg_temp_new_i64();
3481 
3482     tcg_gen_extract_i64(t, a, sh - 1, 1);
3483     tcg_gen_shri_i64(d, a, sh);
3484     tcg_gen_add_i64(d, d, t);
3485     tcg_temp_free_i64(t);
3486 }
3487 
3488 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3489 {
3490     TCGv_vec t = tcg_temp_new_vec_matching(d);
3491     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3492 
3493     tcg_gen_shri_vec(vece, t, a, shift - 1);
3494     tcg_gen_dupi_vec(vece, ones, 1);
3495     tcg_gen_and_vec(vece, t, t, ones);
3496     tcg_gen_shri_vec(vece, d, a, shift);
3497     tcg_gen_add_vec(vece, d, d, t);
3498 
3499     tcg_temp_free_vec(t);
3500     tcg_temp_free_vec(ones);
3501 }
3502 
3503 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3504                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3505 {
3506     static const TCGOpcode vecop_list[] = {
3507         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3508     };
3509     static const GVecGen2i ops[4] = {
3510         { .fni8 = gen_urshr8_i64,
3511           .fniv = gen_urshr_vec,
3512           .fno = gen_helper_gvec_urshr_b,
3513           .opt_opc = vecop_list,
3514           .vece = MO_8 },
3515         { .fni8 = gen_urshr16_i64,
3516           .fniv = gen_urshr_vec,
3517           .fno = gen_helper_gvec_urshr_h,
3518           .opt_opc = vecop_list,
3519           .vece = MO_16 },
3520         { .fni4 = gen_urshr32_i32,
3521           .fniv = gen_urshr_vec,
3522           .fno = gen_helper_gvec_urshr_s,
3523           .opt_opc = vecop_list,
3524           .vece = MO_32 },
3525         { .fni8 = gen_urshr64_i64,
3526           .fniv = gen_urshr_vec,
3527           .fno = gen_helper_gvec_urshr_d,
3528           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3529           .opt_opc = vecop_list,
3530           .vece = MO_64 },
3531     };
3532 
3533     /* tszimm encoding produces immediates in the range [1..esize] */
3534     tcg_debug_assert(shift > 0);
3535     tcg_debug_assert(shift <= (8 << vece));
3536 
3537     if (shift == (8 << vece)) {
3538         /*
3539          * Shifts larger than the element size are architecturally valid.
3540          * Unsigned results in zero.  With rounding, this produces a
3541          * copy of the most significant bit.
3542          */
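        /* E.g. at MO_8, a == 0x80: (0x80 + 0x80) >> 8 == 1, i.e. a >> 7. */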
3543         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3544     } else {
3545         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3546     }
3547 }
3548 
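/*
 * For URSRA (below), a shift by the full element size is not a nop: with
 * rounding it leaves just the msb of the input (see gen_gvec_urshr above),
 * which still has to be added into d, hence the explicit sh == esize
 * checks in the helpers below.
 */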
3549 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3550 {
3551     TCGv_i64 t = tcg_temp_new_i64();
3552 
3553     if (sh == 8) {
3554         tcg_gen_vec_shr8i_i64(t, a, 7);
3555     } else {
3556         gen_urshr8_i64(t, a, sh);
3557     }
3558     tcg_gen_vec_add8_i64(d, d, t);
3559     tcg_temp_free_i64(t);
3560 }
3561 
3562 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3563 {
3564     TCGv_i64 t = tcg_temp_new_i64();
3565 
3566     if (sh == 16) {
3567         tcg_gen_vec_shr16i_i64(t, a, 15);
3568     } else {
3569         gen_urshr16_i64(t, a, sh);
3570     }
3571     tcg_gen_vec_add16_i64(d, d, t);
3572     tcg_temp_free_i64(t);
3573 }
3574 
3575 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3576 {
3577     TCGv_i32 t = tcg_temp_new_i32();
3578 
3579     if (sh == 32) {
3580         tcg_gen_shri_i32(t, a, 31);
3581     } else {
3582         gen_urshr32_i32(t, a, sh);
3583     }
3584     tcg_gen_add_i32(d, d, t);
3585     tcg_temp_free_i32(t);
3586 }
3587 
3588 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3589 {
3590     TCGv_i64 t = tcg_temp_new_i64();
3591 
3592     if (sh == 64) {
3593         tcg_gen_shri_i64(t, a, 63);
3594     } else {
3595         gen_urshr64_i64(t, a, sh);
3596     }
3597     tcg_gen_add_i64(d, d, t);
3598     tcg_temp_free_i64(t);
3599 }
3600 
3601 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3602 {
3603     TCGv_vec t = tcg_temp_new_vec_matching(d);
3604 
3605     if (sh == (8 << vece)) {
3606         tcg_gen_shri_vec(vece, t, a, sh - 1);
3607     } else {
3608         gen_urshr_vec(vece, t, a, sh);
3609     }
3610     tcg_gen_add_vec(vece, d, d, t);
3611     tcg_temp_free_vec(t);
3612 }
3613 
3614 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3615                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3616 {
3617     static const TCGOpcode vecop_list[] = {
3618         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3619     };
3620     static const GVecGen2i ops[4] = {
3621         { .fni8 = gen_ursra8_i64,
3622           .fniv = gen_ursra_vec,
3623           .fno = gen_helper_gvec_ursra_b,
3624           .opt_opc = vecop_list,
3625           .load_dest = true,
3626           .vece = MO_8 },
3627         { .fni8 = gen_ursra16_i64,
3628           .fniv = gen_ursra_vec,
3629           .fno = gen_helper_gvec_ursra_h,
3630           .opt_opc = vecop_list,
3631           .load_dest = true,
3632           .vece = MO_16 },
3633         { .fni4 = gen_ursra32_i32,
3634           .fniv = gen_ursra_vec,
3635           .fno = gen_helper_gvec_ursra_s,
3636           .opt_opc = vecop_list,
3637           .load_dest = true,
3638           .vece = MO_32 },
3639         { .fni8 = gen_ursra64_i64,
3640           .fniv = gen_ursra_vec,
3641           .fno = gen_helper_gvec_ursra_d,
3642           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3643           .opt_opc = vecop_list,
3644           .load_dest = true,
3645           .vece = MO_64 },
3646     };
3647 
3648     /* tszimm encoding produces immediates in the range [1..esize] */
3649     tcg_debug_assert(shift > 0);
3650     tcg_debug_assert(shift <= (8 << vece));
3651 
3652     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3653 }
3654 
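/*
 * Shift right and insert: the top 'shift' bits of each destination lane
 * are preserved and the rest are filled from a >> shift.  E.g. at MO_8
 * with shift == 3, d == 0xab and a == 0xff give
 * (0xab & 0xe0) | (0xff >> 3) == 0xa0 | 0x1f == 0xbf.  The shift-left
 * insert expanders further below mirror this, preserving the low 'shift'
 * bits of the destination instead.
 */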
3655 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3656 {
3657     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3658     TCGv_i64 t = tcg_temp_new_i64();
3659 
3660     tcg_gen_shri_i64(t, a, shift);
3661     tcg_gen_andi_i64(t, t, mask);
3662     tcg_gen_andi_i64(d, d, ~mask);
3663     tcg_gen_or_i64(d, d, t);
3664     tcg_temp_free_i64(t);
3665 }
3666 
3667 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3668 {
3669     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3670     TCGv_i64 t = tcg_temp_new_i64();
3671 
3672     tcg_gen_shri_i64(t, a, shift);
3673     tcg_gen_andi_i64(t, t, mask);
3674     tcg_gen_andi_i64(d, d, ~mask);
3675     tcg_gen_or_i64(d, d, t);
3676     tcg_temp_free_i64(t);
3677 }
3678 
3679 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3680 {
3681     tcg_gen_shri_i32(a, a, shift);
3682     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3683 }
3684 
3685 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3686 {
3687     tcg_gen_shri_i64(a, a, shift);
3688     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3689 }
3690 
3691 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3692 {
3693     TCGv_vec t = tcg_temp_new_vec_matching(d);
3694     TCGv_vec m = tcg_temp_new_vec_matching(d);
3695 
3696     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3697     tcg_gen_shri_vec(vece, t, a, sh);
3698     tcg_gen_and_vec(vece, d, d, m);
3699     tcg_gen_or_vec(vece, d, d, t);
3700 
3701     tcg_temp_free_vec(t);
3702     tcg_temp_free_vec(m);
3703 }
3704 
3705 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3706                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3707 {
3708     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3709     const GVecGen2i ops[4] = {
3710         { .fni8 = gen_shr8_ins_i64,
3711           .fniv = gen_shr_ins_vec,
3712           .fno = gen_helper_gvec_sri_b,
3713           .load_dest = true,
3714           .opt_opc = vecop_list,
3715           .vece = MO_8 },
3716         { .fni8 = gen_shr16_ins_i64,
3717           .fniv = gen_shr_ins_vec,
3718           .fno = gen_helper_gvec_sri_h,
3719           .load_dest = true,
3720           .opt_opc = vecop_list,
3721           .vece = MO_16 },
3722         { .fni4 = gen_shr32_ins_i32,
3723           .fniv = gen_shr_ins_vec,
3724           .fno = gen_helper_gvec_sri_s,
3725           .load_dest = true,
3726           .opt_opc = vecop_list,
3727           .vece = MO_32 },
3728         { .fni8 = gen_shr64_ins_i64,
3729           .fniv = gen_shr_ins_vec,
3730           .fno = gen_helper_gvec_sri_d,
3731           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3732           .load_dest = true,
3733           .opt_opc = vecop_list,
3734           .vece = MO_64 },
3735     };
3736 
3737     /* tszimm encoding produces immediates in the range [1..esize]. */
3738     tcg_debug_assert(shift > 0);
3739     tcg_debug_assert(shift <= (8 << vece));
3740 
3741     /* Shift of esize leaves destination unchanged. */
3742     if (shift < (8 << vece)) {
3743         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3744     } else {
3745         /* Nop, but we do need to clear the tail. */
3746         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3747     }
3748 }
3749 
3750 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3751 {
3752     uint64_t mask = dup_const(MO_8, 0xff << shift);
3753     TCGv_i64 t = tcg_temp_new_i64();
3754 
3755     tcg_gen_shli_i64(t, a, shift);
3756     tcg_gen_andi_i64(t, t, mask);
3757     tcg_gen_andi_i64(d, d, ~mask);
3758     tcg_gen_or_i64(d, d, t);
3759     tcg_temp_free_i64(t);
3760 }
3761 
3762 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3763 {
3764     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3765     TCGv_i64 t = tcg_temp_new_i64();
3766 
3767     tcg_gen_shli_i64(t, a, shift);
3768     tcg_gen_andi_i64(t, t, mask);
3769     tcg_gen_andi_i64(d, d, ~mask);
3770     tcg_gen_or_i64(d, d, t);
3771     tcg_temp_free_i64(t);
3772 }
3773 
3774 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3775 {
3776     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3777 }
3778 
3779 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3780 {
3781     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3782 }
3783 
3784 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3785 {
3786     TCGv_vec t = tcg_temp_new_vec_matching(d);
3787     TCGv_vec m = tcg_temp_new_vec_matching(d);
3788 
3789     tcg_gen_shli_vec(vece, t, a, sh);
3790     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3791     tcg_gen_and_vec(vece, d, d, m);
3792     tcg_gen_or_vec(vece, d, d, t);
3793 
3794     tcg_temp_free_vec(t);
3795     tcg_temp_free_vec(m);
3796 }
3797 
3798 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3799                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3800 {
3801     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3802     const GVecGen2i ops[4] = {
3803         { .fni8 = gen_shl8_ins_i64,
3804           .fniv = gen_shl_ins_vec,
3805           .fno = gen_helper_gvec_sli_b,
3806           .load_dest = true,
3807           .opt_opc = vecop_list,
3808           .vece = MO_8 },
3809         { .fni8 = gen_shl16_ins_i64,
3810           .fniv = gen_shl_ins_vec,
3811           .fno = gen_helper_gvec_sli_h,
3812           .load_dest = true,
3813           .opt_opc = vecop_list,
3814           .vece = MO_16 },
3815         { .fni4 = gen_shl32_ins_i32,
3816           .fniv = gen_shl_ins_vec,
3817           .fno = gen_helper_gvec_sli_s,
3818           .load_dest = true,
3819           .opt_opc = vecop_list,
3820           .vece = MO_32 },
3821         { .fni8 = gen_shl64_ins_i64,
3822           .fniv = gen_shl_ins_vec,
3823           .fno = gen_helper_gvec_sli_d,
3824           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3825           .load_dest = true,
3826           .opt_opc = vecop_list,
3827           .vece = MO_64 },
3828     };
3829 
3830     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3831     tcg_debug_assert(shift >= 0);
3832     tcg_debug_assert(shift < (8 << vece));
3833 
3834     if (shift == 0) {
3835         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3836     } else {
3837         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3838     }
3839 }
3840 
3841 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3842 {
3843     gen_helper_neon_mul_u8(a, a, b);
3844     gen_helper_neon_add_u8(d, d, a);
3845 }
3846 
3847 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3848 {
3849     gen_helper_neon_mul_u8(a, a, b);
3850     gen_helper_neon_sub_u8(d, d, a);
3851 }
3852 
3853 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3854 {
3855     gen_helper_neon_mul_u16(a, a, b);
3856     gen_helper_neon_add_u16(d, d, a);
3857 }
3858 
3859 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3860 {
3861     gen_helper_neon_mul_u16(a, a, b);
3862     gen_helper_neon_sub_u16(d, d, a);
3863 }
3864 
3865 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3866 {
3867     tcg_gen_mul_i32(a, a, b);
3868     tcg_gen_add_i32(d, d, a);
3869 }
3870 
3871 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3872 {
3873     tcg_gen_mul_i32(a, a, b);
3874     tcg_gen_sub_i32(d, d, a);
3875 }
3876 
3877 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3878 {
3879     tcg_gen_mul_i64(a, a, b);
3880     tcg_gen_add_i64(d, d, a);
3881 }
3882 
3883 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3884 {
3885     tcg_gen_mul_i64(a, a, b);
3886     tcg_gen_sub_i64(d, d, a);
3887 }
3888 
3889 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3890 {
3891     tcg_gen_mul_vec(vece, a, a, b);
3892     tcg_gen_add_vec(vece, d, d, a);
3893 }
3894 
3895 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3896 {
3897     tcg_gen_mul_vec(vece, a, a, b);
3898     tcg_gen_sub_vec(vece, d, d, a);
3899 }
3900 
3901 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3902  * these tables are shared with AArch64, which does support them.
3903  */
3904 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3905                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3906 {
3907     static const TCGOpcode vecop_list[] = {
3908         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3909     };
3910     static const GVecGen3 ops[4] = {
3911         { .fni4 = gen_mla8_i32,
3912           .fniv = gen_mla_vec,
3913           .load_dest = true,
3914           .opt_opc = vecop_list,
3915           .vece = MO_8 },
3916         { .fni4 = gen_mla16_i32,
3917           .fniv = gen_mla_vec,
3918           .load_dest = true,
3919           .opt_opc = vecop_list,
3920           .vece = MO_16 },
3921         { .fni4 = gen_mla32_i32,
3922           .fniv = gen_mla_vec,
3923           .load_dest = true,
3924           .opt_opc = vecop_list,
3925           .vece = MO_32 },
3926         { .fni8 = gen_mla64_i64,
3927           .fniv = gen_mla_vec,
3928           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3929           .load_dest = true,
3930           .opt_opc = vecop_list,
3931           .vece = MO_64 },
3932     };
3933     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3934 }
3935 
3936 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3937                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3938 {
3939     static const TCGOpcode vecop_list[] = {
3940         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3941     };
3942     static const GVecGen3 ops[4] = {
3943         { .fni4 = gen_mls8_i32,
3944           .fniv = gen_mls_vec,
3945           .load_dest = true,
3946           .opt_opc = vecop_list,
3947           .vece = MO_8 },
3948         { .fni4 = gen_mls16_i32,
3949           .fniv = gen_mls_vec,
3950           .load_dest = true,
3951           .opt_opc = vecop_list,
3952           .vece = MO_16 },
3953         { .fni4 = gen_mls32_i32,
3954           .fniv = gen_mls_vec,
3955           .load_dest = true,
3956           .opt_opc = vecop_list,
3957           .vece = MO_32 },
3958         { .fni8 = gen_mls64_i64,
3959           .fniv = gen_mls_vec,
3960           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3961           .load_dest = true,
3962           .opt_opc = vecop_list,
3963           .vece = MO_64 },
3964     };
3965     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3966 }
3967 
3968 /* CMTST: test is "if ((X & Y) != 0)". */
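/*
 * Each lane becomes all-ones when (a & b) != 0 and zero otherwise:
 * e.g. a == 1, b == 3 gives -1, while a == 1, b == 2 gives 0.
 */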
3969 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3970 {
3971     tcg_gen_and_i32(d, a, b);
3972     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3973     tcg_gen_neg_i32(d, d);
3974 }
3975 
3976 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3977 {
3978     tcg_gen_and_i64(d, a, b);
3979     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3980     tcg_gen_neg_i64(d, d);
3981 }
3982 
3983 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3984 {
3985     tcg_gen_and_vec(vece, d, a, b);
3986     tcg_gen_dupi_vec(vece, a, 0);
3987     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3988 }
3989 
3990 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3991                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3992 {
3993     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3994     static const GVecGen3 ops[4] = {
3995         { .fni4 = gen_helper_neon_tst_u8,
3996           .fniv = gen_cmtst_vec,
3997           .opt_opc = vecop_list,
3998           .vece = MO_8 },
3999         { .fni4 = gen_helper_neon_tst_u16,
4000           .fniv = gen_cmtst_vec,
4001           .opt_opc = vecop_list,
4002           .vece = MO_16 },
4003         { .fni4 = gen_cmtst_i32,
4004           .fniv = gen_cmtst_vec,
4005           .opt_opc = vecop_list,
4006           .vece = MO_32 },
4007         { .fni8 = gen_cmtst_i64,
4008           .fniv = gen_cmtst_vec,
4009           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4010           .opt_opc = vecop_list,
4011           .vece = MO_64 },
4012     };
4013     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4014 }
4015 
4016 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4017 {
4018     TCGv_i32 lval = tcg_temp_new_i32();
4019     TCGv_i32 rval = tcg_temp_new_i32();
4020     TCGv_i32 lsh = tcg_temp_new_i32();
4021     TCGv_i32 rsh = tcg_temp_new_i32();
4022     TCGv_i32 zero = tcg_constant_i32(0);
4023     TCGv_i32 max = tcg_constant_i32(32);
4024 
4025     /*
4026      * Rely on the TCG guarantee that out of range shifts produce
4027      * unspecified results, not undefined behaviour (i.e. no trap).
4028      * Discard out-of-range results after the fact.
4029      */
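    /*
     * The effective shift is the signed low byte of the shift operand:
     * positive shifts left, negative shifts right, and a magnitude of 32
     * or more produces zero.  E.g. src == 0x10 with a shift byte of 0x03
     * gives 0x80, with 0xfe (-2) gives 0x04, and with 0x28 (40) gives 0.
     */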
4030     tcg_gen_ext8s_i32(lsh, shift);
4031     tcg_gen_neg_i32(rsh, lsh);
4032     tcg_gen_shl_i32(lval, src, lsh);
4033     tcg_gen_shr_i32(rval, src, rsh);
4034     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4035     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4036 
4037     tcg_temp_free_i32(lval);
4038     tcg_temp_free_i32(rval);
4039     tcg_temp_free_i32(lsh);
4040     tcg_temp_free_i32(rsh);
4041 }
4042 
4043 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4044 {
4045     TCGv_i64 lval = tcg_temp_new_i64();
4046     TCGv_i64 rval = tcg_temp_new_i64();
4047     TCGv_i64 lsh = tcg_temp_new_i64();
4048     TCGv_i64 rsh = tcg_temp_new_i64();
4049     TCGv_i64 zero = tcg_constant_i64(0);
4050     TCGv_i64 max = tcg_constant_i64(64);
4051 
4052     /*
4053      * Rely on the TCG guarantee that out of range shifts produce
4054      * unspecified results, not undefined behaviour (i.e. no trap).
4055      * Discard out-of-range results after the fact.
4056      */
4057     tcg_gen_ext8s_i64(lsh, shift);
4058     tcg_gen_neg_i64(rsh, lsh);
4059     tcg_gen_shl_i64(lval, src, lsh);
4060     tcg_gen_shr_i64(rval, src, rsh);
4061     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4062     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4063 
4064     tcg_temp_free_i64(lval);
4065     tcg_temp_free_i64(rval);
4066     tcg_temp_free_i64(lsh);
4067     tcg_temp_free_i64(rsh);
4068 }
4069 
4070 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4071                          TCGv_vec src, TCGv_vec shift)
4072 {
4073     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4074     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4075     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4076     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4077     TCGv_vec msk, max;
4078 
4079     tcg_gen_neg_vec(vece, rsh, shift);
4080     if (vece == MO_8) {
4081         tcg_gen_mov_vec(lsh, shift);
4082     } else {
4083         msk = tcg_temp_new_vec_matching(dst);
4084         tcg_gen_dupi_vec(vece, msk, 0xff);
4085         tcg_gen_and_vec(vece, lsh, shift, msk);
4086         tcg_gen_and_vec(vece, rsh, rsh, msk);
4087         tcg_temp_free_vec(msk);
4088     }
4089 
4090     /*
4091      * Rely on the TCG guarantee that out of range shifts produce
4092      * unspecified results, not undefined behaviour (i.e. no trap).
4093      * Discard out-of-range results after the fact.
4094      */
4095     tcg_gen_shlv_vec(vece, lval, src, lsh);
4096     tcg_gen_shrv_vec(vece, rval, src, rsh);
4097 
4098     max = tcg_temp_new_vec_matching(dst);
4099     tcg_gen_dupi_vec(vece, max, 8 << vece);
4100 
4101     /*
4102      * The choice of LT (signed) and GEU (unsigned) is biased toward
4103      * the instructions of the x86_64 host.  For MO_8, the whole byte
4104      * is significant so we must use an unsigned compare; otherwise we
4105      * have already masked to a byte and so a signed compare works.
4106      * Other tcg hosts have a full set of comparisons and do not care.
4107      */
4108     if (vece == MO_8) {
4109         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4110         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4111         tcg_gen_andc_vec(vece, lval, lval, lsh);
4112         tcg_gen_andc_vec(vece, rval, rval, rsh);
4113     } else {
4114         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4115         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4116         tcg_gen_and_vec(vece, lval, lval, lsh);
4117         tcg_gen_and_vec(vece, rval, rval, rsh);
4118     }
4119     tcg_gen_or_vec(vece, dst, lval, rval);
4120 
4121     tcg_temp_free_vec(max);
4122     tcg_temp_free_vec(lval);
4123     tcg_temp_free_vec(rval);
4124     tcg_temp_free_vec(lsh);
4125     tcg_temp_free_vec(rsh);
4126 }
4127 
4128 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4129                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4130 {
4131     static const TCGOpcode vecop_list[] = {
4132         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4133         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4134     };
4135     static const GVecGen3 ops[4] = {
4136         { .fniv = gen_ushl_vec,
4137           .fno = gen_helper_gvec_ushl_b,
4138           .opt_opc = vecop_list,
4139           .vece = MO_8 },
4140         { .fniv = gen_ushl_vec,
4141           .fno = gen_helper_gvec_ushl_h,
4142           .opt_opc = vecop_list,
4143           .vece = MO_16 },
4144         { .fni4 = gen_ushl_i32,
4145           .fniv = gen_ushl_vec,
4146           .opt_opc = vecop_list,
4147           .vece = MO_32 },
4148         { .fni8 = gen_ushl_i64,
4149           .fniv = gen_ushl_vec,
4150           .opt_opc = vecop_list,
4151           .vece = MO_64 },
4152     };
4153     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4154 }
4155 
4156 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4157 {
4158     TCGv_i32 lval = tcg_temp_new_i32();
4159     TCGv_i32 rval = tcg_temp_new_i32();
4160     TCGv_i32 lsh = tcg_temp_new_i32();
4161     TCGv_i32 rsh = tcg_temp_new_i32();
4162     TCGv_i32 zero = tcg_constant_i32(0);
4163     TCGv_i32 max = tcg_constant_i32(31);
4164 
4165     /*
4166      * Rely on the TCG guarantee that out of range shifts produce
4167      * unspecified results, not undefined behaviour (i.e. no trap).
4168      * Discard out-of-range results after the fact.
4169      */
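    /*
     * As for gen_ushl_i32, the signed low byte of the shift operand
     * selects the shift, but here the right shift is arithmetic and an
     * over-large one leaves only the sign.  E.g. src == -32 with a shift
     * byte of -4 gives -2, with -70 gives -1, and with +32 or more gives 0.
     */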
4170     tcg_gen_ext8s_i32(lsh, shift);
4171     tcg_gen_neg_i32(rsh, lsh);
4172     tcg_gen_shl_i32(lval, src, lsh);
4173     tcg_gen_umin_i32(rsh, rsh, max);
4174     tcg_gen_sar_i32(rval, src, rsh);
4175     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4176     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4177 
4178     tcg_temp_free_i32(lval);
4179     tcg_temp_free_i32(rval);
4180     tcg_temp_free_i32(lsh);
4181     tcg_temp_free_i32(rsh);
4182 }
4183 
4184 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4185 {
4186     TCGv_i64 lval = tcg_temp_new_i64();
4187     TCGv_i64 rval = tcg_temp_new_i64();
4188     TCGv_i64 lsh = tcg_temp_new_i64();
4189     TCGv_i64 rsh = tcg_temp_new_i64();
4190     TCGv_i64 zero = tcg_constant_i64(0);
4191     TCGv_i64 max = tcg_constant_i64(63);
4192 
4193     /*
4194      * Rely on the TCG guarantee that out of range shifts produce
4195      * unspecified results, not undefined behaviour (i.e. no trap).
4196      * Discard out-of-range results after the fact.
4197      */
4198     tcg_gen_ext8s_i64(lsh, shift);
4199     tcg_gen_neg_i64(rsh, lsh);
4200     tcg_gen_shl_i64(lval, src, lsh);
4201     tcg_gen_umin_i64(rsh, rsh, max);
4202     tcg_gen_sar_i64(rval, src, rsh);
4203     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4204     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4205 
4206     tcg_temp_free_i64(lval);
4207     tcg_temp_free_i64(rval);
4208     tcg_temp_free_i64(lsh);
4209     tcg_temp_free_i64(rsh);
4210 }
4211 
4212 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4213                          TCGv_vec src, TCGv_vec shift)
4214 {
4215     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4216     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4217     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4218     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4219     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4220 
4221     /*
4222      * Rely on the TCG guarantee that out of range shifts produce
4223      * unspecified results, not undefined behaviour (i.e. no trap).
4224      * Discard out-of-range results after the fact.
4225      */
4226     tcg_gen_neg_vec(vece, rsh, shift);
4227     if (vece == MO_8) {
4228         tcg_gen_mov_vec(lsh, shift);
4229     } else {
4230         tcg_gen_dupi_vec(vece, tmp, 0xff);
4231         tcg_gen_and_vec(vece, lsh, shift, tmp);
4232         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4233     }
4234 
4235     /* Bound rsh so an out-of-range right shift yields all sign bits (0 or -1). */
4236     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4237     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4238     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4239 
4240     tcg_gen_shlv_vec(vece, lval, src, lsh);
4241     tcg_gen_sarv_vec(vece, rval, src, rsh);
4242 
4243     /* Select in-bound left shift.  */
4244     tcg_gen_andc_vec(vece, lval, lval, tmp);
4245 
4246     /* Select between left and right shift.  */
4247     if (vece == MO_8) {
4248         tcg_gen_dupi_vec(vece, tmp, 0);
4249         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4250     } else {
4251         tcg_gen_dupi_vec(vece, tmp, 0x80);
4252         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4253     }
4254 
4255     tcg_temp_free_vec(lval);
4256     tcg_temp_free_vec(rval);
4257     tcg_temp_free_vec(lsh);
4258     tcg_temp_free_vec(rsh);
4259     tcg_temp_free_vec(tmp);
4260 }
4261 
4262 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4263                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4264 {
4265     static const TCGOpcode vecop_list[] = {
4266         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4267         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4268     };
4269     static const GVecGen3 ops[4] = {
4270         { .fniv = gen_sshl_vec,
4271           .fno = gen_helper_gvec_sshl_b,
4272           .opt_opc = vecop_list,
4273           .vece = MO_8 },
4274         { .fniv = gen_sshl_vec,
4275           .fno = gen_helper_gvec_sshl_h,
4276           .opt_opc = vecop_list,
4277           .vece = MO_16 },
4278         { .fni4 = gen_sshl_i32,
4279           .fniv = gen_sshl_vec,
4280           .opt_opc = vecop_list,
4281           .vece = MO_32 },
4282         { .fni8 = gen_sshl_i64,
4283           .fniv = gen_sshl_vec,
4284           .opt_opc = vecop_list,
4285           .vece = MO_64 },
4286     };
4287     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4288 }
4289 
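/*
 * The saturating add/sub expanders below all use the same trick to update
 * QC: compute both the wrapping and the saturating result, compare them
 * for inequality (yielding all-ones in any lane that saturated), and OR
 * that into the sat operand, which the gen_gvec_*_qc callers point at
 * vfp.qc.  E.g. at MO_8, UQADD of a == 0xf0 and b == 0x20 wraps to 0x10
 * but saturates to 0xff, so that lane sets QC.
 */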
4290 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4291                           TCGv_vec a, TCGv_vec b)
4292 {
4293     TCGv_vec x = tcg_temp_new_vec_matching(t);
4294     tcg_gen_add_vec(vece, x, a, b);
4295     tcg_gen_usadd_vec(vece, t, a, b);
4296     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4297     tcg_gen_or_vec(vece, sat, sat, x);
4298     tcg_temp_free_vec(x);
4299 }
4300 
4301 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4302                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4303 {
4304     static const TCGOpcode vecop_list[] = {
4305         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4306     };
4307     static const GVecGen4 ops[4] = {
4308         { .fniv = gen_uqadd_vec,
4309           .fno = gen_helper_gvec_uqadd_b,
4310           .write_aofs = true,
4311           .opt_opc = vecop_list,
4312           .vece = MO_8 },
4313         { .fniv = gen_uqadd_vec,
4314           .fno = gen_helper_gvec_uqadd_h,
4315           .write_aofs = true,
4316           .opt_opc = vecop_list,
4317           .vece = MO_16 },
4318         { .fniv = gen_uqadd_vec,
4319           .fno = gen_helper_gvec_uqadd_s,
4320           .write_aofs = true,
4321           .opt_opc = vecop_list,
4322           .vece = MO_32 },
4323         { .fniv = gen_uqadd_vec,
4324           .fno = gen_helper_gvec_uqadd_d,
4325           .write_aofs = true,
4326           .opt_opc = vecop_list,
4327           .vece = MO_64 },
4328     };
4329     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4330                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4331 }
4332 
4333 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4334                           TCGv_vec a, TCGv_vec b)
4335 {
4336     TCGv_vec x = tcg_temp_new_vec_matching(t);
4337     tcg_gen_add_vec(vece, x, a, b);
4338     tcg_gen_ssadd_vec(vece, t, a, b);
4339     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4340     tcg_gen_or_vec(vece, sat, sat, x);
4341     tcg_temp_free_vec(x);
4342 }
4343 
4344 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4345                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4346 {
4347     static const TCGOpcode vecop_list[] = {
4348         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4349     };
4350     static const GVecGen4 ops[4] = {
4351         { .fniv = gen_sqadd_vec,
4352           .fno = gen_helper_gvec_sqadd_b,
4353           .opt_opc = vecop_list,
4354           .write_aofs = true,
4355           .vece = MO_8 },
4356         { .fniv = gen_sqadd_vec,
4357           .fno = gen_helper_gvec_sqadd_h,
4358           .opt_opc = vecop_list,
4359           .write_aofs = true,
4360           .vece = MO_16 },
4361         { .fniv = gen_sqadd_vec,
4362           .fno = gen_helper_gvec_sqadd_s,
4363           .opt_opc = vecop_list,
4364           .write_aofs = true,
4365           .vece = MO_32 },
4366         { .fniv = gen_sqadd_vec,
4367           .fno = gen_helper_gvec_sqadd_d,
4368           .opt_opc = vecop_list,
4369           .write_aofs = true,
4370           .vece = MO_64 },
4371     };
4372     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4373                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4374 }
4375 
4376 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4377                           TCGv_vec a, TCGv_vec b)
4378 {
4379     TCGv_vec x = tcg_temp_new_vec_matching(t);
4380     tcg_gen_sub_vec(vece, x, a, b);
4381     tcg_gen_ussub_vec(vece, t, a, b);
4382     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4383     tcg_gen_or_vec(vece, sat, sat, x);
4384     tcg_temp_free_vec(x);
4385 }
4386 
4387 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4388                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4389 {
4390     static const TCGOpcode vecop_list[] = {
4391         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4392     };
4393     static const GVecGen4 ops[4] = {
4394         { .fniv = gen_uqsub_vec,
4395           .fno = gen_helper_gvec_uqsub_b,
4396           .opt_opc = vecop_list,
4397           .write_aofs = true,
4398           .vece = MO_8 },
4399         { .fniv = gen_uqsub_vec,
4400           .fno = gen_helper_gvec_uqsub_h,
4401           .opt_opc = vecop_list,
4402           .write_aofs = true,
4403           .vece = MO_16 },
4404         { .fniv = gen_uqsub_vec,
4405           .fno = gen_helper_gvec_uqsub_s,
4406           .opt_opc = vecop_list,
4407           .write_aofs = true,
4408           .vece = MO_32 },
4409         { .fniv = gen_uqsub_vec,
4410           .fno = gen_helper_gvec_uqsub_d,
4411           .opt_opc = vecop_list,
4412           .write_aofs = true,
4413           .vece = MO_64 },
4414     };
4415     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4416                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4417 }
4418 
4419 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4420                           TCGv_vec a, TCGv_vec b)
4421 {
4422     TCGv_vec x = tcg_temp_new_vec_matching(t);
4423     tcg_gen_sub_vec(vece, x, a, b);
4424     tcg_gen_sssub_vec(vece, t, a, b);
4425     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4426     tcg_gen_or_vec(vece, sat, sat, x);
4427     tcg_temp_free_vec(x);
4428 }
4429 
4430 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4431                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4432 {
4433     static const TCGOpcode vecop_list[] = {
4434         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4435     };
4436     static const GVecGen4 ops[4] = {
4437         { .fniv = gen_sqsub_vec,
4438           .fno = gen_helper_gvec_sqsub_b,
4439           .opt_opc = vecop_list,
4440           .write_aofs = true,
4441           .vece = MO_8 },
4442         { .fniv = gen_sqsub_vec,
4443           .fno = gen_helper_gvec_sqsub_h,
4444           .opt_opc = vecop_list,
4445           .write_aofs = true,
4446           .vece = MO_16 },
4447         { .fniv = gen_sqsub_vec,
4448           .fno = gen_helper_gvec_sqsub_s,
4449           .opt_opc = vecop_list,
4450           .write_aofs = true,
4451           .vece = MO_32 },
4452         { .fniv = gen_sqsub_vec,
4453           .fno = gen_helper_gvec_sqsub_d,
4454           .opt_opc = vecop_list,
4455           .write_aofs = true,
4456           .vece = MO_64 },
4457     };
4458     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4459                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4460 }
4461 
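/*
 * Absolute difference: the scalar forms compute both a - b and b - a and
 * pick the non-negative one with a movcond, while the vector forms use
 * max(a, b) - min(a, b).  E.g. a == 3, b == 10 gives 7 either way.
 */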
4462 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4463 {
4464     TCGv_i32 t = tcg_temp_new_i32();
4465 
4466     tcg_gen_sub_i32(t, a, b);
4467     tcg_gen_sub_i32(d, b, a);
4468     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4469     tcg_temp_free_i32(t);
4470 }
4471 
4472 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4473 {
4474     TCGv_i64 t = tcg_temp_new_i64();
4475 
4476     tcg_gen_sub_i64(t, a, b);
4477     tcg_gen_sub_i64(d, b, a);
4478     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4479     tcg_temp_free_i64(t);
4480 }
4481 
4482 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4483 {
4484     TCGv_vec t = tcg_temp_new_vec_matching(d);
4485 
4486     tcg_gen_smin_vec(vece, t, a, b);
4487     tcg_gen_smax_vec(vece, d, a, b);
4488     tcg_gen_sub_vec(vece, d, d, t);
4489     tcg_temp_free_vec(t);
4490 }
4491 
4492 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4493                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4494 {
4495     static const TCGOpcode vecop_list[] = {
4496         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4497     };
4498     static const GVecGen3 ops[4] = {
4499         { .fniv = gen_sabd_vec,
4500           .fno = gen_helper_gvec_sabd_b,
4501           .opt_opc = vecop_list,
4502           .vece = MO_8 },
4503         { .fniv = gen_sabd_vec,
4504           .fno = gen_helper_gvec_sabd_h,
4505           .opt_opc = vecop_list,
4506           .vece = MO_16 },
4507         { .fni4 = gen_sabd_i32,
4508           .fniv = gen_sabd_vec,
4509           .fno = gen_helper_gvec_sabd_s,
4510           .opt_opc = vecop_list,
4511           .vece = MO_32 },
4512         { .fni8 = gen_sabd_i64,
4513           .fniv = gen_sabd_vec,
4514           .fno = gen_helper_gvec_sabd_d,
4515           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4516           .opt_opc = vecop_list,
4517           .vece = MO_64 },
4518     };
4519     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4520 }
4521 
4522 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4523 {
4524     TCGv_i32 t = tcg_temp_new_i32();
4525 
4526     tcg_gen_sub_i32(t, a, b);
4527     tcg_gen_sub_i32(d, b, a);
4528     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4529     tcg_temp_free_i32(t);
4530 }
4531 
4532 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4533 {
4534     TCGv_i64 t = tcg_temp_new_i64();
4535 
4536     tcg_gen_sub_i64(t, a, b);
4537     tcg_gen_sub_i64(d, b, a);
4538     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4539     tcg_temp_free_i64(t);
4540 }
4541 
4542 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4543 {
4544     TCGv_vec t = tcg_temp_new_vec_matching(d);
4545 
4546     tcg_gen_umin_vec(vece, t, a, b);
4547     tcg_gen_umax_vec(vece, d, a, b);
4548     tcg_gen_sub_vec(vece, d, d, t);
4549     tcg_temp_free_vec(t);
4550 }
4551 
4552 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4553                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4554 {
4555     static const TCGOpcode vecop_list[] = {
4556         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4557     };
4558     static const GVecGen3 ops[4] = {
4559         { .fniv = gen_uabd_vec,
4560           .fno = gen_helper_gvec_uabd_b,
4561           .opt_opc = vecop_list,
4562           .vece = MO_8 },
4563         { .fniv = gen_uabd_vec,
4564           .fno = gen_helper_gvec_uabd_h,
4565           .opt_opc = vecop_list,
4566           .vece = MO_16 },
4567         { .fni4 = gen_uabd_i32,
4568           .fniv = gen_uabd_vec,
4569           .fno = gen_helper_gvec_uabd_s,
4570           .opt_opc = vecop_list,
4571           .vece = MO_32 },
4572         { .fni8 = gen_uabd_i64,
4573           .fniv = gen_uabd_vec,
4574           .fno = gen_helper_gvec_uabd_d,
4575           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4576           .opt_opc = vecop_list,
4577           .vece = MO_64 },
4578     };
4579     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4580 }
4581 
4582 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4583 {
4584     TCGv_i32 t = tcg_temp_new_i32();
4585     gen_sabd_i32(t, a, b);
4586     tcg_gen_add_i32(d, d, t);
4587     tcg_temp_free_i32(t);
4588 }
4589 
4590 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4591 {
4592     TCGv_i64 t = tcg_temp_new_i64();
4593     gen_sabd_i64(t, a, b);
4594     tcg_gen_add_i64(d, d, t);
4595     tcg_temp_free_i64(t);
4596 }
4597 
4598 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4599 {
4600     TCGv_vec t = tcg_temp_new_vec_matching(d);
4601     gen_sabd_vec(vece, t, a, b);
4602     tcg_gen_add_vec(vece, d, d, t);
4603     tcg_temp_free_vec(t);
4604 }
4605 
4606 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4607                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4608 {
4609     static const TCGOpcode vecop_list[] = {
4610         INDEX_op_sub_vec, INDEX_op_add_vec,
4611         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4612     };
4613     static const GVecGen3 ops[4] = {
4614         { .fniv = gen_saba_vec,
4615           .fno = gen_helper_gvec_saba_b,
4616           .opt_opc = vecop_list,
4617           .load_dest = true,
4618           .vece = MO_8 },
4619         { .fniv = gen_saba_vec,
4620           .fno = gen_helper_gvec_saba_h,
4621           .opt_opc = vecop_list,
4622           .load_dest = true,
4623           .vece = MO_16 },
4624         { .fni4 = gen_saba_i32,
4625           .fniv = gen_saba_vec,
4626           .fno = gen_helper_gvec_saba_s,
4627           .opt_opc = vecop_list,
4628           .load_dest = true,
4629           .vece = MO_32 },
4630         { .fni8 = gen_saba_i64,
4631           .fniv = gen_saba_vec,
4632           .fno = gen_helper_gvec_saba_d,
4633           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4634           .opt_opc = vecop_list,
4635           .load_dest = true,
4636           .vece = MO_64 },
4637     };
4638     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4639 }
4640 
4641 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4642 {
4643     TCGv_i32 t = tcg_temp_new_i32();
4644     gen_uabd_i32(t, a, b);
4645     tcg_gen_add_i32(d, d, t);
4646     tcg_temp_free_i32(t);
4647 }
4648 
4649 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4650 {
4651     TCGv_i64 t = tcg_temp_new_i64();
4652     gen_uabd_i64(t, a, b);
4653     tcg_gen_add_i64(d, d, t);
4654     tcg_temp_free_i64(t);
4655 }
4656 
4657 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4658 {
4659     TCGv_vec t = tcg_temp_new_vec_matching(d);
4660     gen_uabd_vec(vece, t, a, b);
4661     tcg_gen_add_vec(vece, d, d, t);
4662     tcg_temp_free_vec(t);
4663 }
4664 
4665 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4666                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4667 {
4668     static const TCGOpcode vecop_list[] = {
4669         INDEX_op_sub_vec, INDEX_op_add_vec,
4670         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4671     };
4672     static const GVecGen3 ops[4] = {
4673         { .fniv = gen_uaba_vec,
4674           .fno = gen_helper_gvec_uaba_b,
4675           .opt_opc = vecop_list,
4676           .load_dest = true,
4677           .vece = MO_8 },
4678         { .fniv = gen_uaba_vec,
4679           .fno = gen_helper_gvec_uaba_h,
4680           .opt_opc = vecop_list,
4681           .load_dest = true,
4682           .vece = MO_16 },
4683         { .fni4 = gen_uaba_i32,
4684           .fniv = gen_uaba_vec,
4685           .fno = gen_helper_gvec_uaba_s,
4686           .opt_opc = vecop_list,
4687           .load_dest = true,
4688           .vece = MO_32 },
4689         { .fni8 = gen_uaba_i64,
4690           .fniv = gen_uaba_vec,
4691           .fno = gen_helper_gvec_uaba_d,
4692           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4693           .opt_opc = vecop_list,
4694           .load_dest = true,
4695           .vece = MO_64 },
4696     };
4697     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4698 }
4699 
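/*
 * Emit code for an AArch32 coprocessor register access: MRC/MCR when
 * is64 is false, MRRC/MCRR when it is true.  This looks up the
 * ARMCPRegInfo for the encoding, builds the trap syndrome, emits any
 * HSTR_EL2 and runtime access-permission checks, and then performs the
 * read into rt (and rt2 for the 64-bit forms) or the write from those
 * registers.
 */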
4700 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4701                            int opc1, int crn, int crm, int opc2,
4702                            bool isread, int rt, int rt2)
4703 {
4704     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4705     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4706     TCGv_ptr tcg_ri = NULL;
4707     bool need_exit_tb;
4708     uint32_t syndrome;
4709 
4710     /*
4711      * Note that since we are an implementation which takes an
4712      * exception on a trapped conditional instruction only if the
4713      * instruction passes its condition code check, we can take
4714      * advantage of the clause in the ARM ARM that allows us to set
4715      * the COND field in the instruction to 0xE in all cases.
4716      * We could fish the actual condition out of the insn (ARM)
4717      * or the condexec bits (Thumb) but it isn't necessary.
4718      */
4719     switch (cpnum) {
4720     case 14:
4721         if (is64) {
4722             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4723                                          isread, false);
4724         } else {
4725             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4726                                         rt, isread, false);
4727         }
4728         break;
4729     case 15:
4730         if (is64) {
4731             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4732                                          isread, false);
4733         } else {
4734             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4735                                         rt, isread, false);
4736         }
4737         break;
4738     default:
4739         /*
4740          * ARMv8 defines that only coprocessors 14 and 15 exist,
4741          * so this can only happen if this is an ARMv7 or earlier CPU,
4742          * in which case the syndrome information won't actually be
4743          * guest visible.
4744          */
4745         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4746         syndrome = syn_uncategorized();
4747         break;
4748     }
4749 
4750     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4751         /*
4752          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4753          * over the UNDEF for "no such register" or the UNDEF for "access
4754          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4755          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4756          * access_check_cp_reg(), after the checks for whether the access
4757          * configurably trapped to EL1.
4758          * is configurably trapped to EL1.
4759         uint32_t maskbit = is64 ? crm : crn;
4760 
4761         if (maskbit != 4 && maskbit != 14) {
4762             /* T4 and T14 are RES0 so never cause traps */
4763             TCGv_i32 t;
4764             DisasLabel over = gen_disas_label(s);
4765 
4766             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4767             tcg_gen_andi_i32(t, t, 1u << maskbit);
4768             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4769             tcg_temp_free_i32(t);
4770 
4771             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4772             set_disas_label(s, over);
4773         }
4774     }
4775 
4776     if (!ri) {
4777         /*
4778          * Unknown register; this might be a guest error or a QEMU
4779          * unimplemented feature.
4780          */
4781         if (is64) {
4782             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4783                           "64 bit system register cp:%d opc1: %d crm:%d "
4784                           "(%s)\n",
4785                           isread ? "read" : "write", cpnum, opc1, crm,
4786                           s->ns ? "non-secure" : "secure");
4787         } else {
4788             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4789                           "system register cp:%d opc1:%d crn:%d crm:%d "
4790                           "opc2:%d (%s)\n",
4791                           isread ? "read" : "write", cpnum, opc1, crn,
4792                           crm, opc2, s->ns ? "non-secure" : "secure");
4793         }
4794         unallocated_encoding(s);
4795         return;
4796     }
4797 
4798     /* Check access permissions */
4799     if (!cp_access_ok(s->current_el, ri, isread)) {
4800         unallocated_encoding(s);
4801         return;
4802     }
4803 
4804     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4805         (ri->fgt && s->fgt_active) ||
4806         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4807         /*
4808          * Emit code to perform further access permissions checks at
4809          * runtime; this may result in an exception.
4810          * Note that on XScale all cp0..cp13 registers do an access check
4811          * call in order to handle c15_cpar.
4812          */
4813         gen_set_condexec(s);
4814         gen_update_pc(s, 0);
4815         tcg_ri = tcg_temp_new_ptr();
4816         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4817                                        tcg_constant_i32(key),
4818                                        tcg_constant_i32(syndrome),
4819                                        tcg_constant_i32(isread));
4820     } else if (ri->type & ARM_CP_RAISES_EXC) {
4821         /*
4822          * The readfn or writefn might raise an exception;
4823          * synchronize the CPU state in case it does.
4824          */
4825         gen_set_condexec(s);
4826         gen_update_pc(s, 0);
4827     }
4828 
4829     /* Handle special cases first */
4830     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4831     case 0:
4832         break;
4833     case ARM_CP_NOP:
4834         goto exit;
4835     case ARM_CP_WFI:
4836         if (isread) {
4837             unallocated_encoding(s);
4838         } else {
4839             gen_update_pc(s, curr_insn_len(s));
4840             s->base.is_jmp = DISAS_WFI;
4841         }
4842         goto exit;
4843     default:
4844         g_assert_not_reached();
4845     }
4846 
4847     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4848         gen_io_start();
4849     }
4850 
4851     if (isread) {
4852         /* Read */
4853         if (is64) {
4854             TCGv_i64 tmp64;
4855             TCGv_i32 tmp;
4856             if (ri->type & ARM_CP_CONST) {
4857                 tmp64 = tcg_constant_i64(ri->resetvalue);
4858             } else if (ri->readfn) {
4859                 if (!tcg_ri) {
4860                     tcg_ri = gen_lookup_cp_reg(key);
4861                 }
4862                 tmp64 = tcg_temp_new_i64();
4863                 gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4864             } else {
4865                 tmp64 = tcg_temp_new_i64();
4866                 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4867             }
4868             tmp = tcg_temp_new_i32();
4869             tcg_gen_extrl_i64_i32(tmp, tmp64);
4870             store_reg(s, rt, tmp);
4871             tmp = tcg_temp_new_i32();
4872             tcg_gen_extrh_i64_i32(tmp, tmp64);
4873             tcg_temp_free_i64(tmp64);
4874             store_reg(s, rt2, tmp);
4875         } else {
4876             TCGv_i32 tmp;
4877             if (ri->type & ARM_CP_CONST) {
4878                 tmp = tcg_constant_i32(ri->resetvalue);
4879             } else if (ri->readfn) {
4880                 if (!tcg_ri) {
4881                     tcg_ri = gen_lookup_cp_reg(key);
4882                 }
4883                 tmp = tcg_temp_new_i32();
4884                 gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4885             } else {
4886                 tmp = load_cpu_offset(ri->fieldoffset);
4887             }
4888             if (rt == 15) {
4889                 /* Destination register of r15 for 32 bit loads sets
4890                 /* A destination register of r15 for a 32-bit read
4891                  * sets the condition codes (NZCV) from the high
4892                  * 4 bits of the value.  */
4893                 tcg_temp_free_i32(tmp);
4894             } else {
4895                 store_reg(s, rt, tmp);
4896             }
4897         }
4898     } else {
4899         /* Write */
4900         if (ri->type & ARM_CP_CONST) {
4901             /* If not forbidden by access permissions, treat as WI */
4902             goto exit;
4903         }
4904 
4905         if (is64) {
4906             TCGv_i32 tmplo, tmphi;
4907             TCGv_i64 tmp64 = tcg_temp_new_i64();
4908             tmplo = load_reg(s, rt);
4909             tmphi = load_reg(s, rt2);
4910             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4911             tcg_temp_free_i32(tmplo);
4912             tcg_temp_free_i32(tmphi);
4913             if (ri->writefn) {
4914                 if (!tcg_ri) {
4915                     tcg_ri = gen_lookup_cp_reg(key);
4916                 }
4917                 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4918             } else {
4919                 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4920             }
4921             tcg_temp_free_i64(tmp64);
4922         } else {
4923             TCGv_i32 tmp = load_reg(s, rt);
4924             if (ri->writefn) {
4925                 if (!tcg_ri) {
4926                     tcg_ri = gen_lookup_cp_reg(key);
4927                 }
4928                 gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4929                 tcg_temp_free_i32(tmp);
4930             } else {
4931                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4932             }
4933         }
4934     }
4935 
4936     /* I/O operations must end the TB here (whether read or write) */
4937     need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4938                     (ri->type & ARM_CP_IO));
4939 
4940     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4941         /*
4942          * A write to any coprocessor register that ends a TB
4943          * must rebuild the hflags for the next TB.
4944          */
4945         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4946         /*
4947          * We default to ending the TB on a coprocessor register write,
4948          * but allow this to be suppressed by the register definition
4949          * (usually only necessary to work around guest bugs).
4950          */
4951         need_exit_tb = true;
4952     }
4953     if (need_exit_tb) {
4954         gen_lookup_tb(s);
4955     }
4956 
4957  exit:
4958     if (tcg_ri) {
4959         tcg_temp_free_ptr(tcg_ri);
4960     }
4961 }
4962 
4963 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4964 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4965 {
4966     int cpnum = (insn >> 8) & 0xf;
4967 
4968     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4969         unallocated_encoding(s);
4970     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4971         if (disas_iwmmxt_insn(s, insn)) {
4972             unallocated_encoding(s);
4973         }
4974     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4975         if (disas_dsp_insn(s, insn)) {
4976             unallocated_encoding(s);
4977         }
4978     }
4979 }
4980 
4981 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4982 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4983 {
4984     TCGv_i32 tmp;
4985     tmp = tcg_temp_new_i32();
4986     tcg_gen_extrl_i64_i32(tmp, val);
4987     store_reg(s, rlow, tmp);
4988     tmp = tcg_temp_new_i32();
4989     tcg_gen_extrh_i64_i32(tmp, val);
4990     store_reg(s, rhigh, tmp);
4991 }
4992 
4993 /* Load a 64-bit value from a register pair and add it to val.  */
4994 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4995 {
4996     TCGv_i64 tmp;
4997     TCGv_i32 tmpl;
4998     TCGv_i32 tmph;
4999 
5000     /* Load the 64-bit value rhigh:rlow.  */
5001     tmpl = load_reg(s, rlow);
5002     tmph = load_reg(s, rhigh);
5003     tmp = tcg_temp_new_i64();
5004     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
5005     tcg_temp_free_i32(tmpl);
5006     tcg_temp_free_i32(tmph);
5007     tcg_gen_add_i64(val, val, tmp);
5008     tcg_temp_free_i64(tmp);
5009 }
5010 
5011 /* Set N and Z flags from hi|lo.  */
5012 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
5013 {
5014     tcg_gen_mov_i32(cpu_NF, hi);
5015     tcg_gen_or_i32(cpu_ZF, lo, hi);
5016 }
5017 
5018 /* Load/Store exclusive instructions are implemented by remembering
5019    the value/address loaded, and seeing if these are the same
5020    when the store is performed.  This should be sufficient to implement
5021    the architecturally mandated semantics, and avoids having to monitor
5022    regular stores.  The compare vs the remembered value is done during
5023    the cmpxchg operation, but we must compare the addresses manually.  */
5024 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
5025                                TCGv_i32 addr, int size)
5026 {
5027     TCGv_i32 tmp = tcg_temp_new_i32();
5028     MemOp opc = size | MO_ALIGN | s->be_data;
5029 
5030     s->is_ldex = true;
5031 
5032     if (size == 3) {
5033         TCGv_i32 tmp2 = tcg_temp_new_i32();
5034         TCGv_i64 t64 = tcg_temp_new_i64();
5035 
5036         /*
5037          * For AArch32, architecturally the 32-bit word at the lowest
5038          * address is always Rt and the one at addr+4 is Rt2, even if
5039          * the CPU is big-endian. That means we don't want to do a
5040          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
5041          * architecturally 64-bit access, but instead do a 64-bit access
5042          * using MO_BE if appropriate and then split the two halves.
5043          */
5044         TCGv taddr = gen_aa32_addr(s, addr, opc);
5045 
5046         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
5047         tcg_temp_free(taddr);
5048         tcg_gen_mov_i64(cpu_exclusive_val, t64);
5049         if (s->be_data == MO_BE) {
5050             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
5051         } else {
5052             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
5053         }
5054         tcg_temp_free_i64(t64);
5055 
5056         store_reg(s, rt2, tmp2);
5057     } else {
5058         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
5059         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
5060     }
5061 
5062     store_reg(s, rt, tmp);
5063     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
5064 }
5065 
5066 static void gen_clrex(DisasContext *s)
5067 {
5068     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5069 }
5070 
5071 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
5072                                 TCGv_i32 addr, int size)
5073 {
5074     TCGv_i32 t0, t1, t2;
5075     TCGv_i64 extaddr;
5076     TCGv taddr;
5077     TCGLabel *done_label;
5078     TCGLabel *fail_label;
5079     MemOp opc = size | MO_ALIGN | s->be_data;
5080 
5081     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
5082          [addr] = {Rt};
5083          {Rd} = 0;
5084        } else {
5085          {Rd} = 1;
5086        } */
5087     fail_label = gen_new_label();
5088     done_label = gen_new_label();
5089     extaddr = tcg_temp_new_i64();
5090     tcg_gen_extu_i32_i64(extaddr, addr);
5091     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
5092     tcg_temp_free_i64(extaddr);
5093 
5094     taddr = gen_aa32_addr(s, addr, opc);
5095     t0 = tcg_temp_new_i32();
5096     t1 = load_reg(s, rt);
5097     if (size == 3) {
5098         TCGv_i64 o64 = tcg_temp_new_i64();
5099         TCGv_i64 n64 = tcg_temp_new_i64();
5100 
5101         t2 = load_reg(s, rt2);
5102 
5103         /*
5104          * For AArch32, architecturally the 32-bit word at the lowest
5105          * address is always Rt and the one at addr+4 is Rt2, even if
5106          * the CPU is big-endian. Since we're going to treat this as a
5107          * single 64-bit BE store, we need to put the two halves in the
5108          * opposite order for BE to LE, so that they end up in the right
5109          * places.  We don't want gen_aa32_st_i64, because that checks
5110          * SCTLR_B as if for an architectural 64-bit access.
5111          */
5112         if (s->be_data == MO_BE) {
5113             tcg_gen_concat_i32_i64(n64, t2, t1);
5114         } else {
5115             tcg_gen_concat_i32_i64(n64, t1, t2);
5116         }
5117         tcg_temp_free_i32(t2);
5118 
5119         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5120                                    get_mem_index(s), opc);
5121         tcg_temp_free_i64(n64);
5122 
5123         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5124         tcg_gen_extrl_i64_i32(t0, o64);
5125 
5126         tcg_temp_free_i64(o64);
5127     } else {
5128         t2 = tcg_temp_new_i32();
5129         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5130         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5131         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5132         tcg_temp_free_i32(t2);
5133     }
5134     tcg_temp_free_i32(t1);
5135     tcg_temp_free(taddr);
5136     tcg_gen_mov_i32(cpu_R[rd], t0);
5137     tcg_temp_free_i32(t0);
5138     tcg_gen_br(done_label);
5139 
5140     gen_set_label(fail_label);
5141     tcg_gen_movi_i32(cpu_R[rd], 1);
5142     gen_set_label(done_label);
5143     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5144 }
5145 
5146 /* gen_srs:
5148  * @s: DisasContext
5149  * @mode: mode field from insn (which stack to store to)
5150  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5151  * @writeback: true if writeback bit set
5152  *
5153  * Generate code for the SRS (Store Return State) insn.
5154  */
5155 static void gen_srs(DisasContext *s,
5156                     uint32_t mode, uint32_t amode, bool writeback)
5157 {
5158     int32_t offset;
5159     TCGv_i32 addr, tmp;
5160     bool undef = false;
5161 
5162     /* SRS is:
5163      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5164      *   and specified mode is monitor mode
5165      * - UNDEFINED in Hyp mode
5166      * - UNPREDICTABLE in User or System mode
5167      * - UNPREDICTABLE if the specified mode is:
5168      * -- not implemented
5169      * -- not a valid mode number
5170      * -- a mode that's at a higher exception level
5171      * -- Monitor, if we are Non-secure
5172      * For the UNPREDICTABLE cases we choose to UNDEF.
5173      */
5174     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5175         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
5176         return;
5177     }
5178 
5179     if (s->current_el == 0 || s->current_el == 2) {
5180         undef = true;
5181     }
5182 
5183     switch (mode) {
5184     case ARM_CPU_MODE_USR:
5185     case ARM_CPU_MODE_FIQ:
5186     case ARM_CPU_MODE_IRQ:
5187     case ARM_CPU_MODE_SVC:
5188     case ARM_CPU_MODE_ABT:
5189     case ARM_CPU_MODE_UND:
5190     case ARM_CPU_MODE_SYS:
5191         break;
5192     case ARM_CPU_MODE_HYP:
5193         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5194             undef = true;
5195         }
5196         break;
5197     case ARM_CPU_MODE_MON:
5198         /* No need to check specifically for "are we non-secure" because
5199          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5200          * so if this isn't EL3 then we must be non-secure.
5201          */
5202         if (s->current_el != 3) {
5203             undef = true;
5204         }
5205         break;
5206     default:
5207         undef = true;
5208     }
5209 
5210     if (undef) {
5211         unallocated_encoding(s);
5212         return;
5213     }
5214 
5215     addr = tcg_temp_new_i32();
5216     /* get_r13_banked() will raise an exception if called from System mode */
5217     gen_set_condexec(s);
5218     gen_update_pc(s, 0);
5219     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5220     switch (amode) {
5221     case 0: /* DA */
5222         offset = -4;
5223         break;
5224     case 1: /* IA */
5225         offset = 0;
5226         break;
5227     case 2: /* DB */
5228         offset = -8;
5229         break;
5230     case 3: /* IB */
5231         offset = 4;
5232         break;
5233     default:
5234         g_assert_not_reached();
5235     }
5236     tcg_gen_addi_i32(addr, addr, offset);
5237     tmp = load_reg(s, 14);
5238     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5239     tcg_temp_free_i32(tmp);
5240     tmp = load_cpu_field(spsr);
5241     tcg_gen_addi_i32(addr, addr, 4);
5242     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5243     tcg_temp_free_i32(tmp);
5244     if (writeback) {
5245         switch (amode) {
5246         case 0:
5247             offset = -8;
5248             break;
5249         case 1:
5250             offset = 4;
5251             break;
5252         case 2:
5253             offset = -4;
5254             break;
5255         case 3:
5256             offset = 0;
5257             break;
5258         default:
5259             g_assert_not_reached();
5260         }
5261         tcg_gen_addi_i32(addr, addr, offset);
5262         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5263     }
5264     tcg_temp_free_i32(addr);
5265     s->base.is_jmp = DISAS_UPDATE_EXIT;
5266 }
5267 
5268 /* Skip this instruction if the ARM condition is false */
5269 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5270 {
5271     arm_gen_condlabel(s);
5272     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5273 }
5274 
5275 
5276 /*
5277  * Constant expanders used by T16/T32 decode
5278  */
5279 
5280 /* Return only the rotation part of T32ExpandImm.  */
5281 static int t32_expandimm_rot(DisasContext *s, int x)
5282 {
5283     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5284 }
5285 
5286 /* Return the unrotated immediate from T32ExpandImm.  */
5287 static int t32_expandimm_imm(DisasContext *s, int x)
5288 {
5289     int imm = extract32(x, 0, 8);
5290 
5291     switch (extract32(x, 8, 4)) {
5292     case 0: /* XY */
5293         /* Nothing to do.  */
5294         break;
5295     case 1: /* 00XY00XY */
5296         imm *= 0x00010001;
5297         break;
5298     case 2: /* XY00XY00 */
5299         imm *= 0x01000100;
5300         break;
5301     case 3: /* XYXYXYXY */
5302         imm *= 0x01010101;
5303         break;
5304     default:
5305         /* Rotated constant.  */
5306         imm |= 0x80;
5307         break;
5308     }
5309     return imm;
5310 }
5311 
5312 static int t32_branch24(DisasContext *s, int x)
5313 {
5314     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5315     x ^= !(x < 0) * (3 << 21);
5316     /* Append the final zero.  */
5317     return x << 1;
5318 }
5319 
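/* T16 data-processing insns set the flags only when not in an IT block. */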
5320 static int t16_setflags(DisasContext *s)
5321 {
5322     return s->condexec_mask == 0;
5323 }
5324 
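/*
 * Expand the 8-bit T16 register list: bit 8 of the encoding adds
 * lr (r14) for PUSH and pc (r15) for POP.
 */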
5325 static int t16_push_list(DisasContext *s, int x)
5326 {
5327     return (x & 0xff) | (x & 0x100) << (14 - 8);
5328 }
5329 
5330 static int t16_pop_list(DisasContext *s, int x)
5331 {
5332     return (x & 0xff) | (x & 0x100) << (15 - 8);
5333 }
5334 
5335 /*
5336  * Include the generated decoders.
5337  */
5338 
5339 #include "decode-a32.c.inc"
5340 #include "decode-a32-uncond.c.inc"
5341 #include "decode-t32.c.inc"
5342 #include "decode-t16.c.inc"
5343 
5344 static bool valid_cp(DisasContext *s, int cp)
5345 {
5346     /*
5347      * Return true if this coprocessor field indicates something
5348      * that's really a possible coprocessor.
5349      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5350      * and of those only cp14 and cp15 were used for registers.
5351      * cp10 and cp11 were used for VFP and Neon, whose decode is
5352      * dealt with elsewhere. With the advent of fp16, cp9 is also
5353      * now part of VFP.
5354      * For v8A and later, the encoding has been tightened so that
5355      * only cp14 and cp15 are valid, and other values aren't considered
5356      * to be in the coprocessor-instruction space at all. v8M still
5357      * permits coprocessors 0..7.
5358      * For XScale, we must not decode the XScale cp0, cp1 space as
5359      * a standard coprocessor insn, because we want to fall through to
5360      * the legacy disas_xscale_insn() decoder after decodetree is done.
5361      */
5362     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5363         return false;
5364     }
5365 
5366     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5367         !arm_dc_feature(s, ARM_FEATURE_M)) {
5368         return cp >= 14;
5369     }
5370     return cp < 8 || cp >= 14;
5371 }
5372 
5373 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5374 {
5375     if (!valid_cp(s, a->cp)) {
5376         return false;
5377     }
5378     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5379                    false, a->rt, 0);
5380     return true;
5381 }
5382 
5383 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5384 {
5385     if (!valid_cp(s, a->cp)) {
5386         return false;
5387     }
5388     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5389                    true, a->rt, 0);
5390     return true;
5391 }
5392 
5393 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5394 {
5395     if (!valid_cp(s, a->cp)) {
5396         return false;
5397     }
5398     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5399                    false, a->rt, a->rt2);
5400     return true;
5401 }
5402 
5403 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5404 {
5405     if (!valid_cp(s, a->cp)) {
5406         return false;
5407     }
5408     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5409                    true, a->rt, a->rt2);
5410     return true;
5411 }
5412 
5413 /* Helpers to swap operands for reverse-subtract.  */
5414 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5415 {
5416     tcg_gen_sub_i32(dst, b, a);
5417 }
5418 
5419 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5420 {
5421     gen_sub_CC(dst, b, a);
5422 }
5423 
5424 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5425 {
5426     gen_sub_carry(dest, b, a);
5427 }
5428 
5429 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5430 {
5431     gen_sbc_CC(dest, b, a);
5432 }
5433 
5434 /*
5435  * Helpers for the data processing routines.
5436  *
5437  * After the computation, store the result back.  This may be
5438  * suppressed altogether (STREG_NONE), require a runtime check
5439  * against the stack limits (STREG_SP_CHECK), generate an exception
5440  * return (STREG_EXC_RET), or simply store into a register (STREG_NORMAL).
5441  *
5442  * Always return true, indicating success for a trans_* function.
5443  */
5444 typedef enum {
5445    STREG_NONE,
5446    STREG_NORMAL,
5447    STREG_SP_CHECK,
5448    STREG_EXC_RET,
5449 } StoreRegKind;
5450 
5451 static bool store_reg_kind(DisasContext *s, int rd,
5452                             TCGv_i32 val, StoreRegKind kind)
5453 {
5454     switch (kind) {
5455     case STREG_NONE:
5456         tcg_temp_free_i32(val);
5457         return true;
5458     case STREG_NORMAL:
5459         /* See ALUWritePC: Interworking only from a32 mode. */
5460         if (s->thumb) {
5461             store_reg(s, rd, val);
5462         } else {
5463             store_reg_bx(s, rd, val);
5464         }
5465         return true;
5466     case STREG_SP_CHECK:
5467         store_sp_checked(s, val);
5468         return true;
5469     case STREG_EXC_RET:
5470         gen_exception_return(s, val);
5471         return true;
5472     }
5473     g_assert_not_reached();
5474 }
5475 
5476 /*
5477  * Data Processing (register)
5478  *
5479  * Operate, optionally setting flags, on one register source,
5480  * one immediate-shifted register source, and a destination.
5481  */
5482 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5483                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5484                          int logic_cc, StoreRegKind kind)
5485 {
5486     TCGv_i32 tmp1, tmp2;
5487 
5488     tmp2 = load_reg(s, a->rm);
5489     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5490     tmp1 = load_reg(s, a->rn);
5491 
5492     gen(tmp1, tmp1, tmp2);
5493     tcg_temp_free_i32(tmp2);
5494 
5495     if (logic_cc) {
5496         gen_logic_CC(tmp1);
5497     }
5498     return store_reg_kind(s, a->rd, tmp1, kind);
5499 }
5500 
5501 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5502                          void (*gen)(TCGv_i32, TCGv_i32),
5503                          int logic_cc, StoreRegKind kind)
5504 {
5505     TCGv_i32 tmp;
5506 
5507     tmp = load_reg(s, a->rm);
5508     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5509 
5510     gen(tmp, tmp);
5511     if (logic_cc) {
5512         gen_logic_CC(tmp);
5513     }
5514     return store_reg_kind(s, a->rd, tmp, kind);
5515 }
5516 
5517 /*
5518  * Data-processing (register-shifted register)
5519  *
5520  * Operate, optionally setting flags, on one register source,
5521  * one register-shifted register source, and a destination.
5522  */
5523 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5524                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5525                          int logic_cc, StoreRegKind kind)
5526 {
5527     TCGv_i32 tmp1, tmp2;
5528 
5529     tmp1 = load_reg(s, a->rs);
5530     tmp2 = load_reg(s, a->rm);
5531     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5532     tmp1 = load_reg(s, a->rn);
5533 
5534     gen(tmp1, tmp1, tmp2);
5535     tcg_temp_free_i32(tmp2);
5536 
5537     if (logic_cc) {
5538         gen_logic_CC(tmp1);
5539     }
5540     return store_reg_kind(s, a->rd, tmp1, kind);
5541 }
5542 
5543 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5544                          void (*gen)(TCGv_i32, TCGv_i32),
5545                          int logic_cc, StoreRegKind kind)
5546 {
5547     TCGv_i32 tmp1, tmp2;
5548 
5549     tmp1 = load_reg(s, a->rs);
5550     tmp2 = load_reg(s, a->rm);
5551     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5552 
5553     gen(tmp2, tmp2);
5554     if (logic_cc) {
5555         gen_logic_CC(tmp2);
5556     }
5557     return store_reg_kind(s, a->rd, tmp2, kind);
5558 }
5559 
5560 /*
5561  * Data-processing (immediate)
5562  *
5563  * Operate, optionally setting flags, on one register source,
5564  * one rotated immediate, and a destination.
5565  *
5566  * Note that logic_cc && a->rot setting CF based on the msb of the
5567  * immediate is the reason why we must pass in the unrotated form
5568  * of the immediate.
5569  */
5570 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5571                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5572                          int logic_cc, StoreRegKind kind)
5573 {
5574     TCGv_i32 tmp1;
5575     uint32_t imm;
5576 
5577     imm = ror32(a->imm, a->rot);
5578     if (logic_cc && a->rot) {
5579         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5580     }
5581     tmp1 = load_reg(s, a->rn);
5582 
5583     gen(tmp1, tmp1, tcg_constant_i32(imm));
5584 
5585     if (logic_cc) {
5586         gen_logic_CC(tmp1);
5587     }
5588     return store_reg_kind(s, a->rd, tmp1, kind);
5589 }
5590 
5591 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5592                          void (*gen)(TCGv_i32, TCGv_i32),
5593                          int logic_cc, StoreRegKind kind)
5594 {
5595     TCGv_i32 tmp;
5596     uint32_t imm;
5597 
5598     imm = ror32(a->imm, a->rot);
5599     if (logic_cc && a->rot) {
5600         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5601     }
5602 
5603     tmp = tcg_temp_new_i32();
5604     gen(tmp, tcg_constant_i32(imm));
5605 
5606     if (logic_cc) {
5607         gen_logic_CC(tmp);
5608     }
5609     return store_reg_kind(s, a->rd, tmp, kind);
5610 }
5611 
5612 #define DO_ANY3(NAME, OP, L, K)                                         \
5613     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5614     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5615     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5616     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5617     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5618     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5619 
5620 #define DO_ANY2(NAME, OP, L, K)                                         \
5621     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5622     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5623     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5624     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5625     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5626     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5627 
5628 #define DO_CMP2(NAME, OP, L)                                            \
5629     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5630     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5631     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5632     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5633     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5634     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5635 
5636 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5637 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5638 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5639 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5640 
5641 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5642 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5643 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5644 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5645 
5646 DO_CMP2(TST, tcg_gen_and_i32, true)
5647 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5648 DO_CMP2(CMN, gen_add_CC, false)
5649 DO_CMP2(CMP, gen_sub_CC, false)
5650 
5651 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5652         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5653 
5654 /*
5655  * Note that for the computation of StoreRegKind we may return out of
5656  * the middle of the functions that are expanded by DO_ANY3, and that
5657  * we modify a->s via that parameter before it is used by OP.
5658  */
5659 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5660         ({
5661             StoreRegKind ret = STREG_NORMAL;
5662             if (a->rd == 15 && a->s) {
5663                 /*
5664                  * See ALUExceptionReturn:
5665                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5666                  * In Hyp mode, UNDEFINED.
5667                  */
5668                 if (IS_USER(s) || s->current_el == 2) {
5669                     unallocated_encoding(s);
5670                     return true;
5671                 }
5672                 /* There is no writeback of nzcv to PSTATE.  */
5673                 a->s = 0;
5674                 ret = STREG_EXC_RET;
5675             } else if (a->rd == 13 && a->rn == 13) {
5676                 ret = STREG_SP_CHECK;
5677             }
5678             ret;
5679         }))
5680 
5681 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5682         ({
5683             StoreRegKind ret = STREG_NORMAL;
5684             if (a->rd == 15 && a->s) {
5685                 /*
5686                  * See ALUExceptionReturn:
5687                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5688                  * In Hyp mode, UNDEFINED.
5689                  */
5690                 if (IS_USER(s) || s->current_el == 2) {
5691                     unallocated_encoding(s);
5692                     return true;
5693                 }
5694                 /* There is no writeback of nzcv to PSTATE.  */
5695                 a->s = 0;
5696                 ret = STREG_EXC_RET;
5697             } else if (a->rd == 13) {
5698                 ret = STREG_SP_CHECK;
5699             }
5700             ret;
5701         }))
5702 
5703 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5704 
5705 /*
5706  * ORN is only available with T32, so there is no register-shifted-register
5707  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5708  */
5709 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5710 {
5711     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5712 }
5713 
5714 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5715 {
5716     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5717 }
5718 
5719 #undef DO_ANY3
5720 #undef DO_ANY2
5721 #undef DO_CMP2
5722 
5723 static bool trans_ADR(DisasContext *s, arg_ri *a)
5724 {
5725     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5726     return true;
5727 }
5728 
5729 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5730 {
5731     if (!ENABLE_ARCH_6T2) {
5732         return false;
5733     }
5734 
5735     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5736     return true;
5737 }
5738 
5739 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5740 {
5741     TCGv_i32 tmp;
5742 
5743     if (!ENABLE_ARCH_6T2) {
5744         return false;
5745     }
5746 
5747     tmp = load_reg(s, a->rd);
5748     tcg_gen_ext16u_i32(tmp, tmp);
5749     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5750     store_reg(s, a->rd, tmp);
5751     return true;
5752 }
5753 
5754 /*
5755  * v8.1M MVE wide-shifts
5756  */
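/*
 * Long shifts by immediate: the 64-bit operand lives in RdaHi:RdaLo and
 * is shifted in place by shim (the encoding's 0 is treated as 32).
 */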
5757 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5758                           WideShiftImmFn *fn)
5759 {
5760     TCGv_i64 rda;
5761     TCGv_i32 rdalo, rdahi;
5762 
5763     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5764         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5765         return false;
5766     }
5767     if (a->rdahi == 15) {
5768         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5769         return false;
5770     }
5771     if (!dc_isar_feature(aa32_mve, s) ||
5772         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5773         a->rdahi == 13) {
5774         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5775         unallocated_encoding(s);
5776         return true;
5777     }
5778 
5779     if (a->shim == 0) {
5780         a->shim = 32;
5781     }
5782 
5783     rda = tcg_temp_new_i64();
5784     rdalo = load_reg(s, a->rdalo);
5785     rdahi = load_reg(s, a->rdahi);
5786     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5787 
5788     fn(rda, rda, a->shim);
5789 
5790     tcg_gen_extrl_i64_i32(rdalo, rda);
5791     tcg_gen_extrh_i64_i32(rdahi, rda);
5792     store_reg(s, a->rdalo, rdalo);
5793     store_reg(s, a->rdahi, rdahi);
5794     tcg_temp_free_i64(rda);
5795 
5796     return true;
5797 }
5798 
5799 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5800 {
5801     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5802 }
5803 
5804 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5805 {
5806     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5807 }
5808 
5809 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5810 {
5811     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5812 }
5813 
5814 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5815 {
5816     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5817 }
5818 
5819 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5820 {
5821     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5822 }
5823 
5824 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5825 {
5826     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5827 }
5828 
5829 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5830 {
5831     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5832 }
5833 
5834 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5835 {
5836     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5837 }
5838 
5839 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5840 {
5841     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5842 }
5843 
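/*
 * Long shifts by register: the 64-bit operand in RdaHi:RdaLo is shifted
 * by the signed low byte of Rm (the helpers do the sign-extension).
 */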
5844 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5845 {
5846     TCGv_i64 rda;
5847     TCGv_i32 rdalo, rdahi;
5848 
5849     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5850         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5851         return false;
5852     }
5853     if (a->rdahi == 15) {
5854         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5855         return false;
5856     }
5857     if (!dc_isar_feature(aa32_mve, s) ||
5858         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5859         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5860         a->rm == a->rdahi || a->rm == a->rdalo) {
5861         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5862         unallocated_encoding(s);
5863         return true;
5864     }
5865 
5866     rda = tcg_temp_new_i64();
5867     rdalo = load_reg(s, a->rdalo);
5868     rdahi = load_reg(s, a->rdahi);
5869     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5870 
5871     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5872     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5873 
5874     tcg_gen_extrl_i64_i32(rdalo, rda);
5875     tcg_gen_extrh_i64_i32(rdahi, rda);
5876     store_reg(s, a->rdalo, rdalo);
5877     store_reg(s, a->rdahi, rdahi);
5878     tcg_temp_free_i64(rda);
5879 
5880     return true;
5881 }
5882 
5883 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5884 {
5885     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5886 }
5887 
5888 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5889 {
5890     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5891 }
5892 
5893 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5894 {
5895     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5896 }
5897 
5898 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5899 {
5900     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5901 }
5902 
5903 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5904 {
5905     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5906 }
5907 
5908 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5909 {
5910     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5911 }
5912 
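/*
 * Single-register MVE shifts by immediate: shift Rda in place by shim
 * (the encoding's 0 is treated as 32).
 */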
5913 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5914 {
5915     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5916         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5917         return false;
5918     }
5919     if (!dc_isar_feature(aa32_mve, s) ||
5920         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5921         a->rda == 13 || a->rda == 15) {
5922         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5923         unallocated_encoding(s);
5924         return true;
5925     }
5926 
5927     if (a->shim == 0) {
5928         a->shim = 32;
5929     }
5930     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5931 
5932     return true;
5933 }
5934 
5935 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5936 {
5937     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5938 }
5939 
5940 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5941 {
5942     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5943 }
5944 
5945 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5946 {
5947     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5948 }
5949 
5950 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5951 {
5952     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5953 }
5954 
5955 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5956 {
5957     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5958 }
5959 
5960 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5961 {
5962     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5963 }
5964 
5965 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5966 {
5967     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5968         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5969         return false;
5970     }
5971     if (!dc_isar_feature(aa32_mve, s) ||
5972         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5973         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5974         a->rm == a->rda) {
5975         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5976         unallocated_encoding(s);
5977         return true;
5978     }
5979 
5980     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5981     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5982     return true;
5983 }
5984 
5985 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5986 {
5987     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5988 }
5989 
5990 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5991 {
5992     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5993 }
5994 
5995 /*
5996  * Multiply and multiply accumulate
5997  */
5998 
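/* MUL/MLA: rd = rn * rm (+ ra), optionally setting the N and Z flags. */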
5999 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
6000 {
6001     TCGv_i32 t1, t2;
6002 
6003     t1 = load_reg(s, a->rn);
6004     t2 = load_reg(s, a->rm);
6005     tcg_gen_mul_i32(t1, t1, t2);
6006     tcg_temp_free_i32(t2);
6007     if (add) {
6008         t2 = load_reg(s, a->ra);
6009         tcg_gen_add_i32(t1, t1, t2);
6010         tcg_temp_free_i32(t2);
6011     }
6012     if (a->s) {
6013         gen_logic_CC(t1);
6014     }
6015     store_reg(s, a->rd, t1);
6016     return true;
6017 }
6018 
6019 static bool trans_MUL(DisasContext *s, arg_MUL *a)
6020 {
6021     return op_mla(s, a, false);
6022 }
6023 
6024 static bool trans_MLA(DisasContext *s, arg_MLA *a)
6025 {
6026     return op_mla(s, a, true);
6027 }
6028 
6029 static bool trans_MLS(DisasContext *s, arg_MLS *a)
6030 {
6031     TCGv_i32 t1, t2;
6032 
6033     if (!ENABLE_ARCH_6T2) {
6034         return false;
6035     }
6036     t1 = load_reg(s, a->rn);
6037     t2 = load_reg(s, a->rm);
6038     tcg_gen_mul_i32(t1, t1, t2);
6039     tcg_temp_free_i32(t2);
6040     t2 = load_reg(s, a->ra);
6041     tcg_gen_sub_i32(t1, t2, t1);
6042     tcg_temp_free_i32(t2);
6043     store_reg(s, a->rd, t1);
6044     return true;
6045 }
6046 
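/*
 * 64-bit multiply (accumulate): rd:ra = rn * rm [+ rd:ra], with 'uns'
 * selecting unsigned (UMULL/UMLAL) vs signed (SMULL/SMLAL).
 */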
6047 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
6048 {
6049     TCGv_i32 t0, t1, t2, t3;
6050 
6051     t0 = load_reg(s, a->rm);
6052     t1 = load_reg(s, a->rn);
6053     if (uns) {
6054         tcg_gen_mulu2_i32(t0, t1, t0, t1);
6055     } else {
6056         tcg_gen_muls2_i32(t0, t1, t0, t1);
6057     }
6058     if (add) {
6059         t2 = load_reg(s, a->ra);
6060         t3 = load_reg(s, a->rd);
6061         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
6062         tcg_temp_free_i32(t2);
6063         tcg_temp_free_i32(t3);
6064     }
6065     if (a->s) {
6066         gen_logicq_cc(t0, t1);
6067     }
6068     store_reg(s, a->ra, t0);
6069     store_reg(s, a->rd, t1);
6070     return true;
6071 }
6072 
6073 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
6074 {
6075     return op_mlal(s, a, true, false);
6076 }
6077 
6078 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
6079 {
6080     return op_mlal(s, a, false, false);
6081 }
6082 
6083 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
6084 {
6085     return op_mlal(s, a, true, true);
6086 }
6087 
6088 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
6089 {
6090     return op_mlal(s, a, false, true);
6091 }
6092 
6093 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
6094 {
6095     TCGv_i32 t0, t1, t2, zero;
6096 
6097     if (s->thumb
6098         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6099         : !ENABLE_ARCH_6) {
6100         return false;
6101     }
6102 
6103     t0 = load_reg(s, a->rm);
6104     t1 = load_reg(s, a->rn);
6105     tcg_gen_mulu2_i32(t0, t1, t0, t1);
6106     zero = tcg_constant_i32(0);
6107     t2 = load_reg(s, a->ra);
6108     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6109     tcg_temp_free_i32(t2);
6110     t2 = load_reg(s, a->rd);
6111     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6112     tcg_temp_free_i32(t2);
6113     store_reg(s, a->ra, t0);
6114     store_reg(s, a->rd, t1);
6115     return true;
6116 }
6117 
6118 /*
6119  * Saturating addition and subtraction
6120  */
6121 
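/*
 * QADD/QSUB/QDADD/QDSUB: rd = saturating rm +/- rn, with the doubling
 * forms first computing the saturated value of 2 * rn.
 */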
6122 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
6123 {
6124     TCGv_i32 t0, t1;
6125 
6126     if (s->thumb
6127         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6128         : !ENABLE_ARCH_5TE) {
6129         return false;
6130     }
6131 
6132     t0 = load_reg(s, a->rm);
6133     t1 = load_reg(s, a->rn);
6134     if (doub) {
6135         gen_helper_add_saturate(t1, cpu_env, t1, t1);
6136     }
6137     if (add) {
6138         gen_helper_add_saturate(t0, cpu_env, t0, t1);
6139     } else {
6140         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
6141     }
6142     tcg_temp_free_i32(t1);
6143     store_reg(s, a->rd, t0);
6144     return true;
6145 }
6146 
6147 #define DO_QADDSUB(NAME, ADD, DOUB) \
6148 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
6149 {                                                        \
6150     return op_qaddsub(s, a, ADD, DOUB);                  \
6151 }
6152 
6153 DO_QADDSUB(QADD, true, false)
6154 DO_QADDSUB(QSUB, false, false)
6155 DO_QADDSUB(QDADD, true, true)
6156 DO_QADDSUB(QDSUB, false, true)
6157 
6158 #undef DO_QADDSUB
6159 
6160 /*
6161  * Halfword multiply and multiply accumulate
6162  */
6163 
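/*
 * SMULxy/SMLAxy/SMLALxy: multiply the selected 16-bit halves of rn and
 * rm; add_long chooses no accumulate (0), 32-bit accumulate with ra
 * setting Q on overflow (1), or 64-bit accumulate into rd:ra (2).
 */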
6164 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
6165                        int add_long, bool nt, bool mt)
6166 {
6167     TCGv_i32 t0, t1, tl, th;
6168 
6169     if (s->thumb
6170         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6171         : !ENABLE_ARCH_5TE) {
6172         return false;
6173     }
6174 
6175     t0 = load_reg(s, a->rn);
6176     t1 = load_reg(s, a->rm);
6177     gen_mulxy(t0, t1, nt, mt);
6178     tcg_temp_free_i32(t1);
6179 
6180     switch (add_long) {
6181     case 0:
6182         store_reg(s, a->rd, t0);
6183         break;
6184     case 1:
6185         t1 = load_reg(s, a->ra);
6186         gen_helper_add_setq(t0, cpu_env, t0, t1);
6187         tcg_temp_free_i32(t1);
6188         store_reg(s, a->rd, t0);
6189         break;
6190     case 2:
6191         tl = load_reg(s, a->ra);
6192         th = load_reg(s, a->rd);
6193         /* Sign-extend the 32-bit product to 64 bits.  */
6194         t1 = tcg_temp_new_i32();
6195         tcg_gen_sari_i32(t1, t0, 31);
6196         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6197         tcg_temp_free_i32(t0);
6198         tcg_temp_free_i32(t1);
6199         store_reg(s, a->ra, tl);
6200         store_reg(s, a->rd, th);
6201         break;
6202     default:
6203         g_assert_not_reached();
6204     }
6205     return true;
6206 }
6207 
6208 #define DO_SMLAX(NAME, add, nt, mt) \
6209 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6210 {                                                          \
6211     return op_smlaxxx(s, a, add, nt, mt);                  \
6212 }
6213 
6214 DO_SMLAX(SMULBB, 0, 0, 0)
6215 DO_SMLAX(SMULBT, 0, 0, 1)
6216 DO_SMLAX(SMULTB, 0, 1, 0)
6217 DO_SMLAX(SMULTT, 0, 1, 1)
6218 
6219 DO_SMLAX(SMLABB, 1, 0, 0)
6220 DO_SMLAX(SMLABT, 1, 0, 1)
6221 DO_SMLAX(SMLATB, 1, 1, 0)
6222 DO_SMLAX(SMLATT, 1, 1, 1)
6223 
6224 DO_SMLAX(SMLALBB, 2, 0, 0)
6225 DO_SMLAX(SMLALBT, 2, 0, 1)
6226 DO_SMLAX(SMLALTB, 2, 1, 0)
6227 DO_SMLAX(SMLALTT, 2, 1, 1)
6228 
6229 #undef DO_SMLAX
6230 
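/*
 * SMULWy/SMLAWy: take bits <47:16> of the 48-bit product of rn and the
 * selected 16-bit half of rm, optionally accumulating ra with Q saturation.
 */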
6231 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6232 {
6233     TCGv_i32 t0, t1;
6234 
6235     if (!ENABLE_ARCH_5TE) {
6236         return false;
6237     }
6238 
6239     t0 = load_reg(s, a->rn);
6240     t1 = load_reg(s, a->rm);
6241     /*
6242      * Since the nominal result is product<47:16>, shift the 16-bit
6243      * input up by 16 bits, so that the result is at product<63:32>.
6244      */
6245     if (mt) {
6246         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6247     } else {
6248         tcg_gen_shli_i32(t1, t1, 16);
6249     }
6250     tcg_gen_muls2_i32(t0, t1, t0, t1);
6251     tcg_temp_free_i32(t0);
6252     if (add) {
6253         t0 = load_reg(s, a->ra);
6254         gen_helper_add_setq(t1, cpu_env, t1, t0);
6255         tcg_temp_free_i32(t0);
6256     }
6257     store_reg(s, a->rd, t1);
6258     return true;
6259 }
6260 
6261 #define DO_SMLAWX(NAME, add, mt) \
6262 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6263 {                                                          \
6264     return op_smlawx(s, a, add, mt);                       \
6265 }
6266 
6267 DO_SMLAWX(SMULWB, 0, 0)
6268 DO_SMLAWX(SMULWT, 0, 1)
6269 DO_SMLAWX(SMLAWB, 1, 0)
6270 DO_SMLAWX(SMLAWT, 1, 1)
6271 
6272 #undef DO_SMLAWX
6273 
6274 /*
6275  * MSR (immediate) and hints
6276  */
6277 
6278 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6279 {
6280     /*
6281      * When running single-threaded TCG code, use the helper to ensure that
6282      * the next round-robin scheduled vCPU gets a crack.  When running in
6283      * MTTCG we don't generate jumps to the helper as it won't affect the
6284      * scheduling of other vCPUs.
6285      */
6286     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6287         gen_update_pc(s, curr_insn_len(s));
6288         s->base.is_jmp = DISAS_YIELD;
6289     }
6290     return true;
6291 }
6292 
6293 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6294 {
6295     /*
6296      * When running single-threaded TCG code, use the helper to ensure that
6297      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6298      * just skip this instruction.  Currently the SEV/SEVL instructions,
6299      * which are *one* of many ways to wake the CPU from WFE, are not
6300      * implemented so we can't sleep like WFI does.
6301      */
6302     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6303         gen_update_pc(s, curr_insn_len(s));
6304         s->base.is_jmp = DISAS_WFE;
6305     }
6306     return true;
6307 }
6308 
6309 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6310 {
6311     /* For WFI, halt the vCPU until an IRQ. */
6312     gen_update_pc(s, curr_insn_len(s));
6313     s->base.is_jmp = DISAS_WFI;
6314     return true;
6315 }
6316 
6317 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6318 {
6319     /*
6320      * For M-profile, minimal-RAS ESB can be a NOP.
6321      * Without RAS, we must implement this as a NOP.
6322      */
6323     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6324         /*
6325          * QEMU does not have a source of physical SErrors,
6326          * so we are only concerned with virtual SErrors.
6327          * The pseudocode in the ARM ARM for this case is
6328          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6329          *      AArch32.vESBOperation();
6330          * Most of the condition can be evaluated at translation time.
6331          * Test for EL2 present, and defer test for SEL2 to runtime.
6332          */
6333         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6334             gen_helper_vesb(cpu_env);
6335         }
6336     }
6337     return true;
6338 }
6339 
6340 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6341 {
6342     return true;
6343 }
6344 
6345 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6346 {
6347     uint32_t val = ror32(a->imm, a->rot * 2);
6348     uint32_t mask = msr_mask(s, a->mask, a->r);
6349 
6350     if (gen_set_psr_im(s, mask, a->r, val)) {
6351         unallocated_encoding(s);
6352     }
6353     return true;
6354 }
6355 
6356 /*
6357  * Cyclic Redundancy Check
6358  */
6359 
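/* Update the CRC in rn with the low 1 << sz bytes of rm (CRC32 or CRC32C). */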
6360 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6361 {
6362     TCGv_i32 t1, t2, t3;
6363 
6364     if (!dc_isar_feature(aa32_crc32, s)) {
6365         return false;
6366     }
6367 
6368     t1 = load_reg(s, a->rn);
6369     t2 = load_reg(s, a->rm);
6370     switch (sz) {
6371     case MO_8:
6372         gen_uxtb(t2);
6373         break;
6374     case MO_16:
6375         gen_uxth(t2);
6376         break;
6377     case MO_32:
6378         break;
6379     default:
6380         g_assert_not_reached();
6381     }
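    /*
     * The crc32/crc32c helpers take the operand width in bytes as their
     * third argument, hence 1 << sz here (1, 2 or 4).
     */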
6382     t3 = tcg_constant_i32(1 << sz);
6383     if (c) {
6384         gen_helper_crc32c(t1, t1, t2, t3);
6385     } else {
6386         gen_helper_crc32(t1, t1, t2, t3);
6387     }
6388     tcg_temp_free_i32(t2);
6389     store_reg(s, a->rd, t1);
6390     return true;
6391 }
6392 
6393 #define DO_CRC32(NAME, c, sz) \
6394 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6395     { return op_crc32(s, a, c, sz); }
6396 
6397 DO_CRC32(CRC32B, false, MO_8)
6398 DO_CRC32(CRC32H, false, MO_16)
6399 DO_CRC32(CRC32W, false, MO_32)
6400 DO_CRC32(CRC32CB, true, MO_8)
6401 DO_CRC32(CRC32CH, true, MO_16)
6402 DO_CRC32(CRC32CW, true, MO_32)
6403 
6404 #undef DO_CRC32
6405 
6406 /*
6407  * Miscellaneous instructions
6408  */
6409 
6410 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6411 {
6412     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6413         return false;
6414     }
6415     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6416     return true;
6417 }
6418 
6419 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6420 {
6421     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6422         return false;
6423     }
6424     gen_msr_banked(s, a->r, a->sysm, a->rn);
6425     return true;
6426 }
6427 
6428 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6429 {
6430     TCGv_i32 tmp;
6431 
6432     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6433         return false;
6434     }
6435     if (a->r) {
6436         if (IS_USER(s)) {
6437             unallocated_encoding(s);
6438             return true;
6439         }
6440         tmp = load_cpu_field(spsr);
6441     } else {
6442         tmp = tcg_temp_new_i32();
6443         gen_helper_cpsr_read(tmp, cpu_env);
6444     }
6445     store_reg(s, a->rd, tmp);
6446     return true;
6447 }
6448 
6449 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6450 {
6451     TCGv_i32 tmp;
6452     uint32_t mask = msr_mask(s, a->mask, a->r);
6453 
6454     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6455         return false;
6456     }
6457     tmp = load_reg(s, a->rn);
6458     if (gen_set_psr(s, mask, a->r, tmp)) {
6459         unallocated_encoding(s);
6460     }
6461     return true;
6462 }
6463 
6464 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6465 {
6466     TCGv_i32 tmp;
6467 
6468     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6469         return false;
6470     }
6471     tmp = tcg_temp_new_i32();
6472     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6473     store_reg(s, a->rd, tmp);
6474     return true;
6475 }
6476 
6477 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6478 {
6479     TCGv_i32 addr, reg;
6480 
6481     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6482         return false;
6483     }
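    /*
     * Note: despite the name, 'addr' is not an address here; it packs the
     * MSR mask and SYSm fields into the single maskreg argument that the
     * v7m_msr helper expects (CLRM below builds the same encoding, 0xc00).
     */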
6484     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6485     reg = load_reg(s, a->rn);
6486     gen_helper_v7m_msr(cpu_env, addr, reg);
6487     tcg_temp_free_i32(reg);
6488     /* If we wrote to CONTROL, the EL might have changed */
6489     gen_rebuild_hflags(s, true);
6490     gen_lookup_tb(s);
6491     return true;
6492 }
6493 
6494 static bool trans_BX(DisasContext *s, arg_BX *a)
6495 {
6496     if (!ENABLE_ARCH_4T) {
6497         return false;
6498     }
6499     gen_bx_excret(s, load_reg(s, a->rm));
6500     return true;
6501 }
6502 
6503 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6504 {
6505     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6506         return false;
6507     }
6508     /*
6509      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6510      * TBFLAGS bit on a basically-never-happens case, so call a helper
6511      * function to check for the trap and raise the exception if needed
6512      * (passing it the register number for the syndrome value).
6513      * v8A doesn't have this HSTR bit.
6514      */
6515     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6516         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6517         s->current_el < 2 && s->ns) {
6518         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6519     }
6520     /* Trivial implementation equivalent to bx.  */
6521     gen_bx(s, load_reg(s, a->rm));
6522     return true;
6523 }
6524 
6525 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6526 {
6527     TCGv_i32 tmp;
6528 
6529     if (!ENABLE_ARCH_5) {
6530         return false;
6531     }
6532     tmp = load_reg(s, a->rm);
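    /*
     * The return address is the next instruction; in Thumb state we also
     * set bit 0 of LR so that a later BX LR returns to Thumb.
     */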
6533     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6534     gen_bx(s, tmp);
6535     return true;
6536 }
6537 
6538 /*
6539  * BXNS/BLXNS: only exist for v8M with the security extensions,
6540  * and always UNDEF if NonSecure.  We don't implement these in
6541  * the user-only mode either (in theory you can use them from
6542  * Secure User mode but they are too tied in to system emulation).
6543  */
6544 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6545 {
6546     if (!s->v8m_secure || IS_USER_ONLY) {
6547         unallocated_encoding(s);
6548     } else {
6549         gen_bxns(s, a->rm);
6550     }
6551     return true;
6552 }
6553 
6554 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6555 {
6556     if (!s->v8m_secure || IS_USER_ONLY) {
6557         unallocated_encoding(s);
6558     } else {
6559         gen_blxns(s, a->rm);
6560     }
6561     return true;
6562 }
6563 
6564 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6565 {
6566     TCGv_i32 tmp;
6567 
6568     if (!ENABLE_ARCH_5) {
6569         return false;
6570     }
6571     tmp = load_reg(s, a->rm);
6572     tcg_gen_clzi_i32(tmp, tmp, 32);
6573     store_reg(s, a->rd, tmp);
6574     return true;
6575 }
6576 
6577 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6578 {
6579     TCGv_i32 tmp;
6580 
6581     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6582         return false;
6583     }
6584     if (IS_USER(s)) {
6585         unallocated_encoding(s);
6586         return true;
6587     }
6588     if (s->current_el == 2) {
6589         /* ERET from Hyp uses ELR_Hyp, not LR */
6590         tmp = load_cpu_field(elr_el[2]);
6591     } else {
6592         tmp = load_reg(s, 14);
6593     }
6594     gen_exception_return(s, tmp);
6595     return true;
6596 }
6597 
6598 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6599 {
6600     gen_hlt(s, a->imm);
6601     return true;
6602 }
6603 
6604 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6605 {
6606     if (!ENABLE_ARCH_5) {
6607         return false;
6608     }
6609     /* BKPT is OK with ECI set and leaves it untouched */
6610     s->eci_handled = true;
6611     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6612         semihosting_enabled(s->current_el == 0) &&
6613         (a->imm == 0xab)) {
6614         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6615     } else {
6616         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6617     }
6618     return true;
6619 }
6620 
6621 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6622 {
6623     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6624         return false;
6625     }
6626     if (IS_USER(s)) {
6627         unallocated_encoding(s);
6628     } else {
6629         gen_hvc(s, a->imm);
6630     }
6631     return true;
6632 }
6633 
6634 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6635 {
6636     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6637         return false;
6638     }
6639     if (IS_USER(s)) {
6640         unallocated_encoding(s);
6641     } else {
6642         gen_smc(s);
6643     }
6644     return true;
6645 }
6646 
6647 static bool trans_SG(DisasContext *s, arg_SG *a)
6648 {
6649     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6650         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6651         return false;
6652     }
6653     /*
6654      * SG (v8M only)
6655      * The bulk of the behaviour for this instruction is implemented
6656      * in v7m_handle_execute_nsc(), which deals with the insn when
6657      * it is executed by a CPU in non-secure state from memory
6658      * which is Secure & NonSecure-Callable.
6659      * Here we only need to handle the remaining cases:
6660      *  * in NS memory (including the "security extension not
6661      *    implemented" case): NOP
6662      *  * in S memory but CPU already secure (clear IT bits)
6663      * We know that the attribute for the memory this insn is
6664      * in must match the current CPU state, because otherwise
6665      * get_phys_addr_pmsav8 would have generated an exception.
6666      */
6667     if (s->v8m_secure) {
6668         /* Like the IT insn, we don't need to generate any code */
6669         s->condexec_cond = 0;
6670         s->condexec_mask = 0;
6671     }
6672     return true;
6673 }
6674 
6675 static bool trans_TT(DisasContext *s, arg_TT *a)
6676 {
6677     TCGv_i32 addr, tmp;
6678 
6679     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6680         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6681         return false;
6682     }
6683     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6684         /* We UNDEF for these UNPREDICTABLE cases */
6685         unallocated_encoding(s);
6686         return true;
6687     }
6688     if (a->A && !s->v8m_secure) {
6689         /* This case is UNDEFINED.  */
6690         unallocated_encoding(s);
6691         return true;
6692     }
6693 
6694     addr = load_reg(s, a->rn);
6695     tmp = tcg_temp_new_i32();
6696     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6697     tcg_temp_free_i32(addr);
6698     store_reg(s, a->rd, tmp);
6699     return true;
6700 }
6701 
6702 /*
6703  * Load/store register index
6704  */
6705 
6706 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6707 {
6708     ISSInfo ret;
6709 
6710     /* ISS not valid if writeback */
6711     if (p && !w) {
6712         ret = rd;
6713         if (curr_insn_len(s) == 2) {
6714             ret |= ISSIs16Bit;
6715         }
6716     } else {
6717         ret = ISSInvalid;
6718     }
6719     return ret;
6720 }
6721 
6722 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6723 {
6724     TCGv_i32 addr = load_reg(s, a->rn);
6725 
6726     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6727         gen_helper_v8m_stackcheck(cpu_env, addr);
6728     }
6729 
6730     if (a->p) {
6731         TCGv_i32 ofs = load_reg(s, a->rm);
6732         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6733         if (a->u) {
6734             tcg_gen_add_i32(addr, addr, ofs);
6735         } else {
6736             tcg_gen_sub_i32(addr, addr, ofs);
6737         }
6738         tcg_temp_free_i32(ofs);
6739     }
6740     return addr;
6741 }
6742 
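/*
 * Finish a register-offset load/store: for post-indexed forms apply the
 * (shifted) register offset now, then adjust by address_offset (non-zero
 * only for LDRD/STRD, whose callers have advanced addr past the first
 * word) and write the result back to Rn.  For offset addressing (P set,
 * W clear) there is no writeback and addr is simply freed.
 */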
6743 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6744                             TCGv_i32 addr, int address_offset)
6745 {
6746     if (!a->p) {
6747         TCGv_i32 ofs = load_reg(s, a->rm);
6748         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6749         if (a->u) {
6750             tcg_gen_add_i32(addr, addr, ofs);
6751         } else {
6752             tcg_gen_sub_i32(addr, addr, ofs);
6753         }
6754         tcg_temp_free_i32(ofs);
6755     } else if (!a->w) {
6756         tcg_temp_free_i32(addr);
6757         return;
6758     }
6759     tcg_gen_addi_i32(addr, addr, address_offset);
6760     store_reg(s, a->rn, addr);
6761 }
6762 
6763 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6764                        MemOp mop, int mem_idx)
6765 {
6766     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6767     TCGv_i32 addr, tmp;
6768 
6769     addr = op_addr_rr_pre(s, a);
6770 
6771     tmp = tcg_temp_new_i32();
6772     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6773     disas_set_da_iss(s, mop, issinfo);
6774 
6775     /*
6776      * Perform base writeback before the loaded value to
6777      * ensure correct behavior with overlapping index registers.
6778      */
6779     op_addr_rr_post(s, a, addr, 0);
6780     store_reg_from_load(s, a->rt, tmp);
6781     return true;
6782 }
6783 
6784 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6785                         MemOp mop, int mem_idx)
6786 {
6787     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6788     TCGv_i32 addr, tmp;
6789 
6790     /*
6791      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6792      * is either UNPREDICTABLE or has defined behaviour
6793      */
6794     if (s->thumb && a->rn == 15) {
6795         return false;
6796     }
6797 
6798     addr = op_addr_rr_pre(s, a);
6799 
6800     tmp = load_reg(s, a->rt);
6801     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6802     disas_set_da_iss(s, mop, issinfo);
6803     tcg_temp_free_i32(tmp);
6804 
6805     op_addr_rr_post(s, a, addr, 0);
6806     return true;
6807 }
6808 
6809 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6810 {
6811     int mem_idx = get_mem_index(s);
6812     TCGv_i32 addr, tmp;
6813 
6814     if (!ENABLE_ARCH_5TE) {
6815         return false;
6816     }
6817     if (a->rt & 1) {
6818         unallocated_encoding(s);
6819         return true;
6820     }
6821     addr = op_addr_rr_pre(s, a);
6822 
6823     tmp = tcg_temp_new_i32();
6824     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6825     store_reg(s, a->rt, tmp);
6826 
6827     tcg_gen_addi_i32(addr, addr, 4);
6828 
6829     tmp = tcg_temp_new_i32();
6830     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6831     store_reg(s, a->rt + 1, tmp);
6832 
6833     /* LDRD w/ base writeback is undefined if the registers overlap.  */
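    /*
     * addr now points 4 bytes past the value that any writeback should
     * see, so pass -4 to let op_addr_rr_post compensate.
     */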
6834     op_addr_rr_post(s, a, addr, -4);
6835     return true;
6836 }
6837 
6838 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6839 {
6840     int mem_idx = get_mem_index(s);
6841     TCGv_i32 addr, tmp;
6842 
6843     if (!ENABLE_ARCH_5TE) {
6844         return false;
6845     }
6846     if (a->rt & 1) {
6847         unallocated_encoding(s);
6848         return true;
6849     }
6850     addr = op_addr_rr_pre(s, a);
6851 
6852     tmp = load_reg(s, a->rt);
6853     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6854     tcg_temp_free_i32(tmp);
6855 
6856     tcg_gen_addi_i32(addr, addr, 4);
6857 
6858     tmp = load_reg(s, a->rt + 1);
6859     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6860     tcg_temp_free_i32(tmp);
6861 
6862     op_addr_rr_post(s, a, addr, -4);
6863     return true;
6864 }
6865 
6866 /*
6867  * Load/store immediate index
6868  */
6869 
6870 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6871 {
6872     int ofs = a->imm;
6873 
6874     if (!a->u) {
6875         ofs = -ofs;
6876     }
6877 
6878     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6879         /*
6880          * Stackcheck. The base register here is the current SP;
6881          * U is set if we're moving SP up, else down. It is
6882          * UNKNOWN whether the limit check triggers when SP starts
6883          * below the limit and ends up above it; we choose to trigger it.
6884          */
6885         if (!a->u) {
6886             TCGv_i32 newsp = tcg_temp_new_i32();
6887             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6888             gen_helper_v8m_stackcheck(cpu_env, newsp);
6889             tcg_temp_free_i32(newsp);
6890         } else {
6891             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6892         }
6893     }
6894 
6895     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6896 }
6897 
6898 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6899                             TCGv_i32 addr, int address_offset)
6900 {
6901     if (!a->p) {
6902         if (a->u) {
6903             address_offset += a->imm;
6904         } else {
6905             address_offset -= a->imm;
6906         }
6907     } else if (!a->w) {
6908         tcg_temp_free_i32(addr);
6909         return;
6910     }
6911     tcg_gen_addi_i32(addr, addr, address_offset);
6912     store_reg(s, a->rn, addr);
6913 }
6914 
6915 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6916                        MemOp mop, int mem_idx)
6917 {
6918     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6919     TCGv_i32 addr, tmp;
6920 
6921     addr = op_addr_ri_pre(s, a);
6922 
6923     tmp = tcg_temp_new_i32();
6924     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6925     disas_set_da_iss(s, mop, issinfo);
6926 
6927     /*
6928      * Perform base writeback before the loaded value to
6929      * ensure correct behavior with overlapping index registers.
6930      */
6931     op_addr_ri_post(s, a, addr, 0);
6932     store_reg_from_load(s, a->rt, tmp);
6933     return true;
6934 }
6935 
6936 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6937                         MemOp mop, int mem_idx)
6938 {
6939     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6940     TCGv_i32 addr, tmp;
6941 
6942     /*
6943      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6944      * is either UNPREDICTABLE or has defined behaviour
6945      */
6946     if (s->thumb && a->rn == 15) {
6947         return false;
6948     }
6949 
6950     addr = op_addr_ri_pre(s, a);
6951 
6952     tmp = load_reg(s, a->rt);
6953     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6954     disas_set_da_iss(s, mop, issinfo);
6955     tcg_temp_free_i32(tmp);
6956 
6957     op_addr_ri_post(s, a, addr, 0);
6958     return true;
6959 }
6960 
6961 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6962 {
6963     int mem_idx = get_mem_index(s);
6964     TCGv_i32 addr, tmp;
6965 
6966     addr = op_addr_ri_pre(s, a);
6967 
6968     tmp = tcg_temp_new_i32();
6969     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6970     store_reg(s, a->rt, tmp);
6971 
6972     tcg_gen_addi_i32(addr, addr, 4);
6973 
6974     tmp = tcg_temp_new_i32();
6975     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6976     store_reg(s, rt2, tmp);
6977 
6978     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6979     op_addr_ri_post(s, a, addr, -4);
6980     return true;
6981 }
6982 
6983 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6984 {
6985     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6986         return false;
6987     }
6988     return op_ldrd_ri(s, a, a->rt + 1);
6989 }
6990 
6991 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6992 {
6993     arg_ldst_ri b = {
6994         .u = a->u, .w = a->w, .p = a->p,
6995         .rn = a->rn, .rt = a->rt, .imm = a->imm
6996     };
6997     return op_ldrd_ri(s, &b, a->rt2);
6998 }
6999 
7000 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
7001 {
7002     int mem_idx = get_mem_index(s);
7003     TCGv_i32 addr, tmp;
7004 
7005     addr = op_addr_ri_pre(s, a);
7006 
7007     tmp = load_reg(s, a->rt);
7008     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7009     tcg_temp_free_i32(tmp);
7010 
7011     tcg_gen_addi_i32(addr, addr, 4);
7012 
7013     tmp = load_reg(s, rt2);
7014     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7015     tcg_temp_free_i32(tmp);
7016 
7017     op_addr_ri_post(s, a, addr, -4);
7018     return true;
7019 }
7020 
7021 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
7022 {
7023     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
7024         return false;
7025     }
7026     return op_strd_ri(s, a, a->rt + 1);
7027 }
7028 
7029 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
7030 {
7031     arg_ldst_ri b = {
7032         .u = a->u, .w = a->w, .p = a->p,
7033         .rn = a->rn, .rt = a->rt, .imm = a->imm
7034     };
7035     return op_strd_ri(s, &b, a->rt2);
7036 }
7037 
7038 #define DO_LDST(NAME, WHICH, MEMOP) \
7039 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
7040 {                                                                     \
7041     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
7042 }                                                                     \
7043 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
7044 {                                                                     \
7045     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
7046 }                                                                     \
7047 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
7048 {                                                                     \
7049     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
7050 }                                                                     \
7051 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
7052 {                                                                     \
7053     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
7054 }
7055 
7056 DO_LDST(LDR, load, MO_UL)
7057 DO_LDST(LDRB, load, MO_UB)
7058 DO_LDST(LDRH, load, MO_UW)
7059 DO_LDST(LDRSB, load, MO_SB)
7060 DO_LDST(LDRSH, load, MO_SW)
7061 
7062 DO_LDST(STR, store, MO_UL)
7063 DO_LDST(STRB, store, MO_UB)
7064 DO_LDST(STRH, store, MO_UW)
7065 
7066 #undef DO_LDST
7067 
7068 /*
7069  * Synchronization primitives
7070  */
7071 
7072 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
7073 {
7074     TCGv_i32 addr, tmp;
7075     TCGv taddr;
7076 
7077     opc |= s->be_data;
7078     addr = load_reg(s, a->rn);
7079     taddr = gen_aa32_addr(s, addr, opc);
7080     tcg_temp_free_i32(addr);
7081 
7082     tmp = load_reg(s, a->rt2);
7083     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
7084     tcg_temp_free(taddr);
7085 
7086     store_reg(s, a->rt, tmp);
7087     return true;
7088 }
7089 
7090 static bool trans_SWP(DisasContext *s, arg_SWP *a)
7091 {
7092     return op_swp(s, a, MO_UL | MO_ALIGN);
7093 }
7094 
7095 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
7096 {
7097     return op_swp(s, a, MO_UB);
7098 }
7099 
7100 /*
7101  * Load/Store Exclusive and Load-Acquire/Store-Release
7102  */
7103 
7104 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
7105 {
7106     TCGv_i32 addr;
7107     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7108     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7109 
7110     /* We UNDEF for these UNPREDICTABLE cases.  */
7111     if (a->rd == 15 || a->rn == 15 || a->rt == 15
7112         || a->rd == a->rn || a->rd == a->rt
7113         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
7114         || (mop == MO_64
7115             && (a->rt2 == 15
7116                 || a->rd == a->rt2
7117                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7118         unallocated_encoding(s);
7119         return true;
7120     }
7121 
7122     if (rel) {
7123         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7124     }
7125 
7126     addr = tcg_temp_new_i32();
7127     load_reg_var(s, addr, a->rn);
7128     tcg_gen_addi_i32(addr, addr, a->imm);
7129 
7130     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
7131     tcg_temp_free_i32(addr);
7132     return true;
7133 }
7134 
7135 static bool trans_STREX(DisasContext *s, arg_STREX *a)
7136 {
7137     if (!ENABLE_ARCH_6) {
7138         return false;
7139     }
7140     return op_strex(s, a, MO_32, false);
7141 }
7142 
7143 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
7144 {
7145     if (!ENABLE_ARCH_6K) {
7146         return false;
7147     }
7148     /* We UNDEF for these UNPREDICTABLE cases.  */
7149     if (a->rt & 1) {
7150         unallocated_encoding(s);
7151         return true;
7152     }
7153     a->rt2 = a->rt + 1;
7154     return op_strex(s, a, MO_64, false);
7155 }
7156 
7157 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
7158 {
7159     return op_strex(s, a, MO_64, false);
7160 }
7161 
7162 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
7163 {
7164     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7165         return false;
7166     }
7167     return op_strex(s, a, MO_8, false);
7168 }
7169 
7170 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
7171 {
7172     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7173         return false;
7174     }
7175     return op_strex(s, a, MO_16, false);
7176 }
7177 
7178 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
7179 {
7180     if (!ENABLE_ARCH_8) {
7181         return false;
7182     }
7183     return op_strex(s, a, MO_32, true);
7184 }
7185 
7186 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
7187 {
7188     if (!ENABLE_ARCH_8) {
7189         return false;
7190     }
7191     /* We UNDEF for these UNPREDICTABLE cases.  */
7192     if (a->rt & 1) {
7193         unallocated_encoding(s);
7194         return true;
7195     }
7196     a->rt2 = a->rt + 1;
7197     return op_strex(s, a, MO_64, true);
7198 }
7199 
7200 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
7201 {
7202     if (!ENABLE_ARCH_8) {
7203         return false;
7204     }
7205     return op_strex(s, a, MO_64, true);
7206 }
7207 
7208 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7209 {
7210     if (!ENABLE_ARCH_8) {
7211         return false;
7212     }
7213     return op_strex(s, a, MO_8, true);
7214 }
7215 
7216 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7217 {
7218     if (!ENABLE_ARCH_8) {
7219         return false;
7220     }
7221     return op_strex(s, a, MO_16, true);
7222 }
7223 
7224 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7225 {
7226     TCGv_i32 addr, tmp;
7227 
7228     if (!ENABLE_ARCH_8) {
7229         return false;
7230     }
7231     /* We UNDEF for these UNPREDICTABLE cases.  */
7232     if (a->rn == 15 || a->rt == 15) {
7233         unallocated_encoding(s);
7234         return true;
7235     }
7236 
7237     addr = load_reg(s, a->rn);
7238     tmp = load_reg(s, a->rt);
7239     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7240     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7241     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7242 
7243     tcg_temp_free_i32(tmp);
7244     tcg_temp_free_i32(addr);
7245     return true;
7246 }
7247 
7248 static bool trans_STL(DisasContext *s, arg_STL *a)
7249 {
7250     return op_stl(s, a, MO_UL);
7251 }
7252 
7253 static bool trans_STLB(DisasContext *s, arg_STL *a)
7254 {
7255     return op_stl(s, a, MO_UB);
7256 }
7257 
7258 static bool trans_STLH(DisasContext *s, arg_STL *a)
7259 {
7260     return op_stl(s, a, MO_UW);
7261 }
7262 
7263 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7264 {
7265     TCGv_i32 addr;
7266     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7267     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7268 
7269     /* We UNDEF for these UNPREDICTABLE cases.  */
7270     if (a->rn == 15 || a->rt == 15
7271         || (!v8a && s->thumb && a->rt == 13)
7272         || (mop == MO_64
7273             && (a->rt2 == 15 || a->rt == a->rt2
7274                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7275         unallocated_encoding(s);
7276         return true;
7277     }
7278 
7279     addr = tcg_temp_new_i32();
7280     load_reg_var(s, addr, a->rn);
7281     tcg_gen_addi_i32(addr, addr, a->imm);
7282 
7283     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7284     tcg_temp_free_i32(addr);
7285 
7286     if (acq) {
7287         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7288     }
7289     return true;
7290 }
7291 
7292 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7293 {
7294     if (!ENABLE_ARCH_6) {
7295         return false;
7296     }
7297     return op_ldrex(s, a, MO_32, false);
7298 }
7299 
7300 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7301 {
7302     if (!ENABLE_ARCH_6K) {
7303         return false;
7304     }
7305     /* We UNDEF for these UNPREDICTABLE cases.  */
7306     if (a->rt & 1) {
7307         unallocated_encoding(s);
7308         return true;
7309     }
7310     a->rt2 = a->rt + 1;
7311     return op_ldrex(s, a, MO_64, false);
7312 }
7313 
7314 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7315 {
7316     return op_ldrex(s, a, MO_64, false);
7317 }
7318 
7319 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7320 {
7321     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7322         return false;
7323     }
7324     return op_ldrex(s, a, MO_8, false);
7325 }
7326 
7327 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7328 {
7329     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7330         return false;
7331     }
7332     return op_ldrex(s, a, MO_16, false);
7333 }
7334 
7335 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7336 {
7337     if (!ENABLE_ARCH_8) {
7338         return false;
7339     }
7340     return op_ldrex(s, a, MO_32, true);
7341 }
7342 
7343 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7344 {
7345     if (!ENABLE_ARCH_8) {
7346         return false;
7347     }
7348     /* We UNDEF for these UNPREDICTABLE cases.  */
7349     if (a->rt & 1) {
7350         unallocated_encoding(s);
7351         return true;
7352     }
7353     a->rt2 = a->rt + 1;
7354     return op_ldrex(s, a, MO_64, true);
7355 }
7356 
7357 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7358 {
7359     if (!ENABLE_ARCH_8) {
7360         return false;
7361     }
7362     return op_ldrex(s, a, MO_64, true);
7363 }
7364 
7365 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7366 {
7367     if (!ENABLE_ARCH_8) {
7368         return false;
7369     }
7370     return op_ldrex(s, a, MO_8, true);
7371 }
7372 
7373 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7374 {
7375     if (!ENABLE_ARCH_8) {
7376         return false;
7377     }
7378     return op_ldrex(s, a, MO_16, true);
7379 }
7380 
7381 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7382 {
7383     TCGv_i32 addr, tmp;
7384 
7385     if (!ENABLE_ARCH_8) {
7386         return false;
7387     }
7388     /* We UNDEF for these UNPREDICTABLE cases.  */
7389     if (a->rn == 15 || a->rt == 15) {
7390         unallocated_encoding(s);
7391         return true;
7392     }
7393 
7394     addr = load_reg(s, a->rn);
7395     tmp = tcg_temp_new_i32();
7396     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7397     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7398     tcg_temp_free_i32(addr);
7399 
7400     store_reg(s, a->rt, tmp);
7401     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7402     return true;
7403 }
7404 
7405 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7406 {
7407     return op_lda(s, a, MO_UL);
7408 }
7409 
7410 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7411 {
7412     return op_lda(s, a, MO_UB);
7413 }
7414 
7415 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7416 {
7417     return op_lda(s, a, MO_UW);
7418 }
7419 
7420 /*
7421  * Media instructions
7422  */
7423 
7424 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7425 {
7426     TCGv_i32 t1, t2;
7427 
7428     if (!ENABLE_ARCH_6) {
7429         return false;
7430     }
7431 
7432     t1 = load_reg(s, a->rn);
7433     t2 = load_reg(s, a->rm);
7434     gen_helper_usad8(t1, t1, t2);
7435     tcg_temp_free_i32(t2);
7436     if (a->ra != 15) {
7437         t2 = load_reg(s, a->ra);
7438         tcg_gen_add_i32(t1, t1, t2);
7439         tcg_temp_free_i32(t2);
7440     }
7441     store_reg(s, a->rd, t1);
7442     return true;
7443 }
7444 
7445 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7446 {
7447     TCGv_i32 tmp;
7448     int width = a->widthm1 + 1;
7449     int shift = a->lsb;
7450 
7451     if (!ENABLE_ARCH_6T2) {
7452         return false;
7453     }
7454     if (shift + width > 32) {
7455         /* UNPREDICTABLE; we choose to UNDEF */
7456         unallocated_encoding(s);
7457         return true;
7458     }
7459 
7460     tmp = load_reg(s, a->rn);
7461     if (u) {
7462         tcg_gen_extract_i32(tmp, tmp, shift, width);
7463     } else {
7464         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7465     }
7466     store_reg(s, a->rd, tmp);
7467     return true;
7468 }
7469 
7470 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7471 {
7472     return op_bfx(s, a, false);
7473 }
7474 
7475 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7476 {
7477     return op_bfx(s, a, true);
7478 }
7479 
7480 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7481 {
7482     TCGv_i32 tmp;
7483     int msb = a->msb, lsb = a->lsb;
7484     int width;
7485 
7486     if (!ENABLE_ARCH_6T2) {
7487         return false;
7488     }
7489     if (msb < lsb) {
7490         /* UNPREDICTABLE; we choose to UNDEF */
7491         unallocated_encoding(s);
7492         return true;
7493     }
7494 
7495     width = msb + 1 - lsb;
7496     if (a->rn == 15) {
7497         /* BFC */
7498         tmp = tcg_const_i32(0);
7499     } else {
7500         /* BFI */
7501         tmp = load_reg(s, a->rn);
7502     }
7503     if (width != 32) {
7504         TCGv_i32 tmp2 = load_reg(s, a->rd);
7505         tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7506         tcg_temp_free_i32(tmp2);
7507     }
7508     store_reg(s, a->rd, tmp);
7509     return true;
7510 }
7511 
7512 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7513 {
7514     unallocated_encoding(s);
7515     return true;
7516 }
7517 
7518 /*
7519  * Parallel addition and subtraction
7520  */
7521 
7522 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7523                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7524 {
7525     TCGv_i32 t0, t1;
7526 
7527     if (s->thumb
7528         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7529         : !ENABLE_ARCH_6) {
7530         return false;
7531     }
7532 
7533     t0 = load_reg(s, a->rn);
7534     t1 = load_reg(s, a->rm);
7535 
7536     gen(t0, t0, t1);
7537 
7538     tcg_temp_free_i32(t1);
7539     store_reg(s, a->rd, t0);
7540     return true;
7541 }
7542 
7543 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7544                              void (*gen)(TCGv_i32, TCGv_i32,
7545                                          TCGv_i32, TCGv_ptr))
7546 {
7547     TCGv_i32 t0, t1;
7548     TCGv_ptr ge;
7549 
7550     if (s->thumb
7551         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7552         : !ENABLE_ARCH_6) {
7553         return false;
7554     }
7555 
7556     t0 = load_reg(s, a->rn);
7557     t1 = load_reg(s, a->rm);
7558 
7559     ge = tcg_temp_new_ptr();
7560     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7561     gen(t0, t0, t1, ge);
7562 
7563     tcg_temp_free_ptr(ge);
7564     tcg_temp_free_i32(t1);
7565     store_reg(s, a->rd, t0);
7566     return true;
7567 }
7568 
7569 #define DO_PAR_ADDSUB(NAME, helper) \
7570 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7571 {                                                       \
7572     return op_par_addsub(s, a, helper);                 \
7573 }
7574 
7575 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7576 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7577 {                                                       \
7578     return op_par_addsub_ge(s, a, helper);              \
7579 }
7580 
7581 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7582 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7583 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7584 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7585 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7586 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7587 
7588 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7589 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7590 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7591 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7592 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7593 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7594 
7595 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7596 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7597 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7598 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7599 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7600 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7601 
7602 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7603 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7604 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7605 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7606 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7607 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7608 
7609 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7610 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7611 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7612 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7613 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7614 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7615 
7616 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7617 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7618 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7619 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7620 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7621 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7622 
7623 #undef DO_PAR_ADDSUB
7624 #undef DO_PAR_ADDSUB_GE
7625 
7626 /*
7627  * Packing, unpacking, saturation, and reversal
7628  */
7629 
7630 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7631 {
7632     TCGv_i32 tn, tm;
7633     int shift = a->imm;
7634 
7635     if (s->thumb
7636         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7637         : !ENABLE_ARCH_6) {
7638         return false;
7639     }
7640 
7641     tn = load_reg(s, a->rn);
7642     tm = load_reg(s, a->rm);
7643     if (a->tb) {
7644         /* PKHTB */
7645         if (shift == 0) {
7646             shift = 31;
7647         }
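        /*
         * An encoded shift of 0 means ASR #32; shifting by 31 gives the
         * same result (0 or 0xffffffff) while staying inside the shift
         * counts TCG accepts for a 32-bit value.
         */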
7648         tcg_gen_sari_i32(tm, tm, shift);
7649         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7650     } else {
7651         /* PKHBT */
7652         tcg_gen_shli_i32(tm, tm, shift);
7653         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7654     }
7655     tcg_temp_free_i32(tm);
7656     store_reg(s, a->rd, tn);
7657     return true;
7658 }
7659 
7660 static bool op_sat(DisasContext *s, arg_sat *a,
7661                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7662 {
7663     TCGv_i32 tmp;
7664     int shift = a->imm;
7665 
7666     if (!ENABLE_ARCH_6) {
7667         return false;
7668     }
7669 
7670     tmp = load_reg(s, a->rn);
7671     if (a->sh) {
7672         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7673     } else {
7674         tcg_gen_shli_i32(tmp, tmp, shift);
7675     }
7676 
7677     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7678 
7679     store_reg(s, a->rd, tmp);
7680     return true;
7681 }
7682 
7683 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7684 {
7685     return op_sat(s, a, gen_helper_ssat);
7686 }
7687 
7688 static bool trans_USAT(DisasContext *s, arg_sat *a)
7689 {
7690     return op_sat(s, a, gen_helper_usat);
7691 }
7692 
7693 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7694 {
7695     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7696         return false;
7697     }
7698     return op_sat(s, a, gen_helper_ssat16);
7699 }
7700 
7701 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7702 {
7703     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7704         return false;
7705     }
7706     return op_sat(s, a, gen_helper_usat16);
7707 }
7708 
7709 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7710                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7711                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7712 {
7713     TCGv_i32 tmp;
7714 
7715     if (!ENABLE_ARCH_6) {
7716         return false;
7717     }
7718 
7719     tmp = load_reg(s, a->rm);
7720     /*
7721      * TODO: In many cases we could do a shift instead of a rotate.
7722      * Combined with a simple extend, that becomes an extract.
7723      */
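    /*
     * For example (illustrative): UXTAB with rotate 8 could instead be
     * tcg_gen_extract_i32(tmp, tmp, 8, 8), which is the kind of
     * simplification the TODO above refers to.
     */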
7724     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7725     gen_extract(tmp, tmp);
7726 
7727     if (a->rn != 15) {
7728         TCGv_i32 tmp2 = load_reg(s, a->rn);
7729         gen_add(tmp, tmp, tmp2);
7730         tcg_temp_free_i32(tmp2);
7731     }
7732     store_reg(s, a->rd, tmp);
7733     return true;
7734 }
7735 
7736 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7737 {
7738     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7739 }
7740 
7741 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7742 {
7743     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7744 }
7745 
7746 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7747 {
7748     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7749         return false;
7750     }
7751     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7752 }
7753 
7754 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7755 {
7756     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7757 }
7758 
7759 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7760 {
7761     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7762 }
7763 
7764 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7765 {
7766     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7767         return false;
7768     }
7769     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7770 }
7771 
7772 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7773 {
7774     TCGv_i32 t1, t2, t3;
7775 
7776     if (s->thumb
7777         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7778         : !ENABLE_ARCH_6) {
7779         return false;
7780     }
7781 
7782     t1 = load_reg(s, a->rn);
7783     t2 = load_reg(s, a->rm);
7784     t3 = tcg_temp_new_i32();
7785     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7786     gen_helper_sel_flags(t1, t3, t1, t2);
7787     tcg_temp_free_i32(t3);
7788     tcg_temp_free_i32(t2);
7789     store_reg(s, a->rd, t1);
7790     return true;
7791 }
7792 
7793 static bool op_rr(DisasContext *s, arg_rr *a,
7794                   void (*gen)(TCGv_i32, TCGv_i32))
7795 {
7796     TCGv_i32 tmp;
7797 
7798     tmp = load_reg(s, a->rm);
7799     gen(tmp, tmp);
7800     store_reg(s, a->rd, tmp);
7801     return true;
7802 }
7803 
7804 static bool trans_REV(DisasContext *s, arg_rr *a)
7805 {
7806     if (!ENABLE_ARCH_6) {
7807         return false;
7808     }
7809     return op_rr(s, a, tcg_gen_bswap32_i32);
7810 }
7811 
7812 static bool trans_REV16(DisasContext *s, arg_rr *a)
7813 {
7814     if (!ENABLE_ARCH_6) {
7815         return false;
7816     }
7817     return op_rr(s, a, gen_rev16);
7818 }
7819 
7820 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7821 {
7822     if (!ENABLE_ARCH_6) {
7823         return false;
7824     }
7825     return op_rr(s, a, gen_revsh);
7826 }
7827 
7828 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7829 {
7830     if (!ENABLE_ARCH_6T2) {
7831         return false;
7832     }
7833     return op_rr(s, a, gen_helper_rbit);
7834 }
7835 
7836 /*
7837  * Signed multiply, signed and unsigned divide
7838  */
7839 
7840 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7841 {
7842     TCGv_i32 t1, t2;
7843 
7844     if (!ENABLE_ARCH_6) {
7845         return false;
7846     }
7847 
7848     t1 = load_reg(s, a->rn);
7849     t2 = load_reg(s, a->rm);
7850     if (m_swap) {
7851         gen_swap_half(t2, t2);
7852     }
7853     gen_smul_dual(t1, t2);
7854 
7855     if (sub) {
7856         /*
7857          * This subtraction cannot overflow, so we can do a simple
7858          * 32-bit subtraction and then a possible 32-bit saturating
7859          * addition of Ra.
7860          */
7861         tcg_gen_sub_i32(t1, t1, t2);
7862         tcg_temp_free_i32(t2);
7863 
7864         if (a->ra != 15) {
7865             t2 = load_reg(s, a->ra);
7866             gen_helper_add_setq(t1, cpu_env, t1, t2);
7867             tcg_temp_free_i32(t2);
7868         }
7869     } else if (a->ra == 15) {
7870         /* Single saturation-checking addition */
7871         gen_helper_add_setq(t1, cpu_env, t1, t2);
7872         tcg_temp_free_i32(t2);
7873     } else {
7874         /*
7875          * We need to add the products and Ra together and then
7876          * determine whether the final result overflowed. Doing
7877          * this as two separate add-and-check-overflow steps incorrectly
7878          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7879          * Do all the arithmetic at 64-bits and then check for overflow.
7880          */
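        /*
         * Worked example (illustrative): with Rn = Rm = 0x80008000 and
         * Ra = -1, each product is 0x40000000, so the products alone sum
         * to 0x80000000 (a 32-bit signed overflow), yet the full result
         * 0x80000000 - 1 = 0x7fffffff is representable and Q must stay
         * clear.  Two separate checked additions would set Q spuriously.
         */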
7881         TCGv_i64 p64, q64;
7882         TCGv_i32 t3, qf, one;
7883 
7884         p64 = tcg_temp_new_i64();
7885         q64 = tcg_temp_new_i64();
7886         tcg_gen_ext_i32_i64(p64, t1);
7887         tcg_gen_ext_i32_i64(q64, t2);
7888         tcg_gen_add_i64(p64, p64, q64);
7889         load_reg_var(s, t2, a->ra);
7890         tcg_gen_ext_i32_i64(q64, t2);
7891         tcg_gen_add_i64(p64, p64, q64);
7892         tcg_temp_free_i64(q64);
7893 
7894         tcg_gen_extr_i64_i32(t1, t2, p64);
7895         tcg_temp_free_i64(p64);
7896         /*
7897          * t1 is the low half of the result which goes into Rd.
7898          * We have overflow and must set Q if the high half (t2)
7899          * is different from the sign-extension of t1.
7900          */
7901         t3 = tcg_temp_new_i32();
7902         tcg_gen_sari_i32(t3, t1, 31);
7903         qf = load_cpu_field(QF);
7904         one = tcg_constant_i32(1);
7905         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7906         store_cpu_field(qf, QF);
7907         tcg_temp_free_i32(t3);
7908         tcg_temp_free_i32(t2);
7909     }
7910     store_reg(s, a->rd, t1);
7911     return true;
7912 }
7913 
7914 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7915 {
7916     return op_smlad(s, a, false, false);
7917 }
7918 
7919 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7920 {
7921     return op_smlad(s, a, true, false);
7922 }
7923 
7924 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7925 {
7926     return op_smlad(s, a, false, true);
7927 }
7928 
7929 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7930 {
7931     return op_smlad(s, a, true, true);
7932 }
7933 
7934 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7935 {
7936     TCGv_i32 t1, t2;
7937     TCGv_i64 l1, l2;
7938 
7939     if (!ENABLE_ARCH_6) {
7940         return false;
7941     }
7942 
7943     t1 = load_reg(s, a->rn);
7944     t2 = load_reg(s, a->rm);
7945     if (m_swap) {
7946         gen_swap_half(t2, t2);
7947     }
7948     gen_smul_dual(t1, t2);
7949 
7950     l1 = tcg_temp_new_i64();
7951     l2 = tcg_temp_new_i64();
7952     tcg_gen_ext_i32_i64(l1, t1);
7953     tcg_gen_ext_i32_i64(l2, t2);
7954     tcg_temp_free_i32(t1);
7955     tcg_temp_free_i32(t2);
7956 
7957     if (sub) {
7958         tcg_gen_sub_i64(l1, l1, l2);
7959     } else {
7960         tcg_gen_add_i64(l1, l1, l2);
7961     }
7962     tcg_temp_free_i64(l2);
7963 
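    /*
     * gen_addq() adds the existing {rd:ra} register pair (RdHi:RdLo) to
     * the 64-bit sum and gen_storeq_reg() writes the result back to the
     * same pair, giving the long accumulate that SMLALD/SMLSLD require.
     */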
7964     gen_addq(s, l1, a->ra, a->rd);
7965     gen_storeq_reg(s, a->ra, a->rd, l1);
7966     tcg_temp_free_i64(l1);
7967     return true;
7968 }
7969 
7970 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7971 {
7972     return op_smlald(s, a, false, false);
7973 }
7974 
7975 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7976 {
7977     return op_smlald(s, a, true, false);
7978 }
7979 
7980 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7981 {
7982     return op_smlald(s, a, false, true);
7983 }
7984 
7985 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7986 {
7987     return op_smlald(s, a, true, true);
7988 }
7989 
7990 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7991 {
7992     TCGv_i32 t1, t2;
7993 
7994     if (s->thumb
7995         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7996         : !ENABLE_ARCH_6) {
7997         return false;
7998     }
7999 
8000     t1 = load_reg(s, a->rn);
8001     t2 = load_reg(s, a->rm);
8002     tcg_gen_muls2_i32(t2, t1, t1, t2);
8003 
8004     if (a->ra != 15) {
8005         TCGv_i32 t3 = load_reg(s, a->ra);
8006         if (sub) {
8007             /*
8008              * For SMMLS we need a 64-bit subtract of the product from
8009              * (Ra << 32): the non-zero product low part borrows into the
8010              * high word, and it yields the correct low part for rounding.
8011              */
8012             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
8013         } else {
8014             tcg_gen_add_i32(t1, t1, t3);
8015         }
8016         tcg_temp_free_i32(t3);
8017     }
8018     if (round) {
8019         /*
8020          * Adding 0x80000000 to the 64-bit quantity means that we have
8021          * carry into the high word when the low word has the msb set.
8022          */
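        /*
         * t2 is the low 32 bits of that quantity, so its msb is exactly
         * the carry which would propagate into the high word; hence the
         * shift right by 31 and add.
         */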
8023         tcg_gen_shri_i32(t2, t2, 31);
8024         tcg_gen_add_i32(t1, t1, t2);
8025     }
8026     tcg_temp_free_i32(t2);
8027     store_reg(s, a->rd, t1);
8028     return true;
8029 }
8030 
8031 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
8032 {
8033     return op_smmla(s, a, false, false);
8034 }
8035 
8036 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
8037 {
8038     return op_smmla(s, a, true, false);
8039 }
8040 
8041 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
8042 {
8043     return op_smmla(s, a, false, true);
8044 }
8045 
8046 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
8047 {
8048     return op_smmla(s, a, true, true);
8049 }
8050 
8051 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
8052 {
8053     TCGv_i32 t1, t2;
8054 
8055     if (s->thumb
8056         ? !dc_isar_feature(aa32_thumb_div, s)
8057         : !dc_isar_feature(aa32_arm_div, s)) {
8058         return false;
8059     }
8060 
8061     t1 = load_reg(s, a->rn);
8062     t2 = load_reg(s, a->rm);
8063     if (u) {
8064         gen_helper_udiv(t1, cpu_env, t1, t2);
8065     } else {
8066         gen_helper_sdiv(t1, cpu_env, t1, t2);
8067     }
8068     tcg_temp_free_i32(t2);
8069     store_reg(s, a->rd, t1);
8070     return true;
8071 }
8072 
8073 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
8074 {
8075     return op_div(s, a, false);
8076 }
8077 
8078 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
8079 {
8080     return op_div(s, a, true);
8081 }
8082 
8083 /*
8084  * Block data transfer
8085  */
8086 
8087 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
8088 {
8089     TCGv_i32 addr = load_reg(s, a->rn);
8090 
8091     if (a->b) {
8092         if (a->i) {
8093             /* pre increment */
8094             tcg_gen_addi_i32(addr, addr, 4);
8095         } else {
8096             /* pre decrement */
8097             tcg_gen_addi_i32(addr, addr, -(n * 4));
8098         }
8099     } else if (!a->i && n != 1) {
8100         /* post decrement */
8101         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8102     }
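    /*
     * Worked example (illustrative): for STMDB with three registers
     * (B set, I clear, n = 3) addr starts at Rn - 12, so the accesses go
     * to Rn-12, Rn-8 and Rn-4; for the DA form (B and I clear) addr
     * starts at Rn - 8 and the last access hits Rn itself.
     */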
8103 
8104     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8105         /*
8106          * If the writeback is incrementing SP rather than
8107          * decrementing it, and the initial SP is below the
8108          * stack limit but the final written-back SP would
8109          * be above, then we must not perform any memory
8110          * accesses, but it is IMPDEF whether we generate
8111          * an exception. We choose to do so in this case.
8112          * At this point 'addr' is the lowest address, so
8113          * either the original SP (if incrementing) or our
8114          * final SP (if decrementing), so that's what we check.
8115          */
8116         gen_helper_v8m_stackcheck(cpu_env, addr);
8117     }
8118 
8119     return addr;
8120 }
8121 
8122 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
8123                                TCGv_i32 addr, int n)
8124 {
8125     if (a->w) {
8126         /* write back */
8127         if (!a->b) {
8128             if (a->i) {
8129                 /* post increment */
8130                 tcg_gen_addi_i32(addr, addr, 4);
8131             } else {
8132                 /* post decrement */
8133                 tcg_gen_addi_i32(addr, addr, -(n * 4));
8134             }
8135         } else if (!a->i && n != 1) {
8136             /* pre decrement */
8137             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8138         }
8139         store_reg(s, a->rn, addr);
8140     } else {
8141         tcg_temp_free_i32(addr);
8142     }
8143 }
8144 
8145 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
8146 {
8147     int i, j, n, list, mem_idx;
8148     bool user = a->u;
8149     TCGv_i32 addr, tmp;
8150 
8151     if (user) {
8152         /* STM (user) */
8153         if (IS_USER(s)) {
8154             /* Only usable in supervisor mode.  */
8155             unallocated_encoding(s);
8156             return true;
8157         }
8158     }
8159 
8160     list = a->list;
8161     n = ctpop16(list);
8162     if (n < min_n || a->rn == 15) {
8163         unallocated_encoding(s);
8164         return true;
8165     }
8166 
8167     s->eci_handled = true;
8168 
8169     addr = op_addr_block_pre(s, a, n);
8170     mem_idx = get_mem_index(s);
8171 
8172     for (i = j = 0; i < 16; i++) {
8173         if (!(list & (1 << i))) {
8174             continue;
8175         }
8176 
8177         if (user && i != 15) {
8178             tmp = tcg_temp_new_i32();
8179             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
8180         } else {
8181             tmp = load_reg(s, i);
8182         }
8183         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8184         tcg_temp_free_i32(tmp);
8185 
8186         /* No need to add after the last transfer.  */
8187         if (++j != n) {
8188             tcg_gen_addi_i32(addr, addr, 4);
8189         }
8190     }
8191 
8192     op_addr_block_post(s, a, addr, n);
8193     clear_eci_state(s);
8194     return true;
8195 }
8196 
8197 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
8198 {
8199     /* BitCount(list) < 1 is UNPREDICTABLE */
8200     return op_stm(s, a, 1);
8201 }
8202 
8203 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
8204 {
8205     /* Writeback register in register list is UNPREDICTABLE for T32.  */
8206     if (a->w && (a->list & (1 << a->rn))) {
8207         unallocated_encoding(s);
8208         return true;
8209     }
8210     /* BitCount(list) < 2 is UNPREDICTABLE */
8211     return op_stm(s, a, 2);
8212 }
8213 
8214 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
8215 {
8216     int i, j, n, list, mem_idx;
8217     bool loaded_base;
8218     bool user = a->u;
8219     bool exc_return = false;
8220     TCGv_i32 addr, tmp, loaded_var;
8221 
8222     if (user) {
8223         /* LDM (user), LDM (exception return) */
8224         if (IS_USER(s)) {
8225             /* Only usable in supervisor mode.  */
8226             unallocated_encoding(s);
8227             return true;
8228         }
8229         if (extract32(a->list, 15, 1)) {
8230             exc_return = true;
8231             user = false;
8232         } else {
8233             /* LDM (user) does not allow writeback.  */
8234             if (a->w) {
8235                 unallocated_encoding(s);
8236                 return true;
8237             }
8238         }
8239     }
8240 
8241     list = a->list;
8242     n = ctpop16(list);
8243     if (n < min_n || a->rn == 15) {
8244         unallocated_encoding(s);
8245         return true;
8246     }
8247 
8248     s->eci_handled = true;
8249 
8250     addr = op_addr_block_pre(s, a, n);
8251     mem_idx = get_mem_index(s);
8252     loaded_base = false;
8253     loaded_var = NULL;
8254 
8255     for (i = j = 0; i < 16; i++) {
8256         if (!(list & (1 << i))) {
8257             continue;
8258         }
8259 
8260         tmp = tcg_temp_new_i32();
8261         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8262         if (user) {
8263             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8264             tcg_temp_free_i32(tmp);
8265         } else if (i == a->rn) {
8266             loaded_var = tmp;
8267             loaded_base = true;
8268         } else if (i == 15 && exc_return) {
8269             store_pc_exc_ret(s, tmp);
8270         } else {
8271             store_reg_from_load(s, i, tmp);
8272         }
8273 
8274         /* No need to add after the last transfer.  */
8275         if (++j != n) {
8276             tcg_gen_addi_i32(addr, addr, 4);
8277         }
8278     }
8279 
8280     op_addr_block_post(s, a, addr, n);
8281 
8282     if (loaded_base) {
8283         /* Note that we reject base == pc above.  */
8284         store_reg(s, a->rn, loaded_var);
8285     }
8286 
8287     if (exc_return) {
8288         /* Restore CPSR from SPSR.  */
8289         tmp = load_cpu_field(spsr);
8290         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8291             gen_io_start();
8292         }
8293         gen_helper_cpsr_write_eret(cpu_env, tmp);
8294         tcg_temp_free_i32(tmp);
8295         /* Must exit loop to check unmasked IRQs */
8296         s->base.is_jmp = DISAS_EXIT;
8297     }
8298     clear_eci_state(s);
8299     return true;
8300 }
8301 
8302 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8303 {
8304     /*
8305      * Writeback register in register list is UNPREDICTABLE
8306      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8307      * an UNKNOWN value to the base register.
8308      */
8309     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8310         unallocated_encoding(s);
8311         return true;
8312     }
8313     /* BitCount(list) < 1 is UNPREDICTABLE */
8314     return do_ldm(s, a, 1);
8315 }
8316 
8317 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8318 {
8319     /* Writeback register in register list is UNPREDICTABLE for T32. */
8320     if (a->w && (a->list & (1 << a->rn))) {
8321         unallocated_encoding(s);
8322         return true;
8323     }
8324     /* BitCount(list) < 2 is UNPREDICTABLE */
8325     return do_ldm(s, a, 2);
8326 }
8327 
8328 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8329 {
8330     /* Writeback is conditional on the base register not being loaded.  */
8331     a->w = !(a->list & (1 << a->rn));
8332     /* BitCount(list) < 1 is UNPREDICTABLE */
8333     return do_ldm(s, a, 1);
8334 }
8335 
8336 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8337 {
8338     int i;
8339     TCGv_i32 zero;
8340 
8341     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8342         return false;
8343     }
8344 
8345     if (extract32(a->list, 13, 1)) {
8346         return false;
8347     }
8348 
8349     if (!a->list) {
8350         /* UNPREDICTABLE; we choose to UNDEF */
8351         return false;
8352     }
8353 
8354     s->eci_handled = true;
8355 
8356     zero = tcg_constant_i32(0);
8357     for (i = 0; i < 15; i++) {
8358         if (extract32(a->list, i, 1)) {
8359             /* Clear R[i] */
8360             tcg_gen_mov_i32(cpu_R[i], zero);
8361         }
8362     }
8363     if (extract32(a->list, 15, 1)) {
8364         /*
8365          * Clear APSR (by calling the MSR helper with the same argument
8366          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8367          */
8368         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8369     }
8370     clear_eci_state(s);
8371     return true;
8372 }
8373 
8374 /*
8375  * Branch, branch with link
8376  */
8377 
8378 static bool trans_B(DisasContext *s, arg_i *a)
8379 {
8380     gen_jmp(s, jmp_diff(s, a->imm));
8381     return true;
8382 }
8383 
8384 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8385 {
8386     /* This has cond from encoding, required to be outside IT block.  */
8387     if (a->cond >= 0xe) {
8388         return false;
8389     }
8390     if (s->condexec_mask) {
8391         unallocated_encoding(s);
8392         return true;
8393     }
8394     arm_skip_unless(s, a->cond);
8395     gen_jmp(s, jmp_diff(s, a->imm));
8396     return true;
8397 }
8398 
8399 static bool trans_BL(DisasContext *s, arg_i *a)
8400 {
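         /* LR = return address, with bit 0 set in Thumb state */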
8401     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8402     gen_jmp(s, jmp_diff(s, a->imm));
8403     return true;
8404 }
8405 
8406 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8407 {
8408     /*
8409      * BLX <imm> would be useless on M-profile; the encoding space
8410      * is used for other insns from v8.1M onward, and UNDEFs before that.
8411      */
8412     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8413         return false;
8414     }
8415 
8416     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8417     if (s->thumb && (a->imm & 2)) {
8418         return false;
8419     }
8420     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8421     store_cpu_field_constant(!s->thumb, thumb);
8422     /* This jump is computed from an aligned PC: subtract off the low bits. */
8423     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8424     return true;
8425 }
8426 
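     /*
      * Thumb-1 encodes BL/BLX as a pair of 16-bit insns: the prefix
      * stores PC plus the high part of the offset in LR, and the
      * suffixes below add the low part, set LR to the return address
      * and branch.
      */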
8427 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8428 {
8429     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8430     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8431     return true;
8432 }
8433 
8434 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8435 {
8436     TCGv_i32 tmp = tcg_temp_new_i32();
8437 
8438     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8439     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8440     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8441     gen_bx(s, tmp);
8442     return true;
8443 }
8444 
8445 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8446 {
8447     TCGv_i32 tmp;
8448 
8449     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8450     if (!ENABLE_ARCH_5) {
8451         return false;
8452     }
8453     tmp = tcg_temp_new_i32();
8454     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8455     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8456     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8457     gen_bx(s, tmp);
8458     return true;
8459 }
8460 
8461 static bool trans_BF(DisasContext *s, arg_BF *a)
8462 {
8463     /*
8464      * M-profile branch future insns. The architecture permits an
8465      * implementation to implement these as NOPs (equivalent to
8466      * discarding the LO_BRANCH_INFO cache immediately), and we
8467      * take that IMPDEF option because for QEMU a "real" implementation
8468      * would be complicated and wouldn't execute any faster.
8469      */
8470     if (!dc_isar_feature(aa32_lob, s)) {
8471         return false;
8472     }
8473     if (a->boff == 0) {
8474         /* SEE "Related encodings" (loop insns) */
8475         return false;
8476     }
8477     /* Handle as NOP */
8478     return true;
8479 }
8480 
8481 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8482 {
8483     /* M-profile low-overhead loop start */
8484     TCGv_i32 tmp;
8485 
8486     if (!dc_isar_feature(aa32_lob, s)) {
8487         return false;
8488     }
8489     if (a->rn == 13 || a->rn == 15) {
8490         /*
8491          * For DLSTP rn == 15 is a related encoding (LCTP); the
8492          * other cases caught by this condition are all
8493          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8494          */
8495         return false;
8496     }
8497 
8498     if (a->size != 4) {
8499         /* DLSTP */
8500         if (!dc_isar_feature(aa32_mve, s)) {
8501             return false;
8502         }
8503         if (!vfp_access_check(s)) {
8504             return true;
8505         }
8506     }
8507 
8508     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8509     tmp = load_reg(s, a->rn);
8510     store_reg(s, 14, tmp);
8511     if (a->size != 4) {
8512         /* DLSTP: set FPSCR.LTPSIZE */
8513         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8514         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8515     }
8516     return true;
8517 }
8518 
8519 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8520 {
8521     /* M-profile low-overhead while-loop start */
8522     TCGv_i32 tmp;
8523     DisasLabel nextlabel;
8524 
8525     if (!dc_isar_feature(aa32_lob, s)) {
8526         return false;
8527     }
8528     if (a->rn == 13 || a->rn == 15) {
8529         /*
8530          * For WLSTP rn == 15 is a related encoding (LE); the
8531          * other cases caught by this condition are all
8532          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8533          */
8534         return false;
8535     }
8536     if (s->condexec_mask) {
8537         /*
8538          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8539          * we choose to UNDEF, because otherwise our use of
8540          * gen_goto_tb(1) would clash with the use of TB exit 1
8541          * in the dc->condjmp condition-failed codepath in
8542          * arm_tr_tb_stop() and we'd get an assertion.
8543          */
8544         return false;
8545     }
8546     if (a->size != 4) {
8547         /* WLSTP */
8548         if (!dc_isar_feature(aa32_mve, s)) {
8549             return false;
8550         }
8551         /*
8552          * We need to check that the FPU is enabled here, but mustn't
8553          * call vfp_access_check() to do that because we don't want to
8554          * do the lazy state preservation in the "loop count is zero" case.
8555          * Do the check-and-raise-exception by hand.
8556          */
8557         if (s->fp_excp_el) {
8558             gen_exception_insn_el(s, 0, EXCP_NOCP,
8559                                   syn_uncategorized(), s->fp_excp_el);
8560             return true;
8561         }
8562     }
8563 
8564     nextlabel = gen_disas_label(s);
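         /* If the loop count is zero, branch past the loop end */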
8565     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8566     tmp = load_reg(s, a->rn);
8567     store_reg(s, 14, tmp);
8568     if (a->size != 4) {
8569         /*
8570          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8571          * lazy state preservation, new FP context creation, etc,
8572          * that vfp_access_check() does. We know that the actual
8573          * access check will succeed (ie it won't generate code that
8574          * throws an exception) because we did that check by hand earlier.
8575          */
8576         bool ok = vfp_access_check(s);
8577         assert(ok);
8578         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8579         /*
8580          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8581          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8582          */
8583     }
8584     gen_jmp_tb(s, curr_insn_len(s), 1);
8585 
8586     set_disas_label(s, nextlabel);
8587     gen_jmp(s, jmp_diff(s, a->imm));
8588     return true;
8589 }
8590 
8591 static bool trans_LE(DisasContext *s, arg_LE *a)
8592 {
8593     /*
8594      * M-profile low-overhead loop end. The architecture permits an
8595      * implementation to discard the LO_BRANCH_INFO cache at any time,
8596      * and we take the IMPDEF option to never set it in the first place
8597      * (equivalent to always discarding it immediately), because for QEMU
8598      * a "real" implementation would be complicated and wouldn't execute
8599      * any faster.
8600      */
8601     TCGv_i32 tmp;
8602     DisasLabel loopend;
8603     bool fpu_active;
8604 
8605     if (!dc_isar_feature(aa32_lob, s)) {
8606         return false;
8607     }
8608     if (a->f && a->tp) {
8609         return false;
8610     }
8611     if (s->condexec_mask) {
8612         /*
8613          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8614          * we choose to UNDEF, because otherwise our use of
8615          * gen_goto_tb(1) would clash with the use of TB exit 1
8616          * in the dc->condjmp condition-failed codepath in
8617          * arm_tr_tb_stop() and we'd get an assertion.
8618          */
8619         return false;
8620     }
8621     if (a->tp) {
8622         /* LETP */
8623         if (!dc_isar_feature(aa32_mve, s)) {
8624             return false;
8625         }
8626         if (!vfp_access_check(s)) {
8627             s->eci_handled = true;
8628             return true;
8629         }
8630     }
8631 
8632     /* LE/LETP is OK with ECI set and leaves it untouched */
8633     s->eci_handled = true;
8634 
8635     /*
8636      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8637      * UsageFault exception for the LE insn in that case. Note that we
8638      * are not directly checking FPSCR.LTPSIZE but instead check the
8639      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8640      * not currently active (ie ActiveFPState() returns false). We
8641      * can identify not-active purely from our TB state flags, as the
8642      * FPU is active only if:
8643      *  the FPU is enabled
8644      *  AND lazy state preservation is not active
8645      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8646      *
8647      * Usually we don't need to care about this distinction between
8648      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8649      * will either take an exception or clear the conditions that make
8650      * the FPU not active. But LE is an unusual case of a non-FP insn
8651      * that looks at LTPSIZE.
8652      */
8653     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8654 
8655     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8656         /* Need to do a runtime check for LTPSIZE != 4 */
8657         DisasLabel skipexc = gen_disas_label(s);
8658         tmp = load_cpu_field(v7m.ltpsize);
8659         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8660         tcg_temp_free_i32(tmp);
8661         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8662         set_disas_label(s, skipexc);
8663     }
8664 
8665     if (a->f) {
8666         /* Loop-forever: just jump back to the loop start */
8667         gen_jmp(s, jmp_diff(s, -a->imm));
8668         return true;
8669     }
8670 
8671     /*
8672      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8673      * For LE, we know at this point that LTPSIZE must be 4 and the
8674      * loop decrement value is 1. For LETP we need to calculate the decrement
8675      * value from LTPSIZE.
8676      */
8677     loopend = gen_disas_label(s);
8678     if (!a->tp) {
8679         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8680         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8681     } else {
8682         /*
8683          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8684          * so that decr stays live after the brcondi.
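              * For example, LTPSIZE == 1 (halfwords) gives a decrement of 8.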
8685          */
8686         TCGv_i32 decr = tcg_temp_new_i32();
8687         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8688         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8689         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8690         tcg_temp_free_i32(ltpsize);
8691 
8692         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8693 
8694         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8695         tcg_temp_free_i32(decr);
8696     }
8697     /* Jump back to the loop start */
8698     gen_jmp(s, jmp_diff(s, -a->imm));
8699 
8700     set_disas_label(s, loopend);
8701     if (a->tp) {
8702         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8703         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8704     }
8705     /* End TB, continuing to following insn */
8706     gen_jmp_tb(s, curr_insn_len(s), 1);
8707     return true;
8708 }
8709 
8710 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8711 {
8712     /*
8713      * M-profile Loop Clear with Tail Predication. Since our implementation
8714      * doesn't cache branch information, all we need to do is reset
8715      * FPSCR.LTPSIZE to 4.
8716      */
8717 
8718     if (!dc_isar_feature(aa32_lob, s) ||
8719         !dc_isar_feature(aa32_mve, s)) {
8720         return false;
8721     }
8722 
8723     if (!vfp_access_check(s)) {
8724         return true;
8725     }
8726 
8727     store_cpu_field_constant(4, v7m.ltpsize);
8728     return true;
8729 }
8730 
8731 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8732 {
8733     /*
8734      * M-profile Create Vector Tail Predicate. This insn is itself
8735      * predicated and is subject to beatwise execution.
8736      */
8737     TCGv_i32 rn_shifted, masklen;
8738 
8739     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8740         return false;
8741     }
8742 
8743     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8744         return true;
8745     }
8746 
8747     /*
8748      * We pre-calculate the mask length here to avoid needing
8749      * multiple helpers specialized for size.
8750      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
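          * E.g. size == 1 (halfwords), rn == 3 gives 3 << 1 == 6 predicated bytes.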
8751      */
8752     rn_shifted = tcg_temp_new_i32();
8753     masklen = load_reg(s, a->rn);
8754     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8755     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8756                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8757                         rn_shifted, tcg_constant_i32(16));
8758     gen_helper_mve_vctp(cpu_env, masklen);
8759     tcg_temp_free_i32(masklen);
8760     tcg_temp_free_i32(rn_shifted);
8761     /* This insn updates predication bits */
8762     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8763     mve_update_eci(s);
8764     return true;
8765 }
8766 
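     /*
      * TBB/TBH: table branch. Load a byte (TBB) or halfword (TBH) entry
      * from the table at Rn + Rm (Rm scaled by 2 for TBH), then branch
      * forward by twice that entry, relative to the read PC.
      */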
8767 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8768 {
8769     TCGv_i32 addr, tmp;
8770 
8771     tmp = load_reg(s, a->rm);
8772     if (half) {
8773         tcg_gen_add_i32(tmp, tmp, tmp);
8774     }
8775     addr = load_reg(s, a->rn);
8776     tcg_gen_add_i32(addr, addr, tmp);
8777 
8778     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8779 
8780     tcg_gen_add_i32(tmp, tmp, tmp);
8781     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8782     tcg_gen_add_i32(tmp, tmp, addr);
8783     tcg_temp_free_i32(addr);
8784     store_reg(s, 15, tmp);
8785     return true;
8786 }
8787 
8788 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8789 {
8790     return op_tbranch(s, a, false);
8791 }
8792 
8793 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8794 {
8795     return op_tbranch(s, a, true);
8796 }
8797 
8798 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8799 {
8800     TCGv_i32 tmp = load_reg(s, a->rn);
8801 
8802     arm_gen_condlabel(s);
8803     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8804                         tmp, 0, s->condlabel.label);
8805     tcg_temp_free_i32(tmp);
8806     gen_jmp(s, jmp_diff(s, a->imm));
8807     return true;
8808 }
8809 
8810 /*
8811  * Supervisor call - both T32 & A32 come here so we need to check
8812  * which mode we are in when checking for semihosting.
8813  */
8814 
8815 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8816 {
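         /* Semihosting trap immediates: 0xab for T32, 0x123456 for A32 */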
8817     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8818 
8819     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8820         semihosting_enabled(s->current_el == 0) &&
8821         (a->imm == semihost_imm)) {
8822         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8823     } else {
8824         if (s->fgt_svc) {
8825             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8826             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8827         } else {
8828             gen_update_pc(s, curr_insn_len(s));
8829             s->svc_imm = a->imm;
8830             s->base.is_jmp = DISAS_SWI;
8831         }
8832     }
8833     return true;
8834 }
8835 
8836 /*
8837  * Unconditional system instructions
8838  */
8839 
8840 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8841 {
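         /*
          * pre_offset positions addr at the lower of the two loaded words
          * for each P/U addressing mode; post_offset is applied after both
          * loads so that writeback lands at Rn - 8 or Rn + 8.
          */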
8842     static const int8_t pre_offset[4] = {
8843         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8844     };
8845     static const int8_t post_offset[4] = {
8846         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8847     };
8848     TCGv_i32 addr, t1, t2;
8849 
8850     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8851         return false;
8852     }
8853     if (IS_USER(s)) {
8854         unallocated_encoding(s);
8855         return true;
8856     }
8857 
8858     addr = load_reg(s, a->rn);
8859     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8860 
8861     /* Load PC into t1 and CPSR into t2.  */
8862     t1 = tcg_temp_new_i32();
8863     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8864     tcg_gen_addi_i32(addr, addr, 4);
8865     t2 = tcg_temp_new_i32();
8866     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8867 
8868     if (a->w) {
8869         /* Base writeback.  */
8870         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8871         store_reg(s, a->rn, addr);
8872     } else {
8873         tcg_temp_free_i32(addr);
8874     }
8875     gen_rfe(s, t1, t2);
8876     return true;
8877 }
8878 
8879 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8880 {
8881     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8882         return false;
8883     }
8884     gen_srs(s, a->mode, a->pu, a->w);
8885     return true;
8886 }
8887 
8888 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8889 {
8890     uint32_t mask, val;
8891 
8892     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8893         return false;
8894     }
8895     if (IS_USER(s)) {
8896         /* Implemented as NOP in user mode.  */
8897         return true;
8898     }
8899     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8900 
8901     mask = val = 0;
8902     if (a->imod & 2) {
8903         if (a->A) {
8904             mask |= CPSR_A;
8905         }
8906         if (a->I) {
8907             mask |= CPSR_I;
8908         }
8909         if (a->F) {
8910             mask |= CPSR_F;
8911         }
8912         if (a->imod & 1) {
8913             val |= mask;
8914         }
8915     }
8916     if (a->M) {
8917         mask |= CPSR_M;
8918         val |= a->mode;
8919     }
8920     if (mask) {
8921         gen_set_psr_im(s, mask, 0, val);
8922     }
8923     return true;
8924 }
8925 
8926 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8927 {
8928     TCGv_i32 tmp, addr;
8929 
8930     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8931         return false;
8932     }
8933     if (IS_USER(s)) {
8934         /* Implemented as NOP in user mode.  */
8935         return true;
8936     }
8937 
8938     tmp = tcg_constant_i32(a->im);
8939     /* FAULTMASK */
8940     if (a->F) {
8941         addr = tcg_constant_i32(19);
8942         gen_helper_v7m_msr(cpu_env, addr, tmp);
8943     }
8944     /* PRIMASK */
8945     if (a->I) {
8946         addr = tcg_constant_i32(16);
8947         gen_helper_v7m_msr(cpu_env, addr, tmp);
8948     }
8949     gen_rebuild_hflags(s, false);
8950     gen_lookup_tb(s);
8951     return true;
8952 }
8953 
8954 /*
8955  * Clear-Exclusive, Barriers
8956  */
8957 
8958 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8959 {
8960     if (s->thumb
8961         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8962         : !ENABLE_ARCH_6K) {
8963         return false;
8964     }
8965     gen_clrex(s);
8966     return true;
8967 }
8968 
8969 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8970 {
8971     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8972         return false;
8973     }
8974     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8975     return true;
8976 }
8977 
8978 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8979 {
8980     return trans_DSB(s, NULL);
8981 }
8982 
8983 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8984 {
8985     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8986         return false;
8987     }
8988     /*
8989      * We need to break the TB after this insn to execute
8990      * self-modifying code correctly and also to take
8991      * any pending interrupts immediately.
8992      */
8993     s->base.is_jmp = DISAS_TOO_MANY;
8994     return true;
8995 }
8996 
8997 static bool trans_SB(DisasContext *s, arg_SB *a)
8998 {
8999     if (!dc_isar_feature(aa32_sb, s)) {
9000         return false;
9001     }
9002     /*
9003      * TODO: There is no speculation barrier opcode
9004      * for TCG; MB and end the TB instead.
9005      */
9006     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9007     s->base.is_jmp = DISAS_TOO_MANY;
9008     return true;
9009 }
9010 
9011 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
9012 {
9013     if (!ENABLE_ARCH_6) {
9014         return false;
9015     }
9016     if (a->E != (s->be_data == MO_BE)) {
9017         gen_helper_setend(cpu_env);
9018         s->base.is_jmp = DISAS_UPDATE_EXIT;
9019     }
9020     return true;
9021 }
9022 
9023 /*
9024  * Preload instructions
9025  * All are nops, contingent on the appropriate arch level.
9026  */
9027 
9028 static bool trans_PLD(DisasContext *s, arg_PLD *a)
9029 {
9030     return ENABLE_ARCH_5TE;
9031 }
9032 
9033 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
9034 {
9035     return arm_dc_feature(s, ARM_FEATURE_V7MP);
9036 }
9037 
9038 static bool trans_PLI(DisasContext *s, arg_PLD *a)
9039 {
9040     return ENABLE_ARCH_7;
9041 }
9042 
9043 /*
9044  * If-then
9045  */
9046 
9047 static bool trans_IT(DisasContext *s, arg_IT *a)
9048 {
9049     int cond_mask = a->cond_mask;
9050 
9051     /*
9052      * No actual code generated for this insn, just setup state.
9053      *
9054      * Combinations of firstcond and mask which set up a 0b1111
9055      * condition are UNPREDICTABLE; we take the CONSTRAINED
9056      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9057      * i.e. both meaning "execute always".
9058      */
9059     s->condexec_cond = (cond_mask >> 4) & 0xe;
9060     s->condexec_mask = cond_mask & 0x1f;
9061     return true;
9062 }
9063 
9064 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
9065 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
9066 {
9067     TCGv_i32 rn, rm, zero;
9068     DisasCompare c;
9069 
9070     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
9071         return false;
9072     }
9073 
9074     if (a->rm == 13) {
9075         /* SEE "Related encodings" (MVE shifts) */
9076         return false;
9077     }
9078 
9079     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
9080         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
9081         return false;
9082     }
9083 
9084     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
9085     zero = tcg_constant_i32(0);
9086     if (a->rn == 15) {
9087         rn = zero;
9088     } else {
9089         rn = load_reg(s, a->rn);
9090     }
9091     if (a->rm == 15) {
9092         rm = zero;
9093     } else {
9094         rm = load_reg(s, a->rm);
9095     }
9096 
9097     switch (a->op) {
9098     case 0: /* CSEL */
9099         break;
9100     case 1: /* CSINC */
9101         tcg_gen_addi_i32(rm, rm, 1);
9102         break;
9103     case 2: /* CSINV */
9104         tcg_gen_not_i32(rm, rm);
9105         break;
9106     case 3: /* CSNEG */
9107         tcg_gen_neg_i32(rm, rm);
9108         break;
9109     default:
9110         g_assert_not_reached();
9111     }
9112 
9113     arm_test_cc(&c, a->fcond);
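         /* Rd = Rn if the condition holds, else the adjusted Rm */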
9114     tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
9115 
9116     store_reg(s, a->rd, rn);
9117     tcg_temp_free_i32(rm);
9118 
9119     return true;
9120 }
9121 
9122 /*
9123  * Legacy decoder.
9124  */
9125 
9126 static void disas_arm_insn(DisasContext *s, unsigned int insn)
9127 {
9128     unsigned int cond = insn >> 28;
9129 
9130     /* M variants do not implement ARM mode; this must raise the INVSTATE
9131      * UsageFault exception.
9132      */
9133     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9134         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
9135         return;
9136     }
9137 
9138     if (s->pstate_il) {
9139         /*
9140          * Illegal execution state. This has priority over BTI
9141          * exceptions, but comes after instruction abort exceptions.
9142          */
9143         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
9144         return;
9145     }
9146 
9147     if (cond == 0xf) {
9148         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9149          * choose to UNDEF. In ARMv5 and above the space is used
9150          * for miscellaneous unconditional instructions.
9151          */
9152         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
9153             unallocated_encoding(s);
9154             return;
9155         }
9156 
9157         /* Unconditional instructions.  */
9158         /* TODO: Perhaps merge these into one decodetree output file.  */
9159         if (disas_a32_uncond(s, insn) ||
9160             disas_vfp_uncond(s, insn) ||
9161             disas_neon_dp(s, insn) ||
9162             disas_neon_ls(s, insn) ||
9163             disas_neon_shared(s, insn)) {
9164             return;
9165         }
9166         /* fall back to legacy decoder */
9167 
9168         if ((insn & 0x0e000f00) == 0x0c000100) {
9169             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9170                 /* iWMMXt register transfer.  */
9171                 if (extract32(s->c15_cpar, 1, 1)) {
9172                     if (!disas_iwmmxt_insn(s, insn)) {
9173                         return;
9174                     }
9175                 }
9176             }
9177         }
9178         goto illegal_op;
9179     }
9180     if (cond != 0xe) {
9181         /* If the condition is not "always", we generate a conditional
9182            jump to the next instruction. */
9183         arm_skip_unless(s, cond);
9184     }
9185 
9186     /* TODO: Perhaps merge these into one decodetree output file.  */
9187     if (disas_a32(s, insn) ||
9188         disas_vfp(s, insn)) {
9189         return;
9190     }
9191     /* fall back to legacy decoder */
9192     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
9193     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
9194         if (((insn & 0x0c000e00) == 0x0c000000)
9195             && ((insn & 0x03000000) != 0x03000000)) {
9196             /* Coprocessor insn, coprocessor 0 or 1 */
9197             disas_xscale_insn(s, insn);
9198             return;
9199         }
9200     }
9201 
9202 illegal_op:
9203     unallocated_encoding(s);
9204 }
9205 
9206 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9207 {
9208     /*
9209      * Return true if this is a 16 bit instruction. We must be precise
9210      * about this (matching the decode).
9211      */
9212     if ((insn >> 11) < 0x1d) {
9213         /* Definitely a 16-bit instruction */
9214         return true;
9215     }
9216 
9217     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9218      * first half of a 32-bit Thumb insn. Thumb-1 cores might
9219      * end up actually treating this as two 16-bit insns, though,
9220      * if it's half of a bl/blx pair that might span a page boundary.
9221      */
9222     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9223         arm_dc_feature(s, ARM_FEATURE_M)) {
9224         /* Thumb2 cores (including all M profile ones) always treat
9225          * 32-bit insns as 32-bit.
9226          */
9227         return false;
9228     }
9229 
9230     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9231         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9232          * is not on the next page; we merge this into a 32-bit
9233          * insn.
9234          */
9235         return false;
9236     }
9237     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9238      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9239      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9240      *  -- handle as single 16 bit insn
9241      */
9242     return true;
9243 }
9244 
9245 /* Translate a 32-bit thumb instruction. */
9246 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9247 {
9248     /*
9249      * ARMv6-M supports a limited subset of Thumb2 instructions.
9250      * Other Thumb-1 architectures allow only the 32-bit
9251      * combined BL/BLX prefix and suffix.
9252      */
9253     if (arm_dc_feature(s, ARM_FEATURE_M) &&
9254         !arm_dc_feature(s, ARM_FEATURE_V7)) {
9255         int i;
9256         bool found = false;
9257         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9258                                                0xf3b08040 /* dsb */,
9259                                                0xf3b08050 /* dmb */,
9260                                                0xf3b08060 /* isb */,
9261                                                0xf3e08000 /* mrs */,
9262                                                0xf000d000 /* bl */};
9263         static const uint32_t armv6m_mask[] = {0xffe0d000,
9264                                                0xfff0d0f0,
9265                                                0xfff0d0f0,
9266                                                0xfff0d0f0,
9267                                                0xffe0d000,
9268                                                0xf800d000};
9269 
9270         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9271             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9272                 found = true;
9273                 break;
9274             }
9275         }
9276         if (!found) {
9277             goto illegal_op;
9278         }
9279     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9280         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9281             unallocated_encoding(s);
9282             return;
9283         }
9284     }
9285 
9286     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9287         /*
9288          * NOCP takes precedence over any UNDEF for (almost) the
9289          * entire wide range of coprocessor-space encodings, so check
9290          * for it first before proceeding to actually decode eg VFP
9291          * insns. This decode also handles the few insns which are
9292          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9293          */
9294         if (disas_m_nocp(s, insn)) {
9295             return;
9296         }
9297     }
9298 
9299     if ((insn & 0xef000000) == 0xef000000) {
9300         /*
9301          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9302          * transform into
9303          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
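              * (i.e. clear bits 27:26, keep bit 25, copy old bit 28 ('p')
              *  into bit 24, and force bit 28 to 1)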
9304          */
9305         uint32_t a32_insn = (insn & 0xe2ffffff) |
9306             ((insn & (1 << 28)) >> 4) | (1 << 28);
9307 
9308         if (disas_neon_dp(s, a32_insn)) {
9309             return;
9310         }
9311     }
9312 
9313     if ((insn & 0xff100000) == 0xf9000000) {
9314         /*
9315          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9316          * transform into
9317          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9318          */
9319         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9320 
9321         if (disas_neon_ls(s, a32_insn)) {
9322             return;
9323         }
9324     }
9325 
9326     /*
9327      * TODO: Perhaps merge these into one decodetree output file.
9328      * Note disas_vfp is written for a32 with cond field in the
9329      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9330      */
9331     if (disas_t32(s, insn) ||
9332         disas_vfp_uncond(s, insn) ||
9333         disas_neon_shared(s, insn) ||
9334         disas_mve(s, insn) ||
9335         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9336         return;
9337     }
9338 
9339 illegal_op:
9340     unallocated_encoding(s);
9341 }
9342 
9343 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9344 {
9345     if (!disas_t16(s, insn)) {
9346         unallocated_encoding(s);
9347     }
9348 }
9349 
9350 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9351 {
9352     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9353      * (False positives are OK, false negatives are not.)
9354      * We know this is a Thumb insn, and our caller ensures we are
9355      * only called if dc->base.pc_next is less than 4 bytes from the page
9356      * boundary, so we cross the page if the first 16 bits indicate
9357      * that this is a 32 bit insn.
9358      */
9359     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9360 
9361     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9362 }
9363 
9364 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9365 {
9366     DisasContext *dc = container_of(dcbase, DisasContext, base);
9367     CPUARMState *env = cs->env_ptr;
9368     ARMCPU *cpu = env_archcpu(env);
9369     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9370     uint32_t condexec, core_mmu_idx;
9371 
9372     dc->isar = &cpu->isar;
9373     dc->condjmp = 0;
9374     dc->pc_save = dc->base.pc_first;
9375     dc->aarch64 = false;
9376     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9377     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9378     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9379     /*
9380      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9381      * is always the IT bits. On M-profile, some of the reserved encodings
9382      * of IT are used instead to indicate either ICI or ECI, which
9383      * indicate partial progress of a restartable insn that was interrupted
9384      * partway through by an exception:
9385      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9386      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9387      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9388      * insn, behave normally".
9389      */
9390     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9391     dc->eci_handled = false;
9392     if (condexec & 0xf) {
9393         dc->condexec_mask = (condexec & 0xf) << 1;
9394         dc->condexec_cond = condexec >> 4;
9395     } else {
9396         if (arm_feature(env, ARM_FEATURE_M)) {
9397             dc->eci = condexec >> 4;
9398         }
9399     }
9400 
9401     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9402     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9403     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9404 #if !defined(CONFIG_USER_ONLY)
9405     dc->user = (dc->current_el == 0);
9406 #endif
9407     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9408     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9409     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9410     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9411     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9412 
9413     if (arm_feature(env, ARM_FEATURE_M)) {
9414         dc->vfp_enabled = 1;
9415         dc->be_data = MO_TE;
9416         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9417         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9418         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9419         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9420         dc->v7m_new_fp_ctxt_needed =
9421             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9422         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9423         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9424     } else {
9425         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9426         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9427         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9428         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9429         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9430             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9431         } else {
9432             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9433             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9434         }
9435         dc->sme_trap_nonstreaming =
9436             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9437     }
9438     dc->cp_regs = cpu->cp_regs;
9439     dc->features = env->features;
9440 
9441     /* Single step state. The code-generation logic here is:
9442      *  SS_ACTIVE == 0:
9443      *   generate code with no special handling for single-stepping (except
9444      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9445      *   this happens anyway because those changes are all system register or
9446      *   PSTATE writes).
9447      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9448      *   emit code for one insn
9449      *   emit code to clear PSTATE.SS
9450      *   emit code to generate software step exception for completed step
9451      *   end TB (as usual for having generated an exception)
9452      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9453      *   emit code to generate a software step exception
9454      *   end the TB
9455      */
9456     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9457     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9458     dc->is_ldex = false;
9459 
9460     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9461 
9462     /* If architectural single step active, limit to 1.  */
9463     if (dc->ss_active) {
9464         dc->base.max_insns = 1;
9465     }
9466 
9467     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9468        to those left on the page.  */
9469     if (!dc->thumb) {
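             /* -(pc | TARGET_PAGE_MASK) is the number of bytes left on the page */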
9470         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9471         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9472     }
9473 
9474     cpu_V0 = tcg_temp_new_i64();
9475     cpu_V1 = tcg_temp_new_i64();
9476     cpu_M0 = tcg_temp_new_i64();
9477 }
9478 
9479 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9480 {
9481     DisasContext *dc = container_of(dcbase, DisasContext, base);
9482 
9483     /* A note on handling of the condexec (IT) bits:
9484      *
9485      * We want to avoid the overhead of having to write the updated condexec
9486      * bits back to the CPUARMState for every instruction in an IT block. So:
9487      * (1) if the condexec bits are not already zero then we write
9488      * zero back into the CPUARMState now. This avoids complications trying
9489      * to do it at the end of the block. (For example if we don't do this
9490      * it's hard to identify whether we can safely skip writing condexec
9491      * at the end of the TB, which we definitely want to do for the case
9492      * where a TB doesn't do anything with the IT state at all.)
9493      * (2) if we are going to leave the TB then we call gen_set_condexec()
9494      * which will write the correct value into CPUARMState if zero is wrong.
9495      * This is done both for leaving the TB at the end, and for leaving
9496      * it because of an exception we know will happen, which is done in
9497      * gen_exception_insn(). The latter is necessary because we need to
9498      * leave the TB with the PC/IT state just prior to execution of the
9499      * instruction which caused the exception.
9500      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9501      * then the CPUARMState will be wrong and we need to reset it.
9502      * This is handled in the same way as restoration of the
9503      * PC in these situations; we save the value of the condexec bits
9504      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9505      * then uses this to restore them after an exception.
9506      *
9507      * Note that there are no instructions which can read the condexec
9508      * bits, and none which can write non-static values to them, so
9509      * we don't need to care about whether CPUARMState is correct in the
9510      * middle of a TB.
9511      */
9512 
9513     /* Reset the conditional execution bits immediately. This avoids
9514        complications trying to do it at the end of the block.  */
9515     if (dc->condexec_mask || dc->condexec_cond) {
9516         store_cpu_field_constant(0, condexec_bits);
9517     }
9518 }
9519 
9520 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9521 {
9522     DisasContext *dc = container_of(dcbase, DisasContext, base);
9523     /*
9524      * The ECI/ICI bits share PSR bits with the IT bits, so we
9525      * need to reconstitute the bits from the split-out DisasContext
9526      * fields here.
9527      */
9528     uint32_t condexec_bits;
9529     target_ulong pc_arg = dc->base.pc_next;
9530 
9531     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9532         pc_arg &= ~TARGET_PAGE_MASK;
9533     }
9534     if (dc->eci) {
9535         condexec_bits = dc->eci << 4;
9536     } else {
9537         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9538     }
9539     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9540     dc->insn_start = tcg_last_op();
9541 }
9542 
9543 static bool arm_check_kernelpage(DisasContext *dc)
9544 {
9545 #ifdef CONFIG_USER_ONLY
9546     /* Intercept jump to the magic kernel page.  */
9547     if (dc->base.pc_next >= 0xffff0000) {
9548         /* We always get here via a jump, so we know we are not in a
9549            conditional execution block.  */
9550         gen_exception_internal(EXCP_KERNEL_TRAP);
9551         dc->base.is_jmp = DISAS_NORETURN;
9552         return true;
9553     }
9554 #endif
9555     return false;
9556 }
9557 
9558 static bool arm_check_ss_active(DisasContext *dc)
9559 {
9560     if (dc->ss_active && !dc->pstate_ss) {
9561         /* Singlestep state is Active-pending.
9562          * If we're in this state at the start of a TB then either
9563          *  a) we just took an exception to an EL which is being debugged
9564          *     and this is the first insn in the exception handler
9565          *  b) debug exceptions were masked and we just unmasked them
9566          *     without changing EL (eg by clearing PSTATE.D)
9567          * In either case we're going to take a swstep exception in the
9568          * "did not step an insn" case, and so the syndrome ISV and EX
9569          * bits should be zero.
9570          */
9571         assert(dc->base.num_insns == 1);
9572         gen_swstep_exception(dc, 0, 0);
9573         dc->base.is_jmp = DISAS_NORETURN;
9574         return true;
9575     }
9576 
9577     return false;
9578 }
9579 
9580 static void arm_post_translate_insn(DisasContext *dc)
9581 {
9582     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9583         if (dc->pc_save != dc->condlabel.pc_save) {
9584             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9585         }
9586         gen_set_label(dc->condlabel.label);
9587         dc->condjmp = 0;
9588     }
9589 }
9590 
9591 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9592 {
9593     DisasContext *dc = container_of(dcbase, DisasContext, base);
9594     CPUARMState *env = cpu->env_ptr;
9595     uint32_t pc = dc->base.pc_next;
9596     unsigned int insn;
9597 
9598     /* Singlestep exceptions have the highest priority. */
9599     if (arm_check_ss_active(dc)) {
9600         dc->base.pc_next = pc + 4;
9601         return;
9602     }
9603 
9604     if (pc & 3) {
9605         /*
9606          * PC alignment fault.  This has priority over the instruction abort
9607          * that we would receive from a translation fault via arm_ldl_code
9608          * (or the execution of the kernelpage entrypoint). This should only
9609          * be possible after an indirect branch, at the start of the TB.
9610          */
9611         assert(dc->base.num_insns == 1);
9612         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9613         dc->base.is_jmp = DISAS_NORETURN;
9614         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9615         return;
9616     }
9617 
9618     if (arm_check_kernelpage(dc)) {
9619         dc->base.pc_next = pc + 4;
9620         return;
9621     }
9622 
9623     dc->pc_curr = pc;
9624     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9625     dc->insn = insn;
9626     dc->base.pc_next = pc + 4;
9627     disas_arm_insn(dc, insn);
9628 
9629     arm_post_translate_insn(dc);
9630 
9631     /* ARM is a fixed-length ISA.  We performed the cross-page check
9632        in init_disas_context by adjusting max_insns.  */
9633 }
9634 
9635 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9636 {
9637     /* Return true if this Thumb insn is always unconditional,
9638      * even inside an IT block. This is true of only a very few
9639      * instructions: BKPT, HLT, and SG.
9640      *
9641      * A larger class of instructions are UNPREDICTABLE if used
9642      * inside an IT block; we do not need to detect those here, because
9643      * what we do by default (perform the cc check and update the IT
9644      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9645      * choice for those situations.
9646      *
9647      * insn is either a 16-bit or a 32-bit instruction; the two are
9648      * distinguishable because for the 16-bit case the top 16 bits
9649      * are zeroes, and that isn't a valid 32-bit encoding.
9650      */
9651     if ((insn & 0xffffff00) == 0xbe00) {
9652         /* BKPT */
9653         return true;
9654     }
9655 
9656     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9657         !arm_dc_feature(s, ARM_FEATURE_M)) {
9658         /* HLT: v8A only. This is unconditional even when it is going to
9659          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9660          * For v7 cores this was a plain old undefined encoding and so
9661          * honours its cc check. (We might be using the encoding as
9662          * a semihosting trap, but we don't change the cc check behaviour
9663          * on that account, because a debugger connected to a real v7A
9664          * core and emulating semihosting traps by catching the UNDEF
9665          * exception would also only see cases where the cc check passed.
9666          * No guest code should be trying to do a HLT semihosting trap
9667          * in an IT block anyway.
9668          */
9669         return true;
9670     }
9671 
9672     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9673         arm_dc_feature(s, ARM_FEATURE_M)) {
9674         /* SG: v8M only */
9675         return true;
9676     }
9677 
9678     return false;
9679 }
9680 
9681 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9682 {
9683     DisasContext *dc = container_of(dcbase, DisasContext, base);
9684     CPUARMState *env = cpu->env_ptr;
9685     uint32_t pc = dc->base.pc_next;
9686     uint32_t insn;
9687     bool is_16bit;
9688     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9689     TCGOp *insn_eci_rewind = NULL;
9690     target_ulong insn_eci_pc_save = -1;
9691 
9692     /* Misaligned thumb PC is architecturally impossible. */
9693     assert((dc->base.pc_next & 1) == 0);
9694 
9695     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9696         dc->base.pc_next = pc + 2;
9697         return;
9698     }
9699 
9700     dc->pc_curr = pc;
9701     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9702     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9703     pc += 2;
9704     if (!is_16bit) {
9705         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9706         insn = insn << 16 | insn2;
9707         pc += 2;
9708     }
9709     dc->base.pc_next = pc;
9710     dc->insn = insn;
9711 
9712     if (dc->pstate_il) {
9713         /*
9714          * Illegal execution state. This has priority over BTI
9715          * exceptions, but comes after instruction abort exceptions.
9716          */
9717         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9718         return;
9719     }
9720 
9721     if (dc->eci) {
9722         /*
9723          * For M-profile continuable instructions, ECI/ICI handling
9724          * falls into these cases:
9725          *  - interrupt-continuable instructions
9726          *     These are the various load/store multiple insns (both
9727          *     integer and fp). The ICI bits indicate the register
9728          *     where the load/store can resume. We make the IMPDEF
9729          *     choice to always do "instruction restart", ie ignore
9730          *     the ICI value and always execute the ldm/stm from the
9731          *     start. So all we need to do is zero PSR.ICI if the
9732          *     insn executes.
9733          *  - MVE instructions subject to beat-wise execution
9734          *     Here the ECI bits indicate which beats have already been
9735          *     executed, and we must honour this. Each insn of this
9736          *     type will handle it correctly. We will update PSR.ECI
9737          *     in the helper function for the insn (some ECI values
9738          *     mean that the following insn also has been partially
9739          *     executed).
9740          *  - Special cases which don't advance ECI
9741          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9742          *     bits untouched.
9743          *  - all other insns (the common case)
9744          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9745          *     We place a rewind-marker here. Insns in the previous
9746          *     three categories will set a flag in the DisasContext.
9747          *     If the flag isn't set after we call disas_thumb_insn()
9748          *     or disas_thumb2_insn() then we know we have a "some other
9749          *     insn" case. We will rewind to the marker (ie throwing away
9750          *     all the generated code) and instead emit "take exception".
9751          */
9752         insn_eci_rewind = tcg_last_op();
9753         insn_eci_pc_save = dc->pc_save;
9754     }
9755 
9756     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9757         uint32_t cond = dc->condexec_cond;
9758 
9759         /*
9760          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9761          * "always"; 0xf is not "never".
9762          */
9763         if (cond < 0x0e) {
9764             arm_skip_unless(dc, cond);
9765         }
9766     }
9767 
9768     if (is_16bit) {
9769         disas_thumb_insn(dc, insn);
9770     } else {
9771         disas_thumb2_insn(dc, insn);
9772     }
9773 
9774     /* Advance the Thumb condexec condition.  */
9775     if (dc->condexec_mask) {
9776         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9777                              ((dc->condexec_mask >> 4) & 1));
9778         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9779         if (dc->condexec_mask == 0) {
9780             dc->condexec_cond = 0;
9781         }
9782     }
9783 
9784     if (dc->eci && !dc->eci_handled) {
9785         /*
9786          * Insn wasn't valid for ECI/ICI at all: undo what we
9787          * just generated and instead emit an exception
9788          */
9789         tcg_remove_ops_after(insn_eci_rewind);
9790         dc->pc_save = insn_eci_pc_save;
9791         dc->condjmp = 0;
9792         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9793     }
9794 
9795     arm_post_translate_insn(dc);
9796 
9797     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9798      * will touch a new page.  This ensures that prefetch aborts occur at
9799      * the right place.
9800      *
9801      * We want to stop the TB if the next insn starts in a new page,
9802      * or if it spans between this page and the next. This means that
9803      * if we're looking at the last halfword in the page we need to
9804      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9805      * or a 32-bit Thumb insn (which won't).
9806      * This is to avoid generating a silly TB with a single 16-bit insn
9807      * in it at the end of this page (which would execute correctly
9808      * but isn't very efficient).
9809      */
9810     if (dc->base.is_jmp == DISAS_NEXT
9811         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9812             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9813                 && insn_crosses_page(env, dc)))) {
9814         dc->base.is_jmp = DISAS_TOO_MANY;
9815     }
9816 }
9817 
9818 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9819 {
9820     DisasContext *dc = container_of(dcbase, DisasContext, base);
9821 
9822     /* At this stage dc->condjmp will only be set when the skipped
9823        instruction was a conditional branch or trap, and the PC has
9824        already been written.  */
9825     gen_set_condexec(dc);
9826     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9827         /* Exception return branches need some special case code at the
9828          * end of the TB, which is complex enough that it has to
9829          * handle the single-step vs not and the condition-failed
9830          * insn codepath itself.
9831          */
9832         gen_bx_excret_final_code(dc);
9833     } else if (unlikely(dc->ss_active)) {
9834         /* Unconditional and "condition passed" instruction codepath. */
9835         switch (dc->base.is_jmp) {
9836         case DISAS_SWI:
9837             gen_ss_advance(dc);
9838             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9839             break;
9840         case DISAS_HVC:
9841             gen_ss_advance(dc);
9842             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9843             break;
9844         case DISAS_SMC:
9845             gen_ss_advance(dc);
9846             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9847             break;
9848         case DISAS_NEXT:
9849         case DISAS_TOO_MANY:
9850         case DISAS_UPDATE_EXIT:
9851         case DISAS_UPDATE_NOCHAIN:
9852             gen_update_pc(dc, curr_insn_len(dc));
9853             /* fall through */
9854         default:
9855             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9856             gen_singlestep_exception(dc);
9857             break;
9858         case DISAS_NORETURN:
9859             break;
9860         }
9861     } else {
9862         /* While branches must always occur at the end of an IT block,
9863            there are a few other things that can cause us to terminate
9864            the TB in the middle of an IT block:
9865             - Exception generating instructions (bkpt, swi, undefined).
9866             - Page boundaries.
9867             - Hardware watchpoints.
9868            Hardware breakpoints have already been handled and skip this code.
9869          */
9870         switch (dc->base.is_jmp) {
9871         case DISAS_NEXT:
9872         case DISAS_TOO_MANY:
9873             gen_goto_tb(dc, 1, curr_insn_len(dc));
9874             break;
9875         case DISAS_UPDATE_NOCHAIN:
9876             gen_update_pc(dc, curr_insn_len(dc));
9877             /* fall through */
9878         case DISAS_JUMP:
9879             gen_goto_ptr();
9880             break;
9881         case DISAS_UPDATE_EXIT:
9882             gen_update_pc(dc, curr_insn_len(dc));
9883             /* fall through */
9884         default:
9885             /* indicate that the hash table must be used to find the next TB */
9886             tcg_gen_exit_tb(NULL, 0);
9887             break;
9888         case DISAS_NORETURN:
9889             /* nothing more to generate */
9890             break;
9891         case DISAS_WFI:
9892             gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9893             /*
9894              * The helper doesn't necessarily throw an exception, but we
9895              * must go back to the main loop to check for interrupts anyway.
9896              */
9897             tcg_gen_exit_tb(NULL, 0);
9898             break;
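             /*
              * Assumption (based on the helper implementations elsewhere in
              * target/arm, not restated here): unlike WFI above, the wfe and
              * yield helpers leave the CPU loop themselves via
              * cpu_loop_exit(), so no explicit exit_tb is generated for the
              * two cases below.
              */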
9899         case DISAS_WFE:
9900             gen_helper_wfe(cpu_env);
9901             break;
9902         case DISAS_YIELD:
9903             gen_helper_yield(cpu_env);
9904             break;
9905         case DISAS_SWI:
9906             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9907             break;
9908         case DISAS_HVC:
9909             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9910             break;
9911         case DISAS_SMC:
9912             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9913             break;
9914         }
9915     }
9916 
9917     if (dc->condjmp) {
9918         /* "Condition failed" instruction codepath for the branch/trap insn */
9919         set_disas_label(dc, dc->condlabel);
9920         gen_set_condexec(dc);
9921         if (unlikely(dc->ss_active)) {
9922             gen_update_pc(dc, curr_insn_len(dc));
9923             gen_singlestep_exception(dc);
9924         } else {
9925             gen_goto_tb(dc, 1, curr_insn_len(dc));
9926         }
9927     }
9928 }
9929 
9930 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9931                              CPUState *cpu, FILE *logfile)
9932 {
9933     DisasContext *dc = container_of(dcbase, DisasContext, base);
9934 
9935     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9936     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9937 }
9938 
9939 static const TranslatorOps arm_translator_ops = {
9940     .init_disas_context = arm_tr_init_disas_context,
9941     .tb_start           = arm_tr_tb_start,
9942     .insn_start         = arm_tr_insn_start,
9943     .translate_insn     = arm_tr_translate_insn,
9944     .tb_stop            = arm_tr_tb_stop,
9945     .disas_log          = arm_tr_disas_log,
9946 };
9947 
9948 static const TranslatorOps thumb_translator_ops = {
9949     .init_disas_context = arm_tr_init_disas_context,
9950     .tb_start           = arm_tr_tb_start,
9951     .insn_start         = arm_tr_insn_start,
9952     .translate_insn     = thumb_tr_translate_insn,
9953     .tb_stop            = arm_tr_tb_stop,
9954     .disas_log          = arm_tr_disas_log,
9955 };
9956 
9957 /* generate intermediate code for basic block 'tb'.  */
9958 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9959                            target_ulong pc, void *host_pc)
9960 {
9961     DisasContext dc = { };
9962     const TranslatorOps *ops = &arm_translator_ops;
9963     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9964 
9965     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9966         ops = &thumb_translator_ops;
9967     }
9968 #ifdef TARGET_AARCH64
9969     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9970         ops = &aarch64_translator_ops;
9971     }
9972 #endif
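         /*
          * Assumption about why the order matters: the A32/M32 and AArch64
          * TB flag fields share encoding space, so the AARCH64_STATE check
          * is done last and overrides the Thumb/Arm choice made above.
          */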
9973 
9974     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9975 }
9976