xref: /openbmc/qemu/target/arm/tcg/translate.c (revision 1f2146f7)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "translate.h"
24 #include "translate-a32.h"
25 #include "qemu/log.h"
26 #include "disas/disas.h"
27 #include "arm_ldst.h"
28 #include "semihosting/semihost.h"
29 #include "cpregs.h"
30 #include "exec/helper-proto.h"
31 
32 #define HELPER_H "helper.h"
33 #include "exec/helper-info.c.inc"
34 #undef  HELPER_H
35 
36 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
37 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
38 /* currently all emulated v5 cores are also v5TE, so don't bother */
39 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
40 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
41 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
42 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
43 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
44 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
45 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
46 
47 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
48 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
49 /* These are TCG globals which alias CPUARMState fields */
50 static TCGv_i32 cpu_R[16];
51 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
52 TCGv_i64 cpu_exclusive_addr;
53 TCGv_i64 cpu_exclusive_val;
54 
55 static const char * const regnames[] =
56     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
57       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
58 
59 
60 /* initialize TCG globals.  */
61 void arm_translate_init(void)
62 {
63     int i;
64 
65     for (i = 0; i < 16; i++) {
66         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
67                                           offsetof(CPUARMState, regs[i]),
68                                           regnames[i]);
69     }
70     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
71     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
72     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
73     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
74 
75     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
76         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
77     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
78         offsetof(CPUARMState, exclusive_val), "exclusive_val");
79 
80     a64_translate_init();
81 }
82 
83 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
84 {
85     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
86     switch (cmode) {
87     case 0: case 1:
88         /* no-op */
89         break;
90     case 2: case 3:
91         imm <<= 8;
92         break;
93     case 4: case 5:
94         imm <<= 16;
95         break;
96     case 6: case 7:
97         imm <<= 24;
98         break;
99     case 8: case 9:
100         imm |= imm << 16;
101         break;
102     case 10: case 11:
103         imm = (imm << 8) | (imm << 24);
104         break;
105     case 12:
106         imm = (imm << 8) | 0xff;
107         break;
108     case 13:
109         imm = (imm << 16) | 0xffff;
110         break;
111     case 14:
112         if (op) {
113             /*
114              * This and cmode == 15 op == 1 are the only cases where
115              * the top and bottom 32 bits of the encoded constant differ.
116              */
117             uint64_t imm64 = 0;
118             int n;
119 
120             for (n = 0; n < 8; n++) {
121                 if (imm & (1 << n)) {
122                     imm64 |= (0xffULL << (n * 8));
123                 }
124             }
125             return imm64;
126         }
127         imm |= (imm << 8) | (imm << 16) | (imm << 24);
128         break;
129     case 15:
130         if (op) {
131             /* Reserved encoding for AArch32; valid for AArch64 */
132             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
133             if (imm & 0x80) {
134                 imm64 |= 0x8000000000000000ULL;
135             }
136             if (imm & 0x40) {
137                 imm64 |= 0x3fc0000000000000ULL;
138             } else {
139                 imm64 |= 0x4000000000000000ULL;
140             }
141             return imm64;
142         }
143         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
144             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
145         break;
146     }
147     if (op) {
148         imm = ~imm;
149     }
150     return dup_const(MO_32, imm);
151 }
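/*
 * Editor's note (illustrative, not part of the original source): only the
 * two cases called out above (cmode 14 with op set, and cmode 15 with op
 * set) return a full 64-bit value directly; every other path builds a
 * 32-bit pattern that dup_const(MO_32, ...) replicates into both halves.
 * For example, cmode == 14, op == 1 with imm == 0xa5 (bits 0, 2, 5 and 7
 * set) expands to 0xff00ff0000ff00ff, one all-ones byte per set bit.
 */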
152 
153 /* Generate a label used for skipping this instruction */
154 void arm_gen_condlabel(DisasContext *s)
155 {
156     if (!s->condjmp) {
157         s->condlabel = gen_disas_label(s);
158         s->condjmp = 1;
159     }
160 }
161 
162 /* Flags for the disas_set_da_iss info argument:
163  * lower bits hold the Rt register number, higher bits are flags.
164  */
165 typedef enum ISSInfo {
166     ISSNone = 0,
167     ISSRegMask = 0x1f,
168     ISSInvalid = (1 << 5),
169     ISSIsAcqRel = (1 << 6),
170     ISSIsWrite = (1 << 7),
171     ISSIs16Bit = (1 << 8),
172 } ISSInfo;
173 
174 /*
175  * Store var into env + offset to a member with size bytes.
176  * Free var after use.
177  */
178 void store_cpu_offset(TCGv_i32 var, int offset, int size)
179 {
180     switch (size) {
181     case 1:
182         tcg_gen_st8_i32(var, cpu_env, offset);
183         break;
184     case 4:
185         tcg_gen_st_i32(var, cpu_env, offset);
186         break;
187     default:
188         g_assert_not_reached();
189     }
190 }
191 
192 /* Save the syndrome information for a Data Abort */
193 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
194 {
195     uint32_t syn;
196     int sas = memop & MO_SIZE;
197     bool sse = memop & MO_SIGN;
198     bool is_acqrel = issinfo & ISSIsAcqRel;
199     bool is_write = issinfo & ISSIsWrite;
200     bool is_16bit = issinfo & ISSIs16Bit;
201     int srt = issinfo & ISSRegMask;
202 
203     if (issinfo & ISSInvalid) {
204         /* Some callsites want to conditionally provide ISS info,
205          * eg "only if this was not a writeback"
206          */
207         return;
208     }
209 
210     if (srt == 15) {
211         /* For AArch32, insns where the src/dest is R15 never generate
212          * ISS information. Catching that here saves checking at all
213          * the call sites.
214          */
215         return;
216     }
217 
218     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
219                                   0, 0, 0, is_write, 0, is_16bit);
220     disas_set_insn_syndrome(s, syn);
221 }
222 
223 static inline int get_a32_user_mem_index(DisasContext *s)
224 {
225     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
226      * insns:
227      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
228      *  otherwise, access as if at PL0.
229      */
230     switch (s->mmu_idx) {
231     case ARMMMUIdx_E3:
232     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
233     case ARMMMUIdx_E10_0:
234     case ARMMMUIdx_E10_1:
235     case ARMMMUIdx_E10_1_PAN:
236         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
237     case ARMMMUIdx_MUser:
238     case ARMMMUIdx_MPriv:
239         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
240     case ARMMMUIdx_MUserNegPri:
241     case ARMMMUIdx_MPrivNegPri:
242         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
243     case ARMMMUIdx_MSUser:
244     case ARMMMUIdx_MSPriv:
245         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
246     case ARMMMUIdx_MSUserNegPri:
247     case ARMMMUIdx_MSPrivNegPri:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
249     default:
250         g_assert_not_reached();
251     }
252 }
253 
254 /* The pc_curr difference for an architectural jump. */
255 static target_long jmp_diff(DisasContext *s, target_long diff)
256 {
257     return diff + (s->thumb ? 4 : 8);
258 }
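/*
 * Editor's note: this is the architectural PC offset seen by AArch32 code:
 * reading R15 (or taking a branch) observes the current instruction's
 * address plus 8 in A32 state and plus 4 in T32 state, so jmp_diff() folds
 * that offset into the requested displacement (load_reg_var() below uses
 * jmp_diff(s, 0) to materialise the PC value the guest would read).
 */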
259 
260 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
261 {
262     assert(s->pc_save != -1);
263     if (tb_cflags(s->base.tb) & CF_PCREL) {
264         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
265     } else {
266         tcg_gen_movi_i32(var, s->pc_curr + diff);
267     }
268 }
269 
270 /* Set a variable to the value of a CPU register.  */
271 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
272 {
273     if (reg == 15) {
274         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
275     } else {
276         tcg_gen_mov_i32(var, cpu_R[reg]);
277     }
278 }
279 
280 /*
281  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
282  * This is used for load/store for which use of PC implies (literal),
283  * or ADD that implies ADR.
284  */
285 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
286 {
287     TCGv_i32 tmp = tcg_temp_new_i32();
288 
289     if (reg == 15) {
290         /*
291          * This address is computed from an aligned PC:
292          * subtract off the low bits.
293          */
294         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
295     } else {
296         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
297     }
298     return tmp;
299 }
300 
301 /* Set a CPU register.  The source must be a temporary and will be
302    marked as dead.  */
303 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
304 {
305     if (reg == 15) {
306         /* In Thumb mode, we must ignore bit 0.
307          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
308          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
309          * We choose to ignore [1:0] in ARM mode for all architecture versions.
310          */
311         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
312         s->base.is_jmp = DISAS_JUMP;
313         s->pc_save = -1;
314     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
315         /* For M-profile SP bits [1:0] are always zero */
316         tcg_gen_andi_i32(var, var, ~3);
317     }
318     tcg_gen_mov_i32(cpu_R[reg], var);
319 }
320 
321 /*
322  * Variant of store_reg which applies v8M stack-limit checks before updating
323  * SP. If the check fails this will result in an exception being taken.
324  * We disable the stack checks for CONFIG_USER_ONLY because we have
325  * no idea what the stack limits should be in that case.
326  * If stack checking is not being done this just acts like store_reg().
327  */
328 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
329 {
330 #ifndef CONFIG_USER_ONLY
331     if (s->v8m_stackcheck) {
332         gen_helper_v8m_stackcheck(cpu_env, var);
333     }
334 #endif
335     store_reg(s, 13, var);
336 }
337 
338 /* Value extensions.  */
339 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
340 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
341 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
342 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
343 
344 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
345 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
346 
347 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
348 {
349     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
350 }
351 
352 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
353 {
354     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
355 
356     if (new_el) {
357         if (m_profile) {
358             gen_helper_rebuild_hflags_m32_newel(cpu_env);
359         } else {
360             gen_helper_rebuild_hflags_a32_newel(cpu_env);
361         }
362     } else {
363         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
364         if (m_profile) {
365             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
366         } else {
367             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
368         }
369     }
370 }
371 
372 static void gen_exception_internal(int excp)
373 {
374     assert(excp_is_internal(excp));
375     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
376 }
377 
378 static void gen_singlestep_exception(DisasContext *s)
379 {
380     /* We just completed step of an insn. Move from Active-not-pending
381      * to Active-pending, and then also take the swstep exception.
382      * This corresponds to making the (IMPDEF) choice to prioritize
383      * swstep exceptions over asynchronous exceptions taken to an exception
384      * level where debug is disabled. This choice has the advantage that
385      * we do not need to maintain internal state corresponding to the
386      * ISV/EX syndrome bits between completion of the step and generation
387      * of the exception, and our syndrome information is always correct.
388      */
389     gen_ss_advance(s);
390     gen_swstep_exception(s, 1, s->is_ldex);
391     s->base.is_jmp = DISAS_NORETURN;
392 }
393 
394 void clear_eci_state(DisasContext *s)
395 {
396     /*
397      * Clear any ECI/ICI state: used when a load multiple/store
398      * multiple insn executes.
399      */
400     if (s->eci) {
401         store_cpu_field_constant(0, condexec_bits);
402         s->eci = 0;
403     }
404 }
405 
406 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
407 {
408     TCGv_i32 tmp1 = tcg_temp_new_i32();
409     TCGv_i32 tmp2 = tcg_temp_new_i32();
410     tcg_gen_ext16s_i32(tmp1, a);
411     tcg_gen_ext16s_i32(tmp2, b);
412     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
413     tcg_gen_sari_i32(a, a, 16);
414     tcg_gen_sari_i32(b, b, 16);
415     tcg_gen_mul_i32(b, b, a);
416     tcg_gen_mov_i32(a, tmp1);
417 }
418 
419 /* Byteswap each halfword.  */
420 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
421 {
422     TCGv_i32 tmp = tcg_temp_new_i32();
423     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
424     tcg_gen_shri_i32(tmp, var, 8);
425     tcg_gen_and_i32(tmp, tmp, mask);
426     tcg_gen_and_i32(var, var, mask);
427     tcg_gen_shli_i32(var, var, 8);
428     tcg_gen_or_i32(dest, var, tmp);
429 }
430 
431 /* Byteswap low halfword and sign extend.  */
432 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
433 {
434     tcg_gen_bswap16_i32(dest, var, TCG_BSWAP_OS);
435 }
436 
437 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
438     tmp = (t0 ^ t1) & 0x8000;
439     t0 &= ~0x8000;
440     t1 &= ~0x8000;
441     t0 = (t0 + t1) ^ tmp;
442  */
443 
444 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
445 {
446     TCGv_i32 tmp = tcg_temp_new_i32();
447     tcg_gen_xor_i32(tmp, t0, t1);
448     tcg_gen_andi_i32(tmp, tmp, 0x8000);
449     tcg_gen_andi_i32(t0, t0, ~0x8000);
450     tcg_gen_andi_i32(t1, t1, ~0x8000);
451     tcg_gen_add_i32(t0, t0, t1);
452     tcg_gen_xor_i32(dest, t0, tmp);
453 }
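/*
 * Worked example (illustrative): for t0 = 0x0000ffff, t1 = 0x00000001 we
 * get tmp = 0x8000, the masked operands sum to 0x8000, and the final XOR
 * gives 0x00000000: the carry out of bit 15 is dropped rather than
 * rippling into the upper halfword, which is the per-lane behaviour a
 * dual 16-bit add requires.
 */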
454 
455 /* Set N and Z flags from var.  */
456 static inline void gen_logic_CC(TCGv_i32 var)
457 {
458     tcg_gen_mov_i32(cpu_NF, var);
459     tcg_gen_mov_i32(cpu_ZF, var);
460 }
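/*
 * Editor's note: the flags are kept in a partially evaluated form
 * throughout this file: cpu_CF holds 0 or 1, the N and V flags live in
 * bit 31 of cpu_NF and cpu_VF respectively, and Z is set exactly when
 * cpu_ZF is zero. That is why gen_logic_CC() only needs to copy the
 * result into NF and ZF.
 */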
461 
462 /* dest = T0 + T1 + CF. */
463 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
464 {
465     tcg_gen_add_i32(dest, t0, t1);
466     tcg_gen_add_i32(dest, dest, cpu_CF);
467 }
468 
469 /* dest = T0 - T1 + CF - 1.  */
470 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
471 {
472     tcg_gen_sub_i32(dest, t0, t1);
473     tcg_gen_add_i32(dest, dest, cpu_CF);
474     tcg_gen_subi_i32(dest, dest, 1);
475 }
476 
477 /* dest = T0 + T1. Compute C, N, V and Z flags */
478 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
479 {
480     TCGv_i32 tmp = tcg_temp_new_i32();
481     tcg_gen_movi_i32(tmp, 0);
482     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
483     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
484     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
485     tcg_gen_xor_i32(tmp, t0, t1);
486     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
487     tcg_gen_mov_i32(dest, cpu_NF);
488 }
489 
490 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
491 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
492 {
493     TCGv_i32 tmp = tcg_temp_new_i32();
494     if (TCG_TARGET_HAS_add2_i32) {
495         tcg_gen_movi_i32(tmp, 0);
496         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
497         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
498     } else {
499         TCGv_i64 q0 = tcg_temp_new_i64();
500         TCGv_i64 q1 = tcg_temp_new_i64();
501         tcg_gen_extu_i32_i64(q0, t0);
502         tcg_gen_extu_i32_i64(q1, t1);
503         tcg_gen_add_i64(q0, q0, q1);
504         tcg_gen_extu_i32_i64(q1, cpu_CF);
505         tcg_gen_add_i64(q0, q0, q1);
506         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
507     }
508     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
509     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
510     tcg_gen_xor_i32(tmp, t0, t1);
511     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
512     tcg_gen_mov_i32(dest, cpu_NF);
513 }
514 
515 /* dest = T0 - T1. Compute C, N, V and Z flags */
516 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
517 {
518     TCGv_i32 tmp;
519     tcg_gen_sub_i32(cpu_NF, t0, t1);
520     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
521     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
522     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
523     tmp = tcg_temp_new_i32();
524     tcg_gen_xor_i32(tmp, t0, t1);
525     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
526     tcg_gen_mov_i32(dest, cpu_NF);
527 }
528 
529 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
530 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
531 {
532     TCGv_i32 tmp = tcg_temp_new_i32();
533     tcg_gen_not_i32(tmp, t1);
534     gen_adc_CC(dest, t0, tmp);
535 }
536 
537 #define GEN_SHIFT(name)                                               \
538 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
539 {                                                                     \
540     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
541     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
542     TCGv_i32 zero = tcg_constant_i32(0);                              \
543     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
544     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
545     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
546     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
547 }
548 GEN_SHIFT(shl)
549 GEN_SHIFT(shr)
550 #undef GEN_SHIFT
551 
552 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
553 {
554     TCGv_i32 tmp1 = tcg_temp_new_i32();
555 
556     tcg_gen_andi_i32(tmp1, t1, 0xff);
557     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
558     tcg_gen_sar_i32(dest, t0, tmp1);
559 }
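/*
 * Editor's note: these register-shift helpers follow the AArch32 rule that
 * only the bottom byte of the shift amount matters: LSL/LSR by 32 or more
 * yield 0 (hence the 0xe0 test in GEN_SHIFT), while ASR by 32 or more
 * behaves like ASR #31, i.e. it replicates the sign bit, which is what
 * clamping the amount with umin(..., 31) achieves. Flag-setting variants
 * go through the *_cc helpers in gen_arm_shift_reg() below instead.
 */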
560 
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565 
566 /* Shift by immediate.  Includes special handling for shift == 0.  */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568                                     int shift, int flags)
569 {
570     switch (shiftop) {
571     case 0: /* LSL */
572         if (shift != 0) {
573             if (flags)
574                 shifter_out_im(var, 32 - shift);
575             tcg_gen_shli_i32(var, var, shift);
576         }
577         break;
578     case 1: /* LSR */
579         if (shift == 0) {
580             if (flags) {
581                 tcg_gen_shri_i32(cpu_CF, var, 31);
582             }
583             tcg_gen_movi_i32(var, 0);
584         } else {
585             if (flags)
586                 shifter_out_im(var, shift - 1);
587             tcg_gen_shri_i32(var, var, shift);
588         }
589         break;
590     case 2: /* ASR */
591         if (shift == 0)
592             shift = 32;
593         if (flags)
594             shifter_out_im(var, shift - 1);
595         if (shift == 32)
596           shift = 31;
597         tcg_gen_sari_i32(var, var, shift);
598         break;
599     case 3: /* ROR/RRX */
600         if (shift != 0) {
601             if (flags)
602                 shifter_out_im(var, shift - 1);
603             tcg_gen_rotri_i32(var, var, shift); break;
604         } else {
605             TCGv_i32 tmp = tcg_temp_new_i32();
606             tcg_gen_shli_i32(tmp, cpu_CF, 31);
607             if (flags)
608                 shifter_out_im(var, 0);
609             tcg_gen_shri_i32(var, var, 1);
610             tcg_gen_or_i32(var, var, tmp);
611         }
612     }
613 }
614 
615 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616                                      TCGv_i32 shift, int flags)
617 {
618     if (flags) {
619         switch (shiftop) {
620         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
621         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
622         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
623         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
624         }
625     } else {
626         switch (shiftop) {
627         case 0:
628             gen_shl(var, var, shift);
629             break;
630         case 1:
631             gen_shr(var, var, shift);
632             break;
633         case 2:
634             gen_sar(var, var, shift);
635             break;
636         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
637                 tcg_gen_rotr_i32(var, var, shift); break;
638         }
639     }
640 }
641 
642 /*
643  * Generate a conditional based on ARM condition code cc.
644  * This is common between ARM and AArch64 targets.
645  */
646 void arm_test_cc(DisasCompare *cmp, int cc)
647 {
648     TCGv_i32 value;
649     TCGCond cond;
650 
651     switch (cc) {
652     case 0: /* eq: Z */
653     case 1: /* ne: !Z */
654         cond = TCG_COND_EQ;
655         value = cpu_ZF;
656         break;
657 
658     case 2: /* cs: C */
659     case 3: /* cc: !C */
660         cond = TCG_COND_NE;
661         value = cpu_CF;
662         break;
663 
664     case 4: /* mi: N */
665     case 5: /* pl: !N */
666         cond = TCG_COND_LT;
667         value = cpu_NF;
668         break;
669 
670     case 6: /* vs: V */
671     case 7: /* vc: !V */
672         cond = TCG_COND_LT;
673         value = cpu_VF;
674         break;
675 
676     case 8: /* hi: C && !Z */
677     case 9: /* ls: !C || Z -> !(C && !Z) */
678         cond = TCG_COND_NE;
679         value = tcg_temp_new_i32();
680         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
681            ZF is non-zero for !Z; so AND the two subexpressions.  */
682         tcg_gen_neg_i32(value, cpu_CF);
683         tcg_gen_and_i32(value, value, cpu_ZF);
684         break;
685 
686     case 10: /* ge: N == V -> N ^ V == 0 */
687     case 11: /* lt: N != V -> N ^ V != 0 */
688         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
689         cond = TCG_COND_GE;
690         value = tcg_temp_new_i32();
691         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
692         break;
693 
694     case 12: /* gt: !Z && N == V */
695     case 13: /* le: Z || N != V */
696         cond = TCG_COND_NE;
697         value = tcg_temp_new_i32();
698         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
699          * the sign bit then AND with ZF to yield the result.  */
700         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
701         tcg_gen_sari_i32(value, value, 31);
702         tcg_gen_andc_i32(value, cpu_ZF, value);
703         break;
704 
705     case 14: /* always */
706     case 15: /* always */
707         /* Use the ALWAYS condition, which will fold early.
708          * It doesn't matter what we use for the value.  */
709         cond = TCG_COND_ALWAYS;
710         value = cpu_ZF;
711         goto no_invert;
712 
713     default:
714         fprintf(stderr, "Bad condition code 0x%x\n", cc);
715         abort();
716     }
717 
718     if (cc & 1) {
719         cond = tcg_invert_cond(cond);
720     }
721 
722  no_invert:
723     cmp->cond = cond;
724     cmp->value = value;
725 }
726 
727 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
728 {
729     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
730 }
731 
732 void arm_gen_test_cc(int cc, TCGLabel *label)
733 {
734     DisasCompare cmp;
735     arm_test_cc(&cmp, cc);
736     arm_jump_cc(&cmp, label);
737 }
738 
739 void gen_set_condexec(DisasContext *s)
740 {
741     if (s->condexec_mask) {
742         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
743 
744         store_cpu_field_constant(val, condexec_bits);
745     }
746 }
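/*
 * Editor's note: condexec_bits holds the 8-bit ITSTATE in its CPSR layout.
 * At translate time (outside this excerpt) IT[7:4] is kept in
 * s->condexec_cond and IT[3:0] is kept pre-shifted left by one in
 * s->condexec_mask, so the expression above simply reassembles the
 * architectural value before storing it back.
 */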
747 
748 void gen_update_pc(DisasContext *s, target_long diff)
749 {
750     gen_pc_plus_diff(s, cpu_R[15], diff);
751     s->pc_save = s->pc_curr + diff;
752 }
753 
754 /* Set PC and Thumb state from var.  var is marked as dead.  */
755 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
756 {
757     s->base.is_jmp = DISAS_JUMP;
758     tcg_gen_andi_i32(cpu_R[15], var, ~1);
759     tcg_gen_andi_i32(var, var, 1);
760     store_cpu_field(var, thumb);
761     s->pc_save = -1;
762 }
763 
764 /*
765  * Set PC and Thumb state from var. var is marked as dead.
766  * For M-profile CPUs, include logic to detect exception-return
767  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
768  * and BX reg, and no others, and happens only for code in Handler mode.
769  * The Security Extension also requires us to check for the FNC_RETURN
770  * which signals a function return from non-secure state; this can happen
771  * in both Handler and Thread mode.
772  * To avoid having to do multiple comparisons in inline generated code,
773  * we make the check we do here loose, so it will match for EXC_RETURN
774  * in Thread mode. For system emulation do_v7m_exception_exit() checks
775  * for these spurious cases and returns without doing anything (giving
776  * the same behaviour as for a branch to a non-magic address).
777  *
778  * In linux-user mode it is unclear what the right behaviour for an
779  * attempted FNC_RETURN should be, because in real hardware this will go
780  * directly to Secure code (ie not the Linux kernel) which will then treat
781  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
782  * attempt behave the way it would on a CPU without the security extension,
783  * which is to say "like a normal branch". That means we can simply treat
784  * all branches as normal with no magic address behaviour.
785  */
786 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
787 {
788     /* Generate the same code here as for a simple bx, but flag via
789      * s->base.is_jmp that we need to do the rest of the work later.
790      */
791     gen_bx(s, var);
792 #ifndef CONFIG_USER_ONLY
793     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
794         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
795         s->base.is_jmp = DISAS_BX_EXCRET;
796     }
797 #endif
798 }
799 
800 static inline void gen_bx_excret_final_code(DisasContext *s)
801 {
802     /* Generate the code to finish possible exception return and end the TB */
803     DisasLabel excret_label = gen_disas_label(s);
804     uint32_t min_magic;
805 
806     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
807         /* Covers FNC_RETURN and EXC_RETURN magic */
808         min_magic = FNC_RETURN_MIN_MAGIC;
809     } else {
810         /* EXC_RETURN magic only */
811         min_magic = EXC_RETURN_MIN_MAGIC;
812     }
813 
814     /* Is the new PC value in the magic range indicating exception return? */
815     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
816     /* No: end the TB as we would for a DISAS_JMP */
817     if (s->ss_active) {
818         gen_singlestep_exception(s);
819     } else {
820         tcg_gen_exit_tb(NULL, 0);
821     }
822     set_disas_label(s, excret_label);
823     /* Yes: this is an exception return.
824      * At this point in runtime env->regs[15] and env->thumb will hold
825      * the exception-return magic number, which do_v7m_exception_exit()
826      * will read. Nothing else will be able to see those values because
827      * the cpu-exec main loop guarantees that we will always go straight
828      * from raising the exception to the exception-handling code.
829      *
830      * gen_ss_advance(s) does nothing on M profile currently but
831      * calling it is conceptually the right thing as we have executed
832      * this instruction (compare SWI, HVC, SMC handling).
833      */
834     gen_ss_advance(s);
835     gen_exception_internal(EXCP_EXCEPTION_EXIT);
836 }
837 
838 static inline void gen_bxns(DisasContext *s, int rm)
839 {
840     TCGv_i32 var = load_reg(s, rm);
841 
842     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
843      * we need to sync state before calling it, but:
844      *  - we don't need to do gen_update_pc() because the bxns helper will
845      *    always set the PC itself
846      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
847      *    unless it's outside an IT block or the last insn in an IT block,
848      *    so we know that condexec == 0 (already set at the top of the TB)
849      *    is correct in the non-UNPREDICTABLE cases, and we can choose
850      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
851      */
852     gen_helper_v7m_bxns(cpu_env, var);
853     s->base.is_jmp = DISAS_EXIT;
854 }
855 
856 static inline void gen_blxns(DisasContext *s, int rm)
857 {
858     TCGv_i32 var = load_reg(s, rm);
859 
860     /* We don't need to sync condexec state, for the same reason as bxns.
861      * We do however need to set the PC, because the blxns helper reads it.
862      * The blxns helper may throw an exception.
863      */
864     gen_update_pc(s, curr_insn_len(s));
865     gen_helper_v7m_blxns(cpu_env, var);
866     s->base.is_jmp = DISAS_EXIT;
867 }
868 
869 /* Variant of store_reg which uses branch&exchange logic when storing
870    to r15 in ARM architecture v7 and above. The source must be a temporary
871    and will be marked as dead. */
872 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
873 {
874     if (reg == 15 && ENABLE_ARCH_7) {
875         gen_bx(s, var);
876     } else {
877         store_reg(s, reg, var);
878     }
879 }
880 
881 /* Variant of store_reg which uses branch&exchange logic when storing
882  * to r15 in ARM architecture v5T and above. This is used for storing
883  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
884  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
885 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
886 {
887     if (reg == 15 && ENABLE_ARCH_5) {
888         gen_bx_excret(s, var);
889     } else {
890         store_reg(s, reg, var);
891     }
892 }
893 
894 #ifdef CONFIG_USER_ONLY
895 #define IS_USER_ONLY 1
896 #else
897 #define IS_USER_ONLY 0
898 #endif
899 
900 MemOp pow2_align(unsigned i)
901 {
902     static const MemOp mop_align[] = {
903         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
904         /*
905          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
906          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
907          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
908          */
909         MO_ALIGN_16
910     };
911     g_assert(i < ARRAY_SIZE(mop_align));
912     return mop_align[i];
913 }
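/*
 * Usage note (illustrative): the argument is log2 of the required
 * alignment, so pow2_align(0) means no alignment requirement,
 * pow2_align(2) is MO_ALIGN_4 and pow2_align(4) is MO_ALIGN_16, while
 * index 5 (256-bit) falls back to 128-bit alignment as the FIXME above
 * explains.
 */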
914 
915 /*
916  * Abstractions of "generate code to do a guest load/store for
917  * AArch32", where a vaddr is always 32 bits (and is zero
918  * extended if we're a 64 bit core) and data is also
919  * 32 bits unless specifically doing a 64 bit access.
920  * These functions work like tcg_gen_qemu_{ld,st}* except
921  * that the address argument is TCGv_i32 rather than TCGv.
922  */
923 
924 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926     TCGv addr = tcg_temp_new();
927     tcg_gen_extu_i32_tl(addr, a32);
928 
929     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
930     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932     }
933     return addr;
934 }
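/*
 * Editor's note: the XOR implements the BE32 (SCTLR.B) address munge on
 * top of a little-endian TLB: byte accesses flip the low two address bits
 * (XOR with 3) and halfword accesses flip bit 1 (XOR with 2), so e.g. a
 * byte load from address 0 under BE32 actually touches byte 3 of the
 * containing word. Word-sized and larger accesses are left alone.
 */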
935 
936 /*
937  * Internal routines are used for NEON cases where the endianness
938  * and/or alignment has already been taken into account and manipulated.
939  */
940 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
941                               TCGv_i32 a32, int index, MemOp opc)
942 {
943     TCGv addr = gen_aa32_addr(s, a32, opc);
944     tcg_gen_qemu_ld_i32(val, addr, index, opc);
945 }
946 
947 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
948                               TCGv_i32 a32, int index, MemOp opc)
949 {
950     TCGv addr = gen_aa32_addr(s, a32, opc);
951     tcg_gen_qemu_st_i32(val, addr, index, opc);
952 }
953 
954 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
955                               TCGv_i32 a32, int index, MemOp opc)
956 {
957     TCGv addr = gen_aa32_addr(s, a32, opc);
958 
959     tcg_gen_qemu_ld_i64(val, addr, index, opc);
960 
961     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
962     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
963         tcg_gen_rotri_i64(val, val, 32);
964     }
965 }
966 
967 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
968                               TCGv_i32 a32, int index, MemOp opc)
969 {
970     TCGv addr = gen_aa32_addr(s, a32, opc);
971 
972     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
973     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
974         TCGv_i64 tmp = tcg_temp_new_i64();
975         tcg_gen_rotri_i64(tmp, val, 32);
976         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
977     } else {
978         tcg_gen_qemu_st_i64(val, addr, index, opc);
979     }
980 }
981 
982 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
983                      int index, MemOp opc)
984 {
985     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
986 }
987 
988 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
989                      int index, MemOp opc)
990 {
991     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
992 }
993 
994 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
995                      int index, MemOp opc)
996 {
997     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
998 }
999 
1000 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1001                      int index, MemOp opc)
1002 {
1003     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1004 }
1005 
1006 #define DO_GEN_LD(SUFF, OPC)                                            \
1007     static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1008                                          TCGv_i32 a32, int index)       \
1009     {                                                                   \
1010         gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1011     }
1012 
1013 #define DO_GEN_ST(SUFF, OPC)                                            \
1014     static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1015                                          TCGv_i32 a32, int index)       \
1016     {                                                                   \
1017         gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1018     }
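/*
 * Editor's note: the fixed-size wrappers used by the legacy iwMMXt decoder
 * below (gen_aa32_ld8u(), gen_aa32_ld16u(), gen_aa32_ld32u(), gen_aa32_st8()
 * and friends) are presumably produced by expanding these macros elsewhere
 * in the file; only the macro definitions appear in this excerpt.
 */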
1019 
1020 static inline void gen_hvc(DisasContext *s, int imm16)
1021 {
1022     /* The pre HVC helper handles cases when HVC gets trapped
1023      * as an undefined insn by runtime configuration (ie before
1024      * the insn really executes).
1025      */
1026     gen_update_pc(s, 0);
1027     gen_helper_pre_hvc(cpu_env);
1028     /* Otherwise we will treat this as a real exception which
1029      * happens after execution of the insn. (The distinction matters
1030      * for the PC value reported to the exception handler and also
1031      * for single stepping.)
1032      */
1033     s->svc_imm = imm16;
1034     gen_update_pc(s, curr_insn_len(s));
1035     s->base.is_jmp = DISAS_HVC;
1036 }
1037 
1038 static inline void gen_smc(DisasContext *s)
1039 {
1040     /* As with HVC, we may take an exception either before or after
1041      * the insn executes.
1042      */
1043     gen_update_pc(s, 0);
1044     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1045     gen_update_pc(s, curr_insn_len(s));
1046     s->base.is_jmp = DISAS_SMC;
1047 }
1048 
1049 static void gen_exception_internal_insn(DisasContext *s, int excp)
1050 {
1051     gen_set_condexec(s);
1052     gen_update_pc(s, 0);
1053     gen_exception_internal(excp);
1054     s->base.is_jmp = DISAS_NORETURN;
1055 }
1056 
1057 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1058 {
1059     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1060                                           tcg_constant_i32(syndrome), tcg_el);
1061 }
1062 
1063 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1064 {
1065     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1066 }
1067 
1068 static void gen_exception(int excp, uint32_t syndrome)
1069 {
1070     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1071                                        tcg_constant_i32(syndrome));
1072 }
1073 
1074 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1075                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1076 {
1077     if (s->aarch64) {
1078         gen_a64_update_pc(s, pc_diff);
1079     } else {
1080         gen_set_condexec(s);
1081         gen_update_pc(s, pc_diff);
1082     }
1083     gen_exception_el_v(excp, syn, tcg_el);
1084     s->base.is_jmp = DISAS_NORETURN;
1085 }
1086 
1087 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1088                            uint32_t syn, uint32_t target_el)
1089 {
1090     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1091                             tcg_constant_i32(target_el));
1092 }
1093 
1094 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1095                         int excp, uint32_t syn)
1096 {
1097     if (s->aarch64) {
1098         gen_a64_update_pc(s, pc_diff);
1099     } else {
1100         gen_set_condexec(s);
1101         gen_update_pc(s, pc_diff);
1102     }
1103     gen_exception(excp, syn);
1104     s->base.is_jmp = DISAS_NORETURN;
1105 }
1106 
1107 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1108 {
1109     gen_set_condexec(s);
1110     gen_update_pc(s, 0);
1111     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 void unallocated_encoding(DisasContext *s)
1116 {
1117     /* Unallocated and reserved encodings are uncategorized */
1118     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1119 }
1120 
1121 /* Force a TB lookup after an instruction that changes the CPU state.  */
1122 void gen_lookup_tb(DisasContext *s)
1123 {
1124     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1125     s->base.is_jmp = DISAS_EXIT;
1126 }
1127 
1128 static inline void gen_hlt(DisasContext *s, int imm)
1129 {
1130     /* HLT. This has two purposes.
1131      * Architecturally, it is an external halting debug instruction.
1132      * Since QEMU doesn't implement external debug, we treat this as
1133      * the architecture requires when halting debug is disabled: it will UNDEF.
1134      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1135      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1136      * must trigger semihosting even for ARMv7 and earlier, where
1137      * HLT was an undefined encoding.
1138      * In system mode, we don't allow userspace access to
1139      * semihosting, to provide some semblance of security
1140      * (and for consistency with our 32-bit semihosting).
1141      */
1142     if (semihosting_enabled(s->current_el == 0) &&
1143         (imm == (s->thumb ? 0x3c : 0xf000))) {
1144         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1145         return;
1146     }
1147 
1148     unallocated_encoding(s);
1149 }
1150 
1151 /*
1152  * Return the offset of a "full" NEON Dreg.
1153  */
1154 long neon_full_reg_offset(unsigned reg)
1155 {
1156     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1157 }
1158 
1159 /*
1160  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1161  * where 0 is the least significant end of the register.
1162  */
1163 long neon_element_offset(int reg, int element, MemOp memop)
1164 {
1165     int element_size = 1 << (memop & MO_SIZE);
1166     int ofs = element * element_size;
1167 #if HOST_BIG_ENDIAN
1168     /*
1169      * Calculate the offset assuming fully little-endian,
1170      * then XOR to account for the order of the 8-byte units.
1171      */
1172     if (element_size < 8) {
1173         ofs ^= 8 - element_size;
1174     }
1175 #endif
1176     return neon_full_reg_offset(reg) + ofs;
1177 }
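/*
 * Worked example (illustrative): on a big-endian host, byte element 0 of a
 * D register lives at offset 7 within its 8-byte unit (0 ^ (8 - 1)) and
 * 32-bit element 0 lives at offset 4 (0 ^ (8 - 4)); on a little-endian
 * host the offsets are used unchanged.
 */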
1178 
1179 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1180 long vfp_reg_offset(bool dp, unsigned reg)
1181 {
1182     if (dp) {
1183         return neon_element_offset(reg, 0, MO_64);
1184     } else {
1185         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1186     }
1187 }
1188 
1189 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1190 {
1191     long off = neon_element_offset(reg, ele, memop);
1192 
1193     switch (memop) {
1194     case MO_SB:
1195         tcg_gen_ld8s_i32(dest, cpu_env, off);
1196         break;
1197     case MO_UB:
1198         tcg_gen_ld8u_i32(dest, cpu_env, off);
1199         break;
1200     case MO_SW:
1201         tcg_gen_ld16s_i32(dest, cpu_env, off);
1202         break;
1203     case MO_UW:
1204         tcg_gen_ld16u_i32(dest, cpu_env, off);
1205         break;
1206     case MO_UL:
1207     case MO_SL:
1208         tcg_gen_ld_i32(dest, cpu_env, off);
1209         break;
1210     default:
1211         g_assert_not_reached();
1212     }
1213 }
1214 
1215 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1216 {
1217     long off = neon_element_offset(reg, ele, memop);
1218 
1219     switch (memop) {
1220     case MO_SL:
1221         tcg_gen_ld32s_i64(dest, cpu_env, off);
1222         break;
1223     case MO_UL:
1224         tcg_gen_ld32u_i64(dest, cpu_env, off);
1225         break;
1226     case MO_UQ:
1227         tcg_gen_ld_i64(dest, cpu_env, off);
1228         break;
1229     default:
1230         g_assert_not_reached();
1231     }
1232 }
1233 
1234 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1235 {
1236     long off = neon_element_offset(reg, ele, memop);
1237 
1238     switch (memop) {
1239     case MO_8:
1240         tcg_gen_st8_i32(src, cpu_env, off);
1241         break;
1242     case MO_16:
1243         tcg_gen_st16_i32(src, cpu_env, off);
1244         break;
1245     case MO_32:
1246         tcg_gen_st_i32(src, cpu_env, off);
1247         break;
1248     default:
1249         g_assert_not_reached();
1250     }
1251 }
1252 
1253 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1254 {
1255     long off = neon_element_offset(reg, ele, memop);
1256 
1257     switch (memop) {
1258     case MO_32:
1259         tcg_gen_st32_i64(src, cpu_env, off);
1260         break;
1261     case MO_64:
1262         tcg_gen_st_i64(src, cpu_env, off);
1263         break;
1264     default:
1265         g_assert_not_reached();
1266     }
1267 }
1268 
1269 #define ARM_CP_RW_BIT   (1 << 20)
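/*
 * Editor's note: bit 20 is the L (direction) bit of the coprocessor
 * instruction encodings, so the decoder below uses it to distinguish reads
 * from the coprocessor (TMRRC, WLDR*) from writes to it (TMCRR, WSTR*).
 */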
1270 
1271 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1272 {
1273     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1274 }
1275 
1276 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1277 {
1278     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1279 }
1280 
1281 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1282 {
1283     TCGv_i32 var = tcg_temp_new_i32();
1284     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285     return var;
1286 }
1287 
1288 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1289 {
1290     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1291 }
1292 
1293 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1294 {
1295     iwmmxt_store_reg(cpu_M0, rn);
1296 }
1297 
1298 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1299 {
1300     iwmmxt_load_reg(cpu_M0, rn);
1301 }
1302 
1303 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1304 {
1305     iwmmxt_load_reg(cpu_V1, rn);
1306     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1307 }
1308 
1309 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1310 {
1311     iwmmxt_load_reg(cpu_V1, rn);
1312     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1313 }
1314 
1315 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1316 {
1317     iwmmxt_load_reg(cpu_V1, rn);
1318     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1319 }
1320 
1321 #define IWMMXT_OP(name) \
1322 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1323 { \
1324     iwmmxt_load_reg(cpu_V1, rn); \
1325     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1326 }
1327 
1328 #define IWMMXT_OP_ENV(name) \
1329 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1330 { \
1331     iwmmxt_load_reg(cpu_V1, rn); \
1332     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1333 }
1334 
1335 #define IWMMXT_OP_ENV_SIZE(name) \
1336 IWMMXT_OP_ENV(name##b) \
1337 IWMMXT_OP_ENV(name##w) \
1338 IWMMXT_OP_ENV(name##l)
1339 
1340 #define IWMMXT_OP_ENV1(name) \
1341 static inline void gen_op_iwmmxt_##name##_M0(void) \
1342 { \
1343     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1344 }
1345 
1346 IWMMXT_OP(maddsq)
1347 IWMMXT_OP(madduq)
1348 IWMMXT_OP(sadb)
1349 IWMMXT_OP(sadw)
1350 IWMMXT_OP(mulslw)
1351 IWMMXT_OP(mulshw)
1352 IWMMXT_OP(mululw)
1353 IWMMXT_OP(muluhw)
1354 IWMMXT_OP(macsw)
1355 IWMMXT_OP(macuw)
1356 
1357 IWMMXT_OP_ENV_SIZE(unpackl)
1358 IWMMXT_OP_ENV_SIZE(unpackh)
1359 
1360 IWMMXT_OP_ENV1(unpacklub)
1361 IWMMXT_OP_ENV1(unpackluw)
1362 IWMMXT_OP_ENV1(unpacklul)
1363 IWMMXT_OP_ENV1(unpackhub)
1364 IWMMXT_OP_ENV1(unpackhuw)
1365 IWMMXT_OP_ENV1(unpackhul)
1366 IWMMXT_OP_ENV1(unpacklsb)
1367 IWMMXT_OP_ENV1(unpacklsw)
1368 IWMMXT_OP_ENV1(unpacklsl)
1369 IWMMXT_OP_ENV1(unpackhsb)
1370 IWMMXT_OP_ENV1(unpackhsw)
1371 IWMMXT_OP_ENV1(unpackhsl)
1372 
1373 IWMMXT_OP_ENV_SIZE(cmpeq)
1374 IWMMXT_OP_ENV_SIZE(cmpgtu)
1375 IWMMXT_OP_ENV_SIZE(cmpgts)
1376 
1377 IWMMXT_OP_ENV_SIZE(mins)
1378 IWMMXT_OP_ENV_SIZE(minu)
1379 IWMMXT_OP_ENV_SIZE(maxs)
1380 IWMMXT_OP_ENV_SIZE(maxu)
1381 
1382 IWMMXT_OP_ENV_SIZE(subn)
1383 IWMMXT_OP_ENV_SIZE(addn)
1384 IWMMXT_OP_ENV_SIZE(subu)
1385 IWMMXT_OP_ENV_SIZE(addu)
1386 IWMMXT_OP_ENV_SIZE(subs)
1387 IWMMXT_OP_ENV_SIZE(adds)
1388 
1389 IWMMXT_OP_ENV(avgb0)
1390 IWMMXT_OP_ENV(avgb1)
1391 IWMMXT_OP_ENV(avgw0)
1392 IWMMXT_OP_ENV(avgw1)
1393 
1394 IWMMXT_OP_ENV(packuw)
1395 IWMMXT_OP_ENV(packul)
1396 IWMMXT_OP_ENV(packuq)
1397 IWMMXT_OP_ENV(packsw)
1398 IWMMXT_OP_ENV(packsl)
1399 IWMMXT_OP_ENV(packsq)
1400 
1401 static void gen_op_iwmmxt_set_mup(void)
1402 {
1403     TCGv_i32 tmp;
1404     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1405     tcg_gen_ori_i32(tmp, tmp, 2);
1406     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1407 }
1408 
1409 static void gen_op_iwmmxt_set_cup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 1);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_setpsr_nz(void)
1418 {
1419     TCGv_i32 tmp = tcg_temp_new_i32();
1420     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1421     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1422 }
1423 
1424 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1425 {
1426     iwmmxt_load_reg(cpu_V1, rn);
1427     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1428     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1429 }
1430 
1431 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1432                                      TCGv_i32 dest)
1433 {
1434     int rd;
1435     uint32_t offset;
1436     TCGv_i32 tmp;
1437 
1438     rd = (insn >> 16) & 0xf;
1439     tmp = load_reg(s, rd);
1440 
1441     offset = (insn & 0xff) << ((insn >> 7) & 2);
1442     if (insn & (1 << 24)) {
1443         /* Pre indexed */
1444         if (insn & (1 << 23))
1445             tcg_gen_addi_i32(tmp, tmp, offset);
1446         else
1447             tcg_gen_addi_i32(tmp, tmp, -offset);
1448         tcg_gen_mov_i32(dest, tmp);
1449         if (insn & (1 << 21)) {
1450             store_reg(s, rd, tmp);
1451         }
1452     } else if (insn & (1 << 21)) {
1453         /* Post indexed */
1454         tcg_gen_mov_i32(dest, tmp);
1455         if (insn & (1 << 23))
1456             tcg_gen_addi_i32(tmp, tmp, offset);
1457         else
1458             tcg_gen_addi_i32(tmp, tmp, -offset);
1459         store_reg(s, rd, tmp);
1460     } else if (!(insn & (1 << 23)))
1461         return 1;
1462     return 0;
1463 }
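/*
 * Editor's note: the immediate offset is an 8-bit field scaled by 4 when
 * bit 8 of the insn is set ((insn >> 7) & 2 evaluates to 2, i.e. a left
 * shift by two) and used unscaled otherwise; bits 24, 23 and 21 then
 * select pre/post indexing, add/subtract and base writeback in the usual
 * AArch32 load/store fashion.
 */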
1464 
1465 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1466 {
1467     int rd = (insn >> 0) & 0xf;
1468     TCGv_i32 tmp;
1469 
1470     if (insn & (1 << 8)) {
1471         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1472             return 1;
1473         } else {
1474             tmp = iwmmxt_load_creg(rd);
1475         }
1476     } else {
1477         tmp = tcg_temp_new_i32();
1478         iwmmxt_load_reg(cpu_V0, rd);
1479         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1480     }
1481     tcg_gen_andi_i32(tmp, tmp, mask);
1482     tcg_gen_mov_i32(dest, tmp);
1483     return 0;
1484 }
1485 
1486 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1487    (ie. an undefined instruction).  */
1488 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1489 {
1490     int rd, wrd;
1491     int rdhi, rdlo, rd0, rd1, i;
1492     TCGv_i32 addr;
1493     TCGv_i32 tmp, tmp2, tmp3;
1494 
1495     if ((insn & 0x0e000e00) == 0x0c000000) {
1496         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1497             wrd = insn & 0xf;
1498             rdlo = (insn >> 12) & 0xf;
1499             rdhi = (insn >> 16) & 0xf;
1500             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1501                 iwmmxt_load_reg(cpu_V0, wrd);
1502                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1503                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1504             } else {                                    /* TMCRR */
1505                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1506                 iwmmxt_store_reg(cpu_V0, wrd);
1507                 gen_op_iwmmxt_set_mup();
1508             }
1509             return 0;
1510         }
1511 
1512         wrd = (insn >> 12) & 0xf;
1513         addr = tcg_temp_new_i32();
1514         if (gen_iwmmxt_address(s, insn, addr)) {
1515             return 1;
1516         }
1517         if (insn & ARM_CP_RW_BIT) {
1518             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1519                 tmp = tcg_temp_new_i32();
1520                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1521                 iwmmxt_store_creg(wrd, tmp);
1522             } else {
1523                 i = 1;
1524                 if (insn & (1 << 8)) {
1525                     if (insn & (1 << 22)) {             /* WLDRD */
1526                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1527                         i = 0;
1528                     } else {                            /* WLDRW wRd */
1529                         tmp = tcg_temp_new_i32();
1530                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1531                     }
1532                 } else {
1533                     tmp = tcg_temp_new_i32();
1534                     if (insn & (1 << 22)) {             /* WLDRH */
1535                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1536                     } else {                            /* WLDRB */
1537                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1538                     }
1539                 }
1540                 if (i) {
1541                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1542                 }
1543                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1544             }
1545         } else {
1546             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1547                 tmp = iwmmxt_load_creg(wrd);
1548                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1549             } else {
1550                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1551                 tmp = tcg_temp_new_i32();
1552                 if (insn & (1 << 8)) {
1553                     if (insn & (1 << 22)) {             /* WSTRD */
1554                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1555                     } else {                            /* WSTRW wRd */
1556                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1557                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1558                     }
1559                 } else {
1560                     if (insn & (1 << 22)) {             /* WSTRH */
1561                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1562                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1563                     } else {                            /* WSTRB */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 }
1568             }
1569         }
1570         return 0;
1571     }
1572 
1573     if ((insn & 0x0f000000) != 0x0e000000)
1574         return 1;
1575 
1576     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1577     case 0x000:                                                 /* WOR */
1578         wrd = (insn >> 12) & 0xf;
1579         rd0 = (insn >> 0) & 0xf;
1580         rd1 = (insn >> 16) & 0xf;
1581         gen_op_iwmmxt_movq_M0_wRn(rd0);
1582         gen_op_iwmmxt_orq_M0_wRn(rd1);
1583         gen_op_iwmmxt_setpsr_nz();
1584         gen_op_iwmmxt_movq_wRn_M0(wrd);
1585         gen_op_iwmmxt_set_mup();
1586         gen_op_iwmmxt_set_cup();
1587         break;
1588     case 0x011:                                                 /* TMCR */
1589         if (insn & 0xf)
1590             return 1;
1591         rd = (insn >> 12) & 0xf;
1592         wrd = (insn >> 16) & 0xf;
1593         switch (wrd) {
1594         case ARM_IWMMXT_wCID:
1595         case ARM_IWMMXT_wCASF:
1596             break;
1597         case ARM_IWMMXT_wCon:
1598             gen_op_iwmmxt_set_cup();
1599             /* Fall through.  */
1600         case ARM_IWMMXT_wCSSF:
1601             tmp = iwmmxt_load_creg(wrd);
1602             tmp2 = load_reg(s, rd);
1603             tcg_gen_andc_i32(tmp, tmp, tmp2);
1604             iwmmxt_store_creg(wrd, tmp);
1605             break;
1606         case ARM_IWMMXT_wCGR0:
1607         case ARM_IWMMXT_wCGR1:
1608         case ARM_IWMMXT_wCGR2:
1609         case ARM_IWMMXT_wCGR3:
1610             gen_op_iwmmxt_set_cup();
1611             tmp = load_reg(s, rd);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         default:
1615             return 1;
1616         }
1617         break;
1618     case 0x100:                                                 /* WXOR */
1619         wrd = (insn >> 12) & 0xf;
1620         rd0 = (insn >> 0) & 0xf;
1621         rd1 = (insn >> 16) & 0xf;
1622         gen_op_iwmmxt_movq_M0_wRn(rd0);
1623         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1624         gen_op_iwmmxt_setpsr_nz();
1625         gen_op_iwmmxt_movq_wRn_M0(wrd);
1626         gen_op_iwmmxt_set_mup();
1627         gen_op_iwmmxt_set_cup();
1628         break;
1629     case 0x111:                                                 /* TMRC */
1630         if (insn & 0xf)
1631             return 1;
1632         rd = (insn >> 12) & 0xf;
1633         wrd = (insn >> 16) & 0xf;
1634         tmp = iwmmxt_load_creg(wrd);
1635         store_reg(s, rd, tmp);
1636         break;
1637     case 0x300:                                                 /* WANDN */
1638         wrd = (insn >> 12) & 0xf;
1639         rd0 = (insn >> 0) & 0xf;
1640         rd1 = (insn >> 16) & 0xf;
1641         gen_op_iwmmxt_movq_M0_wRn(rd0);
1642         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1643         gen_op_iwmmxt_andq_M0_wRn(rd1);
1644         gen_op_iwmmxt_setpsr_nz();
1645         gen_op_iwmmxt_movq_wRn_M0(wrd);
1646         gen_op_iwmmxt_set_mup();
1647         gen_op_iwmmxt_set_cup();
1648         break;
1649     case 0x200:                                                 /* WAND */
1650         wrd = (insn >> 12) & 0xf;
1651         rd0 = (insn >> 0) & 0xf;
1652         rd1 = (insn >> 16) & 0xf;
1653         gen_op_iwmmxt_movq_M0_wRn(rd0);
1654         gen_op_iwmmxt_andq_M0_wRn(rd1);
1655         gen_op_iwmmxt_setpsr_nz();
1656         gen_op_iwmmxt_movq_wRn_M0(wrd);
1657         gen_op_iwmmxt_set_mup();
1658         gen_op_iwmmxt_set_cup();
1659         break;
1660     case 0x810: case 0xa10:                             /* WMADD */
1661         wrd = (insn >> 12) & 0xf;
1662         rd0 = (insn >> 0) & 0xf;
1663         rd1 = (insn >> 16) & 0xf;
1664         gen_op_iwmmxt_movq_M0_wRn(rd0);
1665         if (insn & (1 << 21))
1666             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1667         else
1668             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1669         gen_op_iwmmxt_movq_wRn_M0(wrd);
1670         gen_op_iwmmxt_set_mup();
1671         break;
1672     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1673         wrd = (insn >> 12) & 0xf;
1674         rd0 = (insn >> 16) & 0xf;
1675         rd1 = (insn >> 0) & 0xf;
1676         gen_op_iwmmxt_movq_M0_wRn(rd0);
1677         switch ((insn >> 22) & 3) {
1678         case 0:
1679             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1680             break;
1681         case 1:
1682             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1683             break;
1684         case 2:
1685             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1686             break;
1687         case 3:
1688             return 1;
1689         }
1690         gen_op_iwmmxt_movq_wRn_M0(wrd);
1691         gen_op_iwmmxt_set_mup();
1692         gen_op_iwmmxt_set_cup();
1693         break;
1694     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1695         wrd = (insn >> 12) & 0xf;
1696         rd0 = (insn >> 16) & 0xf;
1697         rd1 = (insn >> 0) & 0xf;
1698         gen_op_iwmmxt_movq_M0_wRn(rd0);
1699         switch ((insn >> 22) & 3) {
1700         case 0:
1701             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1702             break;
1703         case 1:
1704             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1705             break;
1706         case 2:
1707             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1708             break;
1709         case 3:
1710             return 1;
1711         }
1712         gen_op_iwmmxt_movq_wRn_M0(wrd);
1713         gen_op_iwmmxt_set_mup();
1714         gen_op_iwmmxt_set_cup();
1715         break;
1716     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1717         wrd = (insn >> 12) & 0xf;
1718         rd0 = (insn >> 16) & 0xf;
1719         rd1 = (insn >> 0) & 0xf;
1720         gen_op_iwmmxt_movq_M0_wRn(rd0);
1721         if (insn & (1 << 22))
1722             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1723         else
1724             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1725         if (!(insn & (1 << 20)))
1726             gen_op_iwmmxt_addl_M0_wRn(wrd);
1727         gen_op_iwmmxt_movq_wRn_M0(wrd);
1728         gen_op_iwmmxt_set_mup();
1729         break;
1730     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1731         wrd = (insn >> 12) & 0xf;
1732         rd0 = (insn >> 16) & 0xf;
1733         rd1 = (insn >> 0) & 0xf;
1734         gen_op_iwmmxt_movq_M0_wRn(rd0);
1735         if (insn & (1 << 21)) {
1736             if (insn & (1 << 20))
1737                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1738             else
1739                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1740         } else {
1741             if (insn & (1 << 20))
1742                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1743             else
1744                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1745         }
1746         gen_op_iwmmxt_movq_wRn_M0(wrd);
1747         gen_op_iwmmxt_set_mup();
1748         break;
1749     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1750         wrd = (insn >> 12) & 0xf;
1751         rd0 = (insn >> 16) & 0xf;
1752         rd1 = (insn >> 0) & 0xf;
1753         gen_op_iwmmxt_movq_M0_wRn(rd0);
1754         if (insn & (1 << 21))
1755             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1756         else
1757             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1758         if (!(insn & (1 << 20))) {
1759             iwmmxt_load_reg(cpu_V1, wrd);
1760             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1761         }
1762         gen_op_iwmmxt_movq_wRn_M0(wrd);
1763         gen_op_iwmmxt_set_mup();
1764         break;
1765     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1766         wrd = (insn >> 12) & 0xf;
1767         rd0 = (insn >> 16) & 0xf;
1768         rd1 = (insn >> 0) & 0xf;
1769         gen_op_iwmmxt_movq_M0_wRn(rd0);
1770         switch ((insn >> 22) & 3) {
1771         case 0:
1772             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1773             break;
1774         case 1:
1775             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1776             break;
1777         case 2:
1778             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1779             break;
1780         case 3:
1781             return 1;
1782         }
1783         gen_op_iwmmxt_movq_wRn_M0(wrd);
1784         gen_op_iwmmxt_set_mup();
1785         gen_op_iwmmxt_set_cup();
1786         break;
1787     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1788         wrd = (insn >> 12) & 0xf;
1789         rd0 = (insn >> 16) & 0xf;
1790         rd1 = (insn >> 0) & 0xf;
1791         gen_op_iwmmxt_movq_M0_wRn(rd0);
1792         if (insn & (1 << 22)) {
1793             if (insn & (1 << 20))
1794                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1795             else
1796                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1797         } else {
1798             if (insn & (1 << 20))
1799                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1800             else
1801                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1802         }
1803         gen_op_iwmmxt_movq_wRn_M0(wrd);
1804         gen_op_iwmmxt_set_mup();
1805         gen_op_iwmmxt_set_cup();
1806         break;
1807     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1808         wrd = (insn >> 12) & 0xf;
1809         rd0 = (insn >> 16) & 0xf;
1810         rd1 = (insn >> 0) & 0xf;
1811         gen_op_iwmmxt_movq_M0_wRn(rd0);
1812         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1813         tcg_gen_andi_i32(tmp, tmp, 7);
1814         iwmmxt_load_reg(cpu_V1, rd1);
1815         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1816         gen_op_iwmmxt_movq_wRn_M0(wrd);
1817         gen_op_iwmmxt_set_mup();
1818         break;
1819     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1820         if (((insn >> 6) & 3) == 3)
1821             return 1;
1822         rd = (insn >> 12) & 0xf;
1823         wrd = (insn >> 16) & 0xf;
1824         tmp = load_reg(s, rd);
1825         gen_op_iwmmxt_movq_M0_wRn(wrd);
1826         switch ((insn >> 6) & 3) {
1827         case 0:
1828             tmp2 = tcg_constant_i32(0xff);
1829             tmp3 = tcg_constant_i32((insn & 7) << 3);
1830             break;
1831         case 1:
1832             tmp2 = tcg_constant_i32(0xffff);
1833             tmp3 = tcg_constant_i32((insn & 3) << 4);
1834             break;
1835         case 2:
1836             tmp2 = tcg_constant_i32(0xffffffff);
1837             tmp3 = tcg_constant_i32((insn & 1) << 5);
1838             break;
1839         default:
1840             g_assert_not_reached();
1841         }
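        /*
         * tmp2 is the lane mask and tmp3 the bit offset of the selected
         * byte/halfword/word within the 64-bit destination, as passed to
         * the insert helper below.
         */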
1842         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1843         gen_op_iwmmxt_movq_wRn_M0(wrd);
1844         gen_op_iwmmxt_set_mup();
1845         break;
1846     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1847         rd = (insn >> 12) & 0xf;
1848         wrd = (insn >> 16) & 0xf;
1849         if (rd == 15 || ((insn >> 22) & 3) == 3)
1850             return 1;
1851         gen_op_iwmmxt_movq_M0_wRn(wrd);
1852         tmp = tcg_temp_new_i32();
1853         switch ((insn >> 22) & 3) {
1854         case 0:
1855             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1856             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1857             if (insn & 8) {
1858                 tcg_gen_ext8s_i32(tmp, tmp);
1859             } else {
1860                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1861             }
1862             break;
1863         case 1:
1864             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1865             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1866             if (insn & 8) {
1867                 tcg_gen_ext16s_i32(tmp, tmp);
1868             } else {
1869                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1870             }
1871             break;
1872         case 2:
1873             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1874             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1875             break;
1876         }
1877         store_reg(s, rd, tmp);
1878         break;
1879     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
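        /*
         * wCASF holds per-lane SIMD condition flags in 4-bit N/Z/C/V groups;
         * the shifts below move the group for the selected byte, halfword or
         * word down to bit 0, and the final shift left by 28 places it in
         * bits [31:28] for gen_set_nzcv().
         */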
1880         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1881             return 1;
1882         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1883         switch ((insn >> 22) & 3) {
1884         case 0:
1885             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1886             break;
1887         case 1:
1888             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1889             break;
1890         case 2:
1891             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1892             break;
1893         }
1894         tcg_gen_shli_i32(tmp, tmp, 28);
1895         gen_set_nzcv(tmp);
1896         break;
1897     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1898         if (((insn >> 6) & 3) == 3)
1899             return 1;
1900         rd = (insn >> 12) & 0xf;
1901         wrd = (insn >> 16) & 0xf;
1902         tmp = load_reg(s, rd);
1903         switch ((insn >> 6) & 3) {
1904         case 0:
1905             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1906             break;
1907         case 1:
1908             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1909             break;
1910         case 2:
1911             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1912             break;
1913         }
1914         gen_op_iwmmxt_movq_wRn_M0(wrd);
1915         gen_op_iwmmxt_set_mup();
1916         break;
1917     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1918         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1919             return 1;
1920         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1921         tmp2 = tcg_temp_new_i32();
1922         tcg_gen_mov_i32(tmp2, tmp);
1923         switch ((insn >> 22) & 3) {
1924         case 0:
1925             for (i = 0; i < 7; i ++) {
1926                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1927                 tcg_gen_and_i32(tmp, tmp, tmp2);
1928             }
1929             break;
1930         case 1:
1931             for (i = 0; i < 3; i ++) {
1932                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1933                 tcg_gen_and_i32(tmp, tmp, tmp2);
1934             }
1935             break;
1936         case 2:
1937             tcg_gen_shli_i32(tmp2, tmp2, 16);
1938             tcg_gen_and_i32(tmp, tmp, tmp2);
1939             break;
1940         }
1941         gen_set_nzcv(tmp);
1942         break;
1943     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1944         wrd = (insn >> 12) & 0xf;
1945         rd0 = (insn >> 16) & 0xf;
1946         gen_op_iwmmxt_movq_M0_wRn(rd0);
1947         switch ((insn >> 22) & 3) {
1948         case 0:
1949             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1950             break;
1951         case 1:
1952             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1953             break;
1954         case 2:
1955             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1956             break;
1957         case 3:
1958             return 1;
1959         }
1960         gen_op_iwmmxt_movq_wRn_M0(wrd);
1961         gen_op_iwmmxt_set_mup();
1962         break;
1963     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1964         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1965             return 1;
1966         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1967         tmp2 = tcg_temp_new_i32();
1968         tcg_gen_mov_i32(tmp2, tmp);
1969         switch ((insn >> 22) & 3) {
1970         case 0:
1971             for (i = 0; i < 7; i ++) {
1972                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1973                 tcg_gen_or_i32(tmp, tmp, tmp2);
1974             }
1975             break;
1976         case 1:
1977             for (i = 0; i < 3; i ++) {
1978                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1979                 tcg_gen_or_i32(tmp, tmp, tmp2);
1980             }
1981             break;
1982         case 2:
1983             tcg_gen_shli_i32(tmp2, tmp2, 16);
1984             tcg_gen_or_i32(tmp, tmp, tmp2);
1985             break;
1986         }
1987         gen_set_nzcv(tmp);
1988         break;
1989     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1990         rd = (insn >> 12) & 0xf;
1991         rd0 = (insn >> 16) & 0xf;
1992         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1993             return 1;
1994         gen_op_iwmmxt_movq_M0_wRn(rd0);
1995         tmp = tcg_temp_new_i32();
1996         switch ((insn >> 22) & 3) {
1997         case 0:
1998             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1999             break;
2000         case 1:
2001             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2002             break;
2003         case 2:
2004             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2005             break;
2006         }
2007         store_reg(s, rd, tmp);
2008         break;
2009     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2010     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2011         wrd = (insn >> 12) & 0xf;
2012         rd0 = (insn >> 16) & 0xf;
2013         rd1 = (insn >> 0) & 0xf;
2014         gen_op_iwmmxt_movq_M0_wRn(rd0);
2015         switch ((insn >> 22) & 3) {
2016         case 0:
2017             if (insn & (1 << 21))
2018                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2019             else
2020                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2021             break;
2022         case 1:
2023             if (insn & (1 << 21))
2024                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2025             else
2026                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2027             break;
2028         case 2:
2029             if (insn & (1 << 21))
2030                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2031             else
2032                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2033             break;
2034         case 3:
2035             return 1;
2036         }
2037         gen_op_iwmmxt_movq_wRn_M0(wrd);
2038         gen_op_iwmmxt_set_mup();
2039         gen_op_iwmmxt_set_cup();
2040         break;
2041     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2042     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2043         wrd = (insn >> 12) & 0xf;
2044         rd0 = (insn >> 16) & 0xf;
2045         gen_op_iwmmxt_movq_M0_wRn(rd0);
2046         switch ((insn >> 22) & 3) {
2047         case 0:
2048             if (insn & (1 << 21))
2049                 gen_op_iwmmxt_unpacklsb_M0();
2050             else
2051                 gen_op_iwmmxt_unpacklub_M0();
2052             break;
2053         case 1:
2054             if (insn & (1 << 21))
2055                 gen_op_iwmmxt_unpacklsw_M0();
2056             else
2057                 gen_op_iwmmxt_unpackluw_M0();
2058             break;
2059         case 2:
2060             if (insn & (1 << 21))
2061                 gen_op_iwmmxt_unpacklsl_M0();
2062             else
2063                 gen_op_iwmmxt_unpacklul_M0();
2064             break;
2065         case 3:
2066             return 1;
2067         }
2068         gen_op_iwmmxt_movq_wRn_M0(wrd);
2069         gen_op_iwmmxt_set_mup();
2070         gen_op_iwmmxt_set_cup();
2071         break;
2072     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2073     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2074         wrd = (insn >> 12) & 0xf;
2075         rd0 = (insn >> 16) & 0xf;
2076         gen_op_iwmmxt_movq_M0_wRn(rd0);
2077         switch ((insn >> 22) & 3) {
2078         case 0:
2079             if (insn & (1 << 21))
2080                 gen_op_iwmmxt_unpackhsb_M0();
2081             else
2082                 gen_op_iwmmxt_unpackhub_M0();
2083             break;
2084         case 1:
2085             if (insn & (1 << 21))
2086                 gen_op_iwmmxt_unpackhsw_M0();
2087             else
2088                 gen_op_iwmmxt_unpackhuw_M0();
2089             break;
2090         case 2:
2091             if (insn & (1 << 21))
2092                 gen_op_iwmmxt_unpackhsl_M0();
2093             else
2094                 gen_op_iwmmxt_unpackhul_M0();
2095             break;
2096         case 3:
2097             return 1;
2098         }
2099         gen_op_iwmmxt_movq_wRn_M0(wrd);
2100         gen_op_iwmmxt_set_mup();
2101         gen_op_iwmmxt_set_cup();
2102         break;
2103     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2104     case 0x214: case 0x614: case 0xa14: case 0xe14:
2105         if (((insn >> 22) & 3) == 0)
2106             return 1;
2107         wrd = (insn >> 12) & 0xf;
2108         rd0 = (insn >> 16) & 0xf;
2109         gen_op_iwmmxt_movq_M0_wRn(rd0);
2110         tmp = tcg_temp_new_i32();
2111         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2112             return 1;
2113         }
2114         switch ((insn >> 22) & 3) {
2115         case 1:
2116             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2117             break;
2118         case 2:
2119             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2120             break;
2121         case 3:
2122             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2123             break;
2124         }
2125         gen_op_iwmmxt_movq_wRn_M0(wrd);
2126         gen_op_iwmmxt_set_mup();
2127         gen_op_iwmmxt_set_cup();
2128         break;
2129     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2130     case 0x014: case 0x414: case 0x814: case 0xc14:
2131         if (((insn >> 22) & 3) == 0)
2132             return 1;
2133         wrd = (insn >> 12) & 0xf;
2134         rd0 = (insn >> 16) & 0xf;
2135         gen_op_iwmmxt_movq_M0_wRn(rd0);
2136         tmp = tcg_temp_new_i32();
2137         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2138             return 1;
2139         }
2140         switch ((insn >> 22) & 3) {
2141         case 1:
2142             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2143             break;
2144         case 2:
2145             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2146             break;
2147         case 3:
2148             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2149             break;
2150         }
2151         gen_op_iwmmxt_movq_wRn_M0(wrd);
2152         gen_op_iwmmxt_set_mup();
2153         gen_op_iwmmxt_set_cup();
2154         break;
2155     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2156     case 0x114: case 0x514: case 0x914: case 0xd14:
2157         if (((insn >> 22) & 3) == 0)
2158             return 1;
2159         wrd = (insn >> 12) & 0xf;
2160         rd0 = (insn >> 16) & 0xf;
2161         gen_op_iwmmxt_movq_M0_wRn(rd0);
2162         tmp = tcg_temp_new_i32();
2163         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2164             return 1;
2165         }
2166         switch ((insn >> 22) & 3) {
2167         case 1:
2168             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2169             break;
2170         case 2:
2171             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2172             break;
2173         case 3:
2174             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2175             break;
2176         }
2177         gen_op_iwmmxt_movq_wRn_M0(wrd);
2178         gen_op_iwmmxt_set_mup();
2179         gen_op_iwmmxt_set_cup();
2180         break;
2181     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2182     case 0x314: case 0x714: case 0xb14: case 0xf14:
2183         if (((insn >> 22) & 3) == 0)
2184             return 1;
2185         wrd = (insn >> 12) & 0xf;
2186         rd0 = (insn >> 16) & 0xf;
2187         gen_op_iwmmxt_movq_M0_wRn(rd0);
2188         tmp = tcg_temp_new_i32();
2189         switch ((insn >> 22) & 3) {
2190         case 1:
2191             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2192                 return 1;
2193             }
2194             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2195             break;
2196         case 2:
2197             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2198                 return 1;
2199             }
2200             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2201             break;
2202         case 3:
2203             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2204                 return 1;
2205             }
2206             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2207             break;
2208         }
2209         gen_op_iwmmxt_movq_wRn_M0(wrd);
2210         gen_op_iwmmxt_set_mup();
2211         gen_op_iwmmxt_set_cup();
2212         break;
2213     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2214     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2215         wrd = (insn >> 12) & 0xf;
2216         rd0 = (insn >> 16) & 0xf;
2217         rd1 = (insn >> 0) & 0xf;
2218         gen_op_iwmmxt_movq_M0_wRn(rd0);
2219         switch ((insn >> 22) & 3) {
2220         case 0:
2221             if (insn & (1 << 21))
2222                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2223             else
2224                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2225             break;
2226         case 1:
2227             if (insn & (1 << 21))
2228                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2229             else
2230                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2231             break;
2232         case 2:
2233             if (insn & (1 << 21))
2234                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2235             else
2236                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2237             break;
2238         case 3:
2239             return 1;
2240         }
2241         gen_op_iwmmxt_movq_wRn_M0(wrd);
2242         gen_op_iwmmxt_set_mup();
2243         break;
2244     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2245     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2246         wrd = (insn >> 12) & 0xf;
2247         rd0 = (insn >> 16) & 0xf;
2248         rd1 = (insn >> 0) & 0xf;
2249         gen_op_iwmmxt_movq_M0_wRn(rd0);
2250         switch ((insn >> 22) & 3) {
2251         case 0:
2252             if (insn & (1 << 21))
2253                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2254             else
2255                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2256             break;
2257         case 1:
2258             if (insn & (1 << 21))
2259                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2260             else
2261                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2262             break;
2263         case 2:
2264             if (insn & (1 << 21))
2265                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2266             else
2267                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2268             break;
2269         case 3:
2270             return 1;
2271         }
2272         gen_op_iwmmxt_movq_wRn_M0(wrd);
2273         gen_op_iwmmxt_set_mup();
2274         break;
2275     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2276     case 0x402: case 0x502: case 0x602: case 0x702:
2277         wrd = (insn >> 12) & 0xf;
2278         rd0 = (insn >> 16) & 0xf;
2279         rd1 = (insn >> 0) & 0xf;
2280         gen_op_iwmmxt_movq_M0_wRn(rd0);
2281         iwmmxt_load_reg(cpu_V1, rd1);
2282         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2283                                 tcg_constant_i32((insn >> 20) & 3));
2284         gen_op_iwmmxt_movq_wRn_M0(wrd);
2285         gen_op_iwmmxt_set_mup();
2286         break;
2287     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2288     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2289     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2290     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2291         wrd = (insn >> 12) & 0xf;
2292         rd0 = (insn >> 16) & 0xf;
2293         rd1 = (insn >> 0) & 0xf;
2294         gen_op_iwmmxt_movq_M0_wRn(rd0);
2295         switch ((insn >> 20) & 0xf) {
2296         case 0x0:
2297             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2298             break;
2299         case 0x1:
2300             gen_op_iwmmxt_subub_M0_wRn(rd1);
2301             break;
2302         case 0x3:
2303             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2304             break;
2305         case 0x4:
2306             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2307             break;
2308         case 0x5:
2309             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2310             break;
2311         case 0x7:
2312             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2313             break;
2314         case 0x8:
2315             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2316             break;
2317         case 0x9:
2318             gen_op_iwmmxt_subul_M0_wRn(rd1);
2319             break;
2320         case 0xb:
2321             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2322             break;
2323         default:
2324             return 1;
2325         }
2326         gen_op_iwmmxt_movq_wRn_M0(wrd);
2327         gen_op_iwmmxt_set_mup();
2328         gen_op_iwmmxt_set_cup();
2329         break;
2330     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2331     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2332     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2333     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2334         wrd = (insn >> 12) & 0xf;
2335         rd0 = (insn >> 16) & 0xf;
2336         gen_op_iwmmxt_movq_M0_wRn(rd0);
2337         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2338         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2339         gen_op_iwmmxt_movq_wRn_M0(wrd);
2340         gen_op_iwmmxt_set_mup();
2341         gen_op_iwmmxt_set_cup();
2342         break;
2343     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2344     case 0x418: case 0x518: case 0x618: case 0x718:
2345     case 0x818: case 0x918: case 0xa18: case 0xb18:
2346     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2347         wrd = (insn >> 12) & 0xf;
2348         rd0 = (insn >> 16) & 0xf;
2349         rd1 = (insn >> 0) & 0xf;
2350         gen_op_iwmmxt_movq_M0_wRn(rd0);
2351         switch ((insn >> 20) & 0xf) {
2352         case 0x0:
2353             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2354             break;
2355         case 0x1:
2356             gen_op_iwmmxt_addub_M0_wRn(rd1);
2357             break;
2358         case 0x3:
2359             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2360             break;
2361         case 0x4:
2362             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2363             break;
2364         case 0x5:
2365             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2366             break;
2367         case 0x7:
2368             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2369             break;
2370         case 0x8:
2371             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2372             break;
2373         case 0x9:
2374             gen_op_iwmmxt_addul_M0_wRn(rd1);
2375             break;
2376         case 0xb:
2377             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2378             break;
2379         default:
2380             return 1;
2381         }
2382         gen_op_iwmmxt_movq_wRn_M0(wrd);
2383         gen_op_iwmmxt_set_mup();
2384         gen_op_iwmmxt_set_cup();
2385         break;
2386     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2387     case 0x408: case 0x508: case 0x608: case 0x708:
2388     case 0x808: case 0x908: case 0xa08: case 0xb08:
2389     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2390         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2391             return 1;
2392         wrd = (insn >> 12) & 0xf;
2393         rd0 = (insn >> 16) & 0xf;
2394         rd1 = (insn >> 0) & 0xf;
2395         gen_op_iwmmxt_movq_M0_wRn(rd0);
2396         switch ((insn >> 22) & 3) {
2397         case 1:
2398             if (insn & (1 << 21))
2399                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2400             else
2401                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2402             break;
2403         case 2:
2404             if (insn & (1 << 21))
2405                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2406             else
2407                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2408             break;
2409         case 3:
2410             if (insn & (1 << 21))
2411                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2412             else
2413                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2414             break;
2415         }
2416         gen_op_iwmmxt_movq_wRn_M0(wrd);
2417         gen_op_iwmmxt_set_mup();
2418         gen_op_iwmmxt_set_cup();
2419         break;
2420     case 0x201: case 0x203: case 0x205: case 0x207:
2421     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2422     case 0x211: case 0x213: case 0x215: case 0x217:
2423     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2424         wrd = (insn >> 5) & 0xf;
2425         rd0 = (insn >> 12) & 0xf;
2426         rd1 = (insn >> 0) & 0xf;
2427         if (rd0 == 0xf || rd1 == 0xf)
2428             return 1;
2429         gen_op_iwmmxt_movq_M0_wRn(wrd);
2430         tmp = load_reg(s, rd0);
2431         tmp2 = load_reg(s, rd1);
2432         switch ((insn >> 16) & 0xf) {
2433         case 0x0:                                       /* TMIA */
2434             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2435             break;
2436         case 0x8:                                       /* TMIAPH */
2437             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2438             break;
2439         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2440             if (insn & (1 << 16))
2441                 tcg_gen_shri_i32(tmp, tmp, 16);
2442             if (insn & (1 << 17))
2443                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2444             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2445             break;
2446         default:
2447             return 1;
2448         }
2449         gen_op_iwmmxt_movq_wRn_M0(wrd);
2450         gen_op_iwmmxt_set_mup();
2451         break;
2452     default:
2453         return 1;
2454     }
2455 
2456     return 0;
2457 }
2458 
2459 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2460    (i.e. an undefined instruction).  */
2461 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2462 {
2463     int acc, rd0, rd1, rdhi, rdlo;
2464     TCGv_i32 tmp, tmp2;
2465 
2466     if ((insn & 0x0ff00f10) == 0x0e200010) {
2467         /* Multiply with Internal Accumulate Format */
2468         rd0 = (insn >> 12) & 0xf;
2469         rd1 = insn & 0xf;
2470         acc = (insn >> 5) & 7;
2471 
2472         if (acc != 0)
2473             return 1;
2474 
2475         tmp = load_reg(s, rd0);
2476         tmp2 = load_reg(s, rd1);
2477         switch ((insn >> 16) & 0xf) {
2478         case 0x0:                                       /* MIA */
2479             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2480             break;
2481         case 0x8:                                       /* MIAPH */
2482             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2483             break;
2484         case 0xc:                                       /* MIABB */
2485         case 0xd:                                       /* MIABT */
2486         case 0xe:                                       /* MIATB */
2487         case 0xf:                                       /* MIATT */
2488             if (insn & (1 << 16))
2489                 tcg_gen_shri_i32(tmp, tmp, 16);
2490             if (insn & (1 << 17))
2491                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2492             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2493             break;
2494         default:
2495             return 1;
2496         }
2497 
2498         gen_op_iwmmxt_movq_wRn_M0(acc);
2499         return 0;
2500     }
2501 
2502     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2503         /* Internal Accumulator Access Format */
2504         rdhi = (insn >> 16) & 0xf;
2505         rdlo = (insn >> 12) & 0xf;
2506         acc = insn & 7;
2507 
2508         if (acc != 0)
2509             return 1;
2510 
2511         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2512             iwmmxt_load_reg(cpu_V0, acc);
2513             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2514             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2515             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2516         } else {                                        /* MAR */
2517             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2518             iwmmxt_store_reg(cpu_V0, acc);
2519         }
2520         return 0;
2521     }
2522 
2523     return 1;
2524 }
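
/*
 * The internal accumulator acc0 transferred by MAR/MRA above is 40 bits
 * wide: MRA returns its low 32 bits in RdLo and bits [39:32] in RdHi,
 * which is what the (1 << (40 - 32)) - 1 mask enforces.  A minimal
 * standalone sketch of that split (illustrative only, not used by the
 * translator):
 */
static inline void example_mra_split(uint64_t acc40,
                                     uint32_t *rdlo, uint32_t *rdhi)
{
    *rdlo = (uint32_t)acc40;                 /* bits [31:0]  */
    *rdhi = (uint32_t)(acc40 >> 32) & 0xff;  /* bits [39:32] */
}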
2525 
2526 static void gen_goto_ptr(void)
2527 {
2528     tcg_gen_lookup_and_goto_ptr();
2529 }
2530 
2531 /* This will end the TB but doesn't guarantee we'll return to
2532  * cpu_loop_exec. Any live exit_requests will be processed as we
2533  * enter the next TB.
2534  */
2535 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2536 {
2537     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2538         /*
2539          * For pcrel, the pc must always be up-to-date on entry to
2540          * the linked TB, so that it can use simple additions for all
2541          * further adjustments.  For !pcrel, the linked TB is compiled
2542          * to know its full virtual address, so we can delay the
2543          * update to pc to the unlinked path.  A long chain of links
2544          * can thus avoid many updates to the PC.
2545          */
2546         if (tb_cflags(s->base.tb) & CF_PCREL) {
2547             gen_update_pc(s, diff);
2548             tcg_gen_goto_tb(n);
2549         } else {
2550             tcg_gen_goto_tb(n);
2551             gen_update_pc(s, diff);
2552         }
2553         tcg_gen_exit_tb(s->base.tb, n);
2554     } else {
2555         gen_update_pc(s, diff);
2556         gen_goto_ptr();
2557     }
2558     s->base.is_jmp = DISAS_NORETURN;
2559 }
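
/*
 * Concretely, the chained exit above emits either
 *   update pc to pc_curr + diff; goto_tb n; exit_tb(tb, n)     (CF_PCREL)
 * or
 *   goto_tb n; update pc to pc_curr + diff; exit_tb(tb, n)     (!CF_PCREL)
 * so in the !CF_PCREL case the pc store follows the patchable jump and is
 * executed only on the unlinked path.
 */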
2560 
2561 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2562 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2563 {
2564     if (unlikely(s->ss_active)) {
2565         /* An indirect jump so that we still trigger the debug exception.  */
2566         gen_update_pc(s, diff);
2567         s->base.is_jmp = DISAS_JUMP;
2568         return;
2569     }
2570     switch (s->base.is_jmp) {
2571     case DISAS_NEXT:
2572     case DISAS_TOO_MANY:
2573     case DISAS_NORETURN:
2574         /*
2575          * The normal case: just go to the destination TB.
2576          * NB: NORETURN happens if we generate code like
2577          *    gen_brcondi(l);
2578          *    gen_jmp();
2579          *    gen_set_label(l);
2580          *    gen_jmp();
2581          * on the second call to gen_jmp().
2582          */
2583         gen_goto_tb(s, tbno, diff);
2584         break;
2585     case DISAS_UPDATE_NOCHAIN:
2586     case DISAS_UPDATE_EXIT:
2587         /*
2588          * We already decided we're leaving the TB for some other reason.
2589          * Avoid using goto_tb so we really do exit back to the main loop
2590          * and don't chain to another TB.
2591          */
2592         gen_update_pc(s, diff);
2593         gen_goto_ptr();
2594         s->base.is_jmp = DISAS_NORETURN;
2595         break;
2596     default:
2597         /*
2598          * We shouldn't be emitting code for a jump and also have
2599          * is_jmp set to one of the special cases like DISAS_SWI.
2600          */
2601         g_assert_not_reached();
2602     }
2603 }
2604 
2605 static inline void gen_jmp(DisasContext *s, target_long diff)
2606 {
2607     gen_jmp_tb(s, diff, 0);
2608 }
2609 
2610 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2611 {
2612     if (x)
2613         tcg_gen_sari_i32(t0, t0, 16);
2614     else
2615         gen_sxth(t0);
2616     if (y)
2617         tcg_gen_sari_i32(t1, t1, 16);
2618     else
2619         gen_sxth(t1);
2620     tcg_gen_mul_i32(t0, t0, t1);
2621 }
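
/*
 * gen_mulxy() is the scalar core of the SMULxy-style halfword multiplies:
 * x and y select the top (1) or bottom (0) signed 16-bit half of each
 * operand.  A standalone sketch of the same arithmetic (illustrative only,
 * not used by the translator):
 */
static inline int32_t example_mulxy(int32_t t0, int32_t t1, int x, int y)
{
    int32_t a = x ? (int16_t)(t0 >> 16) : (int16_t)t0;
    int32_t b = y ? (int16_t)(t1 >> 16) : (int16_t)t1;

    return a * b;       /* 16 x 16 -> 32, cannot overflow */
}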
2622 
2623 /* Return the mask of PSR bits set by an MSR instruction.  */
2624 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2625 {
2626     uint32_t mask = 0;
2627 
2628     if (flags & (1 << 0)) {
2629         mask |= 0xff;
2630     }
2631     if (flags & (1 << 1)) {
2632         mask |= 0xff00;
2633     }
2634     if (flags & (1 << 2)) {
2635         mask |= 0xff0000;
2636     }
2637     if (flags & (1 << 3)) {
2638         mask |= 0xff000000;
2639     }
2640 
2641     /* Mask out undefined and reserved bits.  */
2642     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2643 
2644     /* Mask out execution state.  */
2645     if (!spsr) {
2646         mask &= ~CPSR_EXEC;
2647     }
2648 
2649     /* Mask out privileged bits.  */
2650     if (IS_USER(s)) {
2651         mask &= CPSR_USER;
2652     }
2653     return mask;
2654 }
2655 
2656 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2657 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2658 {
2659     TCGv_i32 tmp;
2660     if (spsr) {
2661         /* ??? This is also undefined in system mode.  */
2662         if (IS_USER(s))
2663             return 1;
2664 
2665         tmp = load_cpu_field(spsr);
2666         tcg_gen_andi_i32(tmp, tmp, ~mask);
2667         tcg_gen_andi_i32(t0, t0, mask);
2668         tcg_gen_or_i32(tmp, tmp, t0);
2669         store_cpu_field(tmp, spsr);
2670     } else {
2671         gen_set_cpsr(t0, mask);
2672     }
2673     gen_lookup_tb(s);
2674     return 0;
2675 }
2676 
2677 /* Returns nonzero if access to the PSR is not permitted.  */
2678 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2679 {
2680     TCGv_i32 tmp;
2681     tmp = tcg_temp_new_i32();
2682     tcg_gen_movi_i32(tmp, val);
2683     return gen_set_psr(s, mask, spsr, tmp);
2684 }
2685 
2686 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2687                                      int *tgtmode, int *regno)
2688 {
2689     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2690      * the target mode and register number, and identify the various
2691      * unpredictable cases.
2692      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2693      *  + executed in user mode
2694      *  + using R15 as the src/dest register
2695      *  + accessing an unimplemented register
2696      *  + accessing a register that's inaccessible at current PL/security state*
2697      *  + accessing a register that you could access with a different insn
2698      * We choose to UNDEF in all these cases.
2699      * Since we don't know which of the various AArch32 modes we are in
2700      * we have to defer some checks to runtime.
2701      * Accesses to Monitor mode registers from Secure EL1 (which implies
2702      * that EL3 is AArch64) must trap to EL3.
2703      *
2704      * If the access checks fail this function will emit code to take
2705      * an exception and return false. Otherwise it will return true,
2706      * and set *tgtmode and *regno appropriately.
2707      */
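    /*
     * For example, "MRS r0, SP_irq" is encoded with r == 0 and sysm == 0x11,
     * which decodes below to *tgtmode = ARM_CPU_MODE_IRQ and *regno = 13.
     */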
2708     /* These instructions are present only in ARMv8, or in ARMv7 with the
2709      * Virtualization Extensions.
2710      */
2711     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2712         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2713         goto undef;
2714     }
2715 
2716     if (IS_USER(s) || rn == 15) {
2717         goto undef;
2718     }
2719 
2720     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2721      * of registers into (r, sysm).
2722      */
2723     if (r) {
2724         /* SPSRs for other modes */
2725         switch (sysm) {
2726         case 0xe: /* SPSR_fiq */
2727             *tgtmode = ARM_CPU_MODE_FIQ;
2728             break;
2729         case 0x10: /* SPSR_irq */
2730             *tgtmode = ARM_CPU_MODE_IRQ;
2731             break;
2732         case 0x12: /* SPSR_svc */
2733             *tgtmode = ARM_CPU_MODE_SVC;
2734             break;
2735         case 0x14: /* SPSR_abt */
2736             *tgtmode = ARM_CPU_MODE_ABT;
2737             break;
2738         case 0x16: /* SPSR_und */
2739             *tgtmode = ARM_CPU_MODE_UND;
2740             break;
2741         case 0x1c: /* SPSR_mon */
2742             *tgtmode = ARM_CPU_MODE_MON;
2743             break;
2744         case 0x1e: /* SPSR_hyp */
2745             *tgtmode = ARM_CPU_MODE_HYP;
2746             break;
2747         default: /* unallocated */
2748             goto undef;
2749         }
2750         /* We arbitrarily assign SPSR a register number of 16. */
2751         *regno = 16;
2752     } else {
2753         /* general purpose registers for other modes */
2754         switch (sysm) {
2755         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2756             *tgtmode = ARM_CPU_MODE_USR;
2757             *regno = sysm + 8;
2758             break;
2759         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2760             *tgtmode = ARM_CPU_MODE_FIQ;
2761             *regno = sysm;
2762             break;
2763         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2764             *tgtmode = ARM_CPU_MODE_IRQ;
2765             *regno = sysm & 1 ? 13 : 14;
2766             break;
2767         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2768             *tgtmode = ARM_CPU_MODE_SVC;
2769             *regno = sysm & 1 ? 13 : 14;
2770             break;
2771         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2772             *tgtmode = ARM_CPU_MODE_ABT;
2773             *regno = sysm & 1 ? 13 : 14;
2774             break;
2775         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2776             *tgtmode = ARM_CPU_MODE_UND;
2777             *regno = sysm & 1 ? 13 : 14;
2778             break;
2779         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2780             *tgtmode = ARM_CPU_MODE_MON;
2781             *regno = sysm & 1 ? 13 : 14;
2782             break;
2783         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2784             *tgtmode = ARM_CPU_MODE_HYP;
2785             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2786             *regno = sysm & 1 ? 13 : 17;
2787             break;
2788         default: /* unallocated */
2789             goto undef;
2790         }
2791     }
2792 
2793     /* Catch the 'accessing inaccessible register' cases we can detect
2794      * at translate time.
2795      */
2796     switch (*tgtmode) {
2797     case ARM_CPU_MODE_MON:
2798         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2799             goto undef;
2800         }
2801         if (s->current_el == 1) {
2802             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2803              * then accesses to Mon registers trap to Secure EL2, if it exists,
2804              * otherwise EL3.
2805              */
2806             TCGv_i32 tcg_el;
2807 
2808             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2809                 dc_isar_feature(aa64_sel2, s)) {
2810                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2811                 tcg_el = load_cpu_field_low32(cp15.scr_el3);
2812                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2813                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2814             } else {
2815                 tcg_el = tcg_constant_i32(3);
2816             }
2817 
2818             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2819                                     syn_uncategorized(), tcg_el);
2820             return false;
2821         }
2822         break;
2823     case ARM_CPU_MODE_HYP:
2824         /*
2825          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2826          * (and so we can forbid accesses from EL2 or below). elr_hyp
2827          * can be accessed also from Hyp mode, so forbid accesses from
2828          * EL0 or EL1.
2829          */
2830         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2831             (s->current_el < 3 && *regno != 17)) {
2832             goto undef;
2833         }
2834         break;
2835     default:
2836         break;
2837     }
2838 
2839     return true;
2840 
2841 undef:
2842     /* If we get here then some access check did not pass */
2843     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2844     return false;
2845 }
2846 
2847 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2848 {
2849     TCGv_i32 tcg_reg;
2850     int tgtmode = 0, regno = 0;
2851 
2852     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2853         return;
2854     }
2855 
2856     /* Sync state because msr_banked() can raise exceptions */
2857     gen_set_condexec(s);
2858     gen_update_pc(s, 0);
2859     tcg_reg = load_reg(s, rn);
2860     gen_helper_msr_banked(cpu_env, tcg_reg,
2861                           tcg_constant_i32(tgtmode),
2862                           tcg_constant_i32(regno));
2863     s->base.is_jmp = DISAS_UPDATE_EXIT;
2864 }
2865 
2866 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2867 {
2868     TCGv_i32 tcg_reg;
2869     int tgtmode = 0, regno = 0;
2870 
2871     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2872         return;
2873     }
2874 
2875     /* Sync state because mrs_banked() can raise exceptions */
2876     gen_set_condexec(s);
2877     gen_update_pc(s, 0);
2878     tcg_reg = tcg_temp_new_i32();
2879     gen_helper_mrs_banked(tcg_reg, cpu_env,
2880                           tcg_constant_i32(tgtmode),
2881                           tcg_constant_i32(regno));
2882     store_reg(s, rn, tcg_reg);
2883     s->base.is_jmp = DISAS_UPDATE_EXIT;
2884 }
2885 
2886 /* Store value to PC as for an exception return (i.e. don't
2887  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2888  * will do the masking based on the new value of the Thumb bit.
2889  */
2890 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2891 {
2892     tcg_gen_mov_i32(cpu_R[15], pc);
2893 }
2894 
2895 /* Generate a v6 exception return.  Marks both values as dead.  */
2896 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2897 {
2898     store_pc_exc_ret(s, pc);
2899     /* The cpsr_write_eret helper will mask the low bits of PC
2900      * appropriately depending on the new Thumb bit, so it must
2901      * be called after storing the new PC.
2902      */
2903     translator_io_start(&s->base);
2904     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2905     /* Must exit loop to check un-masked IRQs */
2906     s->base.is_jmp = DISAS_EXIT;
2907 }
2908 
2909 /* Generate an old-style exception return. Marks pc as dead. */
2910 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2911 {
2912     gen_rfe(s, pc, load_cpu_field(spsr));
2913 }
2914 
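/*
 * Expand a three-operand gvec operation whose out-of-line helper takes an
 * extra pointer argument: here the pointer addresses vfp.qc, so saturating
 * helpers can record cumulative saturation (QC) in the usual place.
 */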
2915 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2916                             uint32_t opr_sz, uint32_t max_sz,
2917                             gen_helper_gvec_3_ptr *fn)
2918 {
2919     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2920 
2921     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2922     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2923                        opr_sz, max_sz, 0, fn);
2924 }
2925 
2926 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2927                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2928 {
2929     static gen_helper_gvec_3_ptr * const fns[2] = {
2930         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2931     };
2932     tcg_debug_assert(vece >= 1 && vece <= 2);
2933     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2934 }
2935 
2936 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
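/*
 * Expanders for the vector "compare against zero" operations: each result
 * element becomes all ones when the comparison of the corresponding input
 * element against zero holds and all zeros otherwise, using negsetcond for
 * the scalar fallbacks and cmp_vec for the vector case.
 */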
2946 #define GEN_CMP0(NAME, COND)                                            \
2947     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2948     {                                                                   \
2949         tcg_gen_negsetcond_i32(COND, d, a, tcg_constant_i32(0));        \
2950     }                                                                   \
2951     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2952     {                                                                   \
2953         tcg_gen_negsetcond_i64(COND, d, a, tcg_constant_i64(0));        \
2954     }                                                                   \
2955     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2956     {                                                                   \
2957         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
2958         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
2959     }                                                                   \
2960     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
2961                             uint32_t opr_sz, uint32_t max_sz)           \
2962     {                                                                   \
2963         const GVecGen2 op[4] = {                                        \
2964             { .fno = gen_helper_gvec_##NAME##0_b,                       \
2965               .fniv = gen_##NAME##0_vec,                                \
2966               .opt_opc = vecop_list_cmp,                                \
2967               .vece = MO_8 },                                           \
2968             { .fno = gen_helper_gvec_##NAME##0_h,                       \
2969               .fniv = gen_##NAME##0_vec,                                \
2970               .opt_opc = vecop_list_cmp,                                \
2971               .vece = MO_16 },                                          \
2972             { .fni4 = gen_##NAME##0_i32,                                \
2973               .fniv = gen_##NAME##0_vec,                                \
2974               .opt_opc = vecop_list_cmp,                                \
2975               .vece = MO_32 },                                          \
2976             { .fni8 = gen_##NAME##0_i64,                                \
2977               .fniv = gen_##NAME##0_vec,                                \
2978               .opt_opc = vecop_list_cmp,                                \
2979               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
2980               .vece = MO_64 },                                          \
2981         };                                                              \
2982         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
2983     }
2984 
2985 static const TCGOpcode vecop_list_cmp[] = {
2986     INDEX_op_cmp_vec, 0
2987 };
2988 
2989 GEN_CMP0(ceq, TCG_COND_EQ)
2990 GEN_CMP0(cle, TCG_COND_LE)
2991 GEN_CMP0(cge, TCG_COND_GE)
2992 GEN_CMP0(clt, TCG_COND_LT)
2993 GEN_CMP0(cgt, TCG_COND_GT)
2994 
2995 #undef GEN_CMP0
2996 
2997 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2998 {
2999     tcg_gen_vec_sar8i_i64(a, a, shift);
3000     tcg_gen_vec_add8_i64(d, d, a);
3001 }
3002 
3003 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3004 {
3005     tcg_gen_vec_sar16i_i64(a, a, shift);
3006     tcg_gen_vec_add16_i64(d, d, a);
3007 }
3008 
3009 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3010 {
3011     tcg_gen_sari_i32(a, a, shift);
3012     tcg_gen_add_i32(d, d, a);
3013 }
3014 
3015 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3016 {
3017     tcg_gen_sari_i64(a, a, shift);
3018     tcg_gen_add_i64(d, d, a);
3019 }
3020 
3021 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3022 {
3023     tcg_gen_sari_vec(vece, a, a, sh);
3024     tcg_gen_add_vec(vece, d, d, a);
3025 }
3026 
3027 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3028                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3029 {
3030     static const TCGOpcode vecop_list[] = {
3031         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3032     };
3033     static const GVecGen2i ops[4] = {
3034         { .fni8 = gen_ssra8_i64,
3035           .fniv = gen_ssra_vec,
3036           .fno = gen_helper_gvec_ssra_b,
3037           .load_dest = true,
3038           .opt_opc = vecop_list,
3039           .vece = MO_8 },
3040         { .fni8 = gen_ssra16_i64,
3041           .fniv = gen_ssra_vec,
3042           .fno = gen_helper_gvec_ssra_h,
3043           .load_dest = true,
3044           .opt_opc = vecop_list,
3045           .vece = MO_16 },
3046         { .fni4 = gen_ssra32_i32,
3047           .fniv = gen_ssra_vec,
3048           .fno = gen_helper_gvec_ssra_s,
3049           .load_dest = true,
3050           .opt_opc = vecop_list,
3051           .vece = MO_32 },
3052         { .fni8 = gen_ssra64_i64,
3053           .fniv = gen_ssra_vec,
3054           .fno = gen_helper_gvec_ssra_d,
3055           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3056           .opt_opc = vecop_list,
3057           .load_dest = true,
3058           .vece = MO_64 },
3059     };
3060 
3061     /* tszimm encoding produces immediates in the range [1..esize]. */
3062     tcg_debug_assert(shift > 0);
3063     tcg_debug_assert(shift <= (8 << vece));
3064 
3065     /*
3066      * Shifts larger than the element size are architecturally valid.
3067      * A signed shift of that size yields all sign bits.
3068      */
3069     shift = MIN(shift, (8 << vece) - 1);
3070     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3071 }
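
/*
 * A standalone scalar sketch of the SSRA (signed shift right and accumulate)
 * element operation, including the clamp applied above when the shift equals
 * the element size (illustrative only, not used by the translator):
 */
static inline int32_t example_ssra32(int32_t d, int32_t a, int shift)
{
    if (shift > 31) {
        shift = 31;     /* all sign bits, as for shift == esize */
    }
    return (int32_t)((uint32_t)d + (uint32_t)(a >> shift));   /* wrapping add */
}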
3072 
3073 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3074 {
3075     tcg_gen_vec_shr8i_i64(a, a, shift);
3076     tcg_gen_vec_add8_i64(d, d, a);
3077 }
3078 
3079 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3080 {
3081     tcg_gen_vec_shr16i_i64(a, a, shift);
3082     tcg_gen_vec_add16_i64(d, d, a);
3083 }
3084 
3085 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3086 {
3087     tcg_gen_shri_i32(a, a, shift);
3088     tcg_gen_add_i32(d, d, a);
3089 }
3090 
3091 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3092 {
3093     tcg_gen_shri_i64(a, a, shift);
3094     tcg_gen_add_i64(d, d, a);
3095 }
3096 
3097 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3098 {
3099     tcg_gen_shri_vec(vece, a, a, sh);
3100     tcg_gen_add_vec(vece, d, d, a);
3101 }
3102 
3103 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3104                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3105 {
3106     static const TCGOpcode vecop_list[] = {
3107         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3108     };
3109     static const GVecGen2i ops[4] = {
3110         { .fni8 = gen_usra8_i64,
3111           .fniv = gen_usra_vec,
3112           .fno = gen_helper_gvec_usra_b,
3113           .load_dest = true,
3114           .opt_opc = vecop_list,
3115           .vece = MO_8, },
3116         { .fni8 = gen_usra16_i64,
3117           .fniv = gen_usra_vec,
3118           .fno = gen_helper_gvec_usra_h,
3119           .load_dest = true,
3120           .opt_opc = vecop_list,
3121           .vece = MO_16, },
3122         { .fni4 = gen_usra32_i32,
3123           .fniv = gen_usra_vec,
3124           .fno = gen_helper_gvec_usra_s,
3125           .load_dest = true,
3126           .opt_opc = vecop_list,
3127           .vece = MO_32, },
3128         { .fni8 = gen_usra64_i64,
3129           .fniv = gen_usra_vec,
3130           .fno = gen_helper_gvec_usra_d,
3131           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3132           .load_dest = true,
3133           .opt_opc = vecop_list,
3134           .vece = MO_64, },
3135     };
3136 
3137     /* tszimm encoding produces immediates in the range [1..esize]. */
3138     tcg_debug_assert(shift > 0);
3139     tcg_debug_assert(shift <= (8 << vece));
3140 
3141     /*
3142      * Shifts larger than the element size are architecturally valid.
3143      * An unsigned shift of that size feeds zeros to the accumulate: a nop.
3144      */
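    /*
     * For example, with MO_8 a requested shift of 8 would contribute
     * zero to every byte, so only the move below (which clears the
     * tail of the vector) is needed.
     */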
3145     if (shift < (8 << vece)) {
3146         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3147     } else {
3148         /* Nop, but we do need to clear the tail. */
3149         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3150     }
3151 }
3152 
3153 /*
3154  * Shift one less than the requested amount, and the low bit is
3155  * the rounding bit.  For the 8 and 16-bit operations, because we
3156  * mask the low bit, we can perform a normal integer shift instead
3157  * of a vector shift.
3158  */
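/*
 * Concretely, a rounding right shift of x by sh computes
 * (x + (1 << (sh - 1))) >> sh, rewritten here as
 * (x >> sh) + ((x >> (sh - 1)) & 1); e.g. with sh == 2 and x == 6,
 * (6 >> 2) + ((6 >> 1) & 1) == 1 + 1 == 2 == (6 + 2) >> 2.
 */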
3159 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3160 {
3161     TCGv_i64 t = tcg_temp_new_i64();
3162 
3163     tcg_gen_shri_i64(t, a, sh - 1);
3164     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3165     tcg_gen_vec_sar8i_i64(d, a, sh);
3166     tcg_gen_vec_add8_i64(d, d, t);
3167 }
3168 
3169 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3170 {
3171     TCGv_i64 t = tcg_temp_new_i64();
3172 
3173     tcg_gen_shri_i64(t, a, sh - 1);
3174     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3175     tcg_gen_vec_sar16i_i64(d, a, sh);
3176     tcg_gen_vec_add16_i64(d, d, t);
3177 }
3178 
3179 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3180 {
3181     TCGv_i32 t;
3182 
3183     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3184     if (sh == 32) {
3185         tcg_gen_movi_i32(d, 0);
3186         return;
3187     }
3188     t = tcg_temp_new_i32();
3189     tcg_gen_extract_i32(t, a, sh - 1, 1);
3190     tcg_gen_sari_i32(d, a, sh);
3191     tcg_gen_add_i32(d, d, t);
3192 }
3193 
3194 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3195 {
3196     TCGv_i64 t = tcg_temp_new_i64();
3197 
3198     tcg_gen_extract_i64(t, a, sh - 1, 1);
3199     tcg_gen_sari_i64(d, a, sh);
3200     tcg_gen_add_i64(d, d, t);
3201 }
3202 
3203 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3204 {
3205     TCGv_vec t = tcg_temp_new_vec_matching(d);
3206     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3207 
3208     tcg_gen_shri_vec(vece, t, a, sh - 1);
3209     tcg_gen_dupi_vec(vece, ones, 1);
3210     tcg_gen_and_vec(vece, t, t, ones);
3211     tcg_gen_sari_vec(vece, d, a, sh);
3212     tcg_gen_add_vec(vece, d, d, t);
3213 }
3214 
3215 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3216                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3217 {
3218     static const TCGOpcode vecop_list[] = {
3219         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3220     };
3221     static const GVecGen2i ops[4] = {
3222         { .fni8 = gen_srshr8_i64,
3223           .fniv = gen_srshr_vec,
3224           .fno = gen_helper_gvec_srshr_b,
3225           .opt_opc = vecop_list,
3226           .vece = MO_8 },
3227         { .fni8 = gen_srshr16_i64,
3228           .fniv = gen_srshr_vec,
3229           .fno = gen_helper_gvec_srshr_h,
3230           .opt_opc = vecop_list,
3231           .vece = MO_16 },
3232         { .fni4 = gen_srshr32_i32,
3233           .fniv = gen_srshr_vec,
3234           .fno = gen_helper_gvec_srshr_s,
3235           .opt_opc = vecop_list,
3236           .vece = MO_32 },
3237         { .fni8 = gen_srshr64_i64,
3238           .fniv = gen_srshr_vec,
3239           .fno = gen_helper_gvec_srshr_d,
3240           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3241           .opt_opc = vecop_list,
3242           .vece = MO_64 },
3243     };
3244 
3245     /* tszimm encoding produces immediates in the range [1..esize] */
3246     tcg_debug_assert(shift > 0);
3247     tcg_debug_assert(shift <= (8 << vece));
3248 
3249     if (shift == (8 << vece)) {
3250         /*
3251          * Shifts larger than the element size are architecturally valid.
3252          * A signed shift of that size yields all sign bits.  With rounding,
3253          * this produces (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0;
3254          * i.e. always zero.
3255          */
3256         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3257     } else {
3258         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3259     }
3260 }
3261 
3262 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3263 {
3264     TCGv_i64 t = tcg_temp_new_i64();
3265 
3266     gen_srshr8_i64(t, a, sh);
3267     tcg_gen_vec_add8_i64(d, d, t);
3268 }
3269 
3270 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3271 {
3272     TCGv_i64 t = tcg_temp_new_i64();
3273 
3274     gen_srshr16_i64(t, a, sh);
3275     tcg_gen_vec_add16_i64(d, d, t);
3276 }
3277 
3278 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3279 {
3280     TCGv_i32 t = tcg_temp_new_i32();
3281 
3282     gen_srshr32_i32(t, a, sh);
3283     tcg_gen_add_i32(d, d, t);
3284 }
3285 
3286 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3287 {
3288     TCGv_i64 t = tcg_temp_new_i64();
3289 
3290     gen_srshr64_i64(t, a, sh);
3291     tcg_gen_add_i64(d, d, t);
3292 }
3293 
3294 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3295 {
3296     TCGv_vec t = tcg_temp_new_vec_matching(d);
3297 
3298     gen_srshr_vec(vece, t, a, sh);
3299     tcg_gen_add_vec(vece, d, d, t);
3300 }
3301 
3302 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3303                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3304 {
3305     static const TCGOpcode vecop_list[] = {
3306         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3307     };
3308     static const GVecGen2i ops[4] = {
3309         { .fni8 = gen_srsra8_i64,
3310           .fniv = gen_srsra_vec,
3311           .fno = gen_helper_gvec_srsra_b,
3312           .opt_opc = vecop_list,
3313           .load_dest = true,
3314           .vece = MO_8 },
3315         { .fni8 = gen_srsra16_i64,
3316           .fniv = gen_srsra_vec,
3317           .fno = gen_helper_gvec_srsra_h,
3318           .opt_opc = vecop_list,
3319           .load_dest = true,
3320           .vece = MO_16 },
3321         { .fni4 = gen_srsra32_i32,
3322           .fniv = gen_srsra_vec,
3323           .fno = gen_helper_gvec_srsra_s,
3324           .opt_opc = vecop_list,
3325           .load_dest = true,
3326           .vece = MO_32 },
3327         { .fni8 = gen_srsra64_i64,
3328           .fniv = gen_srsra_vec,
3329           .fno = gen_helper_gvec_srsra_d,
3330           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3331           .opt_opc = vecop_list,
3332           .load_dest = true,
3333           .vece = MO_64 },
3334     };
3335 
3336     /* tszimm encoding produces immediates in the range [1..esize] */
3337     tcg_debug_assert(shift > 0);
3338     tcg_debug_assert(shift <= (8 << vece));
3339 
3340     /*
3341      * Shifts larger than the element size are architecturally valid.
3342      * A signed shift of that size yields all sign bits.  With rounding,
3343      * this produces (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0; i.e. always
3344      * zero.  With accumulation, this leaves D unchanged.
3345      */
3346     if (shift == (8 << vece)) {
3347         /* Nop, but we do need to clear the tail. */
3348         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3349     } else {
3350         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3351     }
3352 }
3353 
3354 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3355 {
3356     TCGv_i64 t = tcg_temp_new_i64();
3357 
3358     tcg_gen_shri_i64(t, a, sh - 1);
3359     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3360     tcg_gen_vec_shr8i_i64(d, a, sh);
3361     tcg_gen_vec_add8_i64(d, d, t);
3362 }
3363 
3364 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3365 {
3366     TCGv_i64 t = tcg_temp_new_i64();
3367 
3368     tcg_gen_shri_i64(t, a, sh - 1);
3369     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3370     tcg_gen_vec_shr16i_i64(d, a, sh);
3371     tcg_gen_vec_add16_i64(d, d, t);
3372 }
3373 
3374 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3375 {
3376     TCGv_i32 t;
3377 
3378     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3379     if (sh == 32) {
3380         tcg_gen_extract_i32(d, a, sh - 1, 1);
3381         return;
3382     }
3383     t = tcg_temp_new_i32();
3384     tcg_gen_extract_i32(t, a, sh - 1, 1);
3385     tcg_gen_shri_i32(d, a, sh);
3386     tcg_gen_add_i32(d, d, t);
3387 }
3388 
3389 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3390 {
3391     TCGv_i64 t = tcg_temp_new_i64();
3392 
3393     tcg_gen_extract_i64(t, a, sh - 1, 1);
3394     tcg_gen_shri_i64(d, a, sh);
3395     tcg_gen_add_i64(d, d, t);
3396 }
3397 
3398 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3399 {
3400     TCGv_vec t = tcg_temp_new_vec_matching(d);
3401     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3402 
3403     tcg_gen_shri_vec(vece, t, a, shift - 1);
3404     tcg_gen_dupi_vec(vece, ones, 1);
3405     tcg_gen_and_vec(vece, t, t, ones);
3406     tcg_gen_shri_vec(vece, d, a, shift);
3407     tcg_gen_add_vec(vece, d, d, t);
3408 }
3409 
3410 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3411                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3412 {
3413     static const TCGOpcode vecop_list[] = {
3414         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3415     };
3416     static const GVecGen2i ops[4] = {
3417         { .fni8 = gen_urshr8_i64,
3418           .fniv = gen_urshr_vec,
3419           .fno = gen_helper_gvec_urshr_b,
3420           .opt_opc = vecop_list,
3421           .vece = MO_8 },
3422         { .fni8 = gen_urshr16_i64,
3423           .fniv = gen_urshr_vec,
3424           .fno = gen_helper_gvec_urshr_h,
3425           .opt_opc = vecop_list,
3426           .vece = MO_16 },
3427         { .fni4 = gen_urshr32_i32,
3428           .fniv = gen_urshr_vec,
3429           .fno = gen_helper_gvec_urshr_s,
3430           .opt_opc = vecop_list,
3431           .vece = MO_32 },
3432         { .fni8 = gen_urshr64_i64,
3433           .fniv = gen_urshr_vec,
3434           .fno = gen_helper_gvec_urshr_d,
3435           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3436           .opt_opc = vecop_list,
3437           .vece = MO_64 },
3438     };
3439 
3440     /* tszimm encoding produces immediates in the range [1..esize] */
3441     tcg_debug_assert(shift > 0);
3442     tcg_debug_assert(shift <= (8 << vece));
3443 
3444     if (shift == (8 << vece)) {
3445         /*
3446          * Shifts larger than the element size are architecturally valid.
3447          * An unsigned shift of that size yields zero.  With rounding,
3448          * this produces a copy of the most significant bit.
3449          */
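        /*
         * For example, with MO_8 and shift == 8 the rounded result is
         * (x + 0x80) >> 8, which is simply bit 7 of x; hence the plain
         * shift by (shift - 1) below.
         */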
3450         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3451     } else {
3452         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3453     }
3454 }
3455 
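/*
 * For the URSRA accumulate forms, a shift equal to the element size is
 * handled inline: the rounded result reduces to the most significant
 * bit of the input (see gen_gvec_urshr above), which is then added in.
 */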
3456 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3457 {
3458     TCGv_i64 t = tcg_temp_new_i64();
3459 
3460     if (sh == 8) {
3461         tcg_gen_vec_shr8i_i64(t, a, 7);
3462     } else {
3463         gen_urshr8_i64(t, a, sh);
3464     }
3465     tcg_gen_vec_add8_i64(d, d, t);
3466 }
3467 
3468 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3469 {
3470     TCGv_i64 t = tcg_temp_new_i64();
3471 
3472     if (sh == 16) {
3473         tcg_gen_vec_shr16i_i64(t, a, 15);
3474     } else {
3475         gen_urshr16_i64(t, a, sh);
3476     }
3477     tcg_gen_vec_add16_i64(d, d, t);
3478 }
3479 
3480 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3481 {
3482     TCGv_i32 t = tcg_temp_new_i32();
3483 
3484     if (sh == 32) {
3485         tcg_gen_shri_i32(t, a, 31);
3486     } else {
3487         gen_urshr32_i32(t, a, sh);
3488     }
3489     tcg_gen_add_i32(d, d, t);
3490 }
3491 
3492 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3493 {
3494     TCGv_i64 t = tcg_temp_new_i64();
3495 
3496     if (sh == 64) {
3497         tcg_gen_shri_i64(t, a, 63);
3498     } else {
3499         gen_urshr64_i64(t, a, sh);
3500     }
3501     tcg_gen_add_i64(d, d, t);
3502 }
3503 
3504 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3505 {
3506     TCGv_vec t = tcg_temp_new_vec_matching(d);
3507 
3508     if (sh == (8 << vece)) {
3509         tcg_gen_shri_vec(vece, t, a, sh - 1);
3510     } else {
3511         gen_urshr_vec(vece, t, a, sh);
3512     }
3513     tcg_gen_add_vec(vece, d, d, t);
3514 }
3515 
3516 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3517                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3518 {
3519     static const TCGOpcode vecop_list[] = {
3520         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3521     };
3522     static const GVecGen2i ops[4] = {
3523         { .fni8 = gen_ursra8_i64,
3524           .fniv = gen_ursra_vec,
3525           .fno = gen_helper_gvec_ursra_b,
3526           .opt_opc = vecop_list,
3527           .load_dest = true,
3528           .vece = MO_8 },
3529         { .fni8 = gen_ursra16_i64,
3530           .fniv = gen_ursra_vec,
3531           .fno = gen_helper_gvec_ursra_h,
3532           .opt_opc = vecop_list,
3533           .load_dest = true,
3534           .vece = MO_16 },
3535         { .fni4 = gen_ursra32_i32,
3536           .fniv = gen_ursra_vec,
3537           .fno = gen_helper_gvec_ursra_s,
3538           .opt_opc = vecop_list,
3539           .load_dest = true,
3540           .vece = MO_32 },
3541         { .fni8 = gen_ursra64_i64,
3542           .fniv = gen_ursra_vec,
3543           .fno = gen_helper_gvec_ursra_d,
3544           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3545           .opt_opc = vecop_list,
3546           .load_dest = true,
3547           .vece = MO_64 },
3548     };
3549 
3550     /* tszimm encoding produces immediates in the range [1..esize] */
3551     tcg_debug_assert(shift > 0);
3552     tcg_debug_assert(shift <= (8 << vece));
3553 
3554     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3555 }
3556 
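/*
 * SRI (shift right and insert): the shifted element from A occupies
 * the low (esize - shift) bits of each element of D, while the top
 * 'shift' bits of D are preserved.
 */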
3557 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3558 {
3559     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3560     TCGv_i64 t = tcg_temp_new_i64();
3561 
3562     tcg_gen_shri_i64(t, a, shift);
3563     tcg_gen_andi_i64(t, t, mask);
3564     tcg_gen_andi_i64(d, d, ~mask);
3565     tcg_gen_or_i64(d, d, t);
3566 }
3567 
3568 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3569 {
3570     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3571     TCGv_i64 t = tcg_temp_new_i64();
3572 
3573     tcg_gen_shri_i64(t, a, shift);
3574     tcg_gen_andi_i64(t, t, mask);
3575     tcg_gen_andi_i64(d, d, ~mask);
3576     tcg_gen_or_i64(d, d, t);
3577 }
3578 
3579 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3580 {
3581     tcg_gen_shri_i32(a, a, shift);
3582     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3583 }
3584 
3585 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3586 {
3587     tcg_gen_shri_i64(a, a, shift);
3588     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3589 }
3590 
3591 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3592 {
3593     TCGv_vec t = tcg_temp_new_vec_matching(d);
3594     TCGv_vec m = tcg_temp_new_vec_matching(d);
3595 
3596     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3597     tcg_gen_shri_vec(vece, t, a, sh);
3598     tcg_gen_and_vec(vece, d, d, m);
3599     tcg_gen_or_vec(vece, d, d, t);
3600 }
3601 
3602 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3603                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3604 {
3605     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3606     static const GVecGen2i ops[4] = {
3607         { .fni8 = gen_shr8_ins_i64,
3608           .fniv = gen_shr_ins_vec,
3609           .fno = gen_helper_gvec_sri_b,
3610           .load_dest = true,
3611           .opt_opc = vecop_list,
3612           .vece = MO_8 },
3613         { .fni8 = gen_shr16_ins_i64,
3614           .fniv = gen_shr_ins_vec,
3615           .fno = gen_helper_gvec_sri_h,
3616           .load_dest = true,
3617           .opt_opc = vecop_list,
3618           .vece = MO_16 },
3619         { .fni4 = gen_shr32_ins_i32,
3620           .fniv = gen_shr_ins_vec,
3621           .fno = gen_helper_gvec_sri_s,
3622           .load_dest = true,
3623           .opt_opc = vecop_list,
3624           .vece = MO_32 },
3625         { .fni8 = gen_shr64_ins_i64,
3626           .fniv = gen_shr_ins_vec,
3627           .fno = gen_helper_gvec_sri_d,
3628           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3629           .load_dest = true,
3630           .opt_opc = vecop_list,
3631           .vece = MO_64 },
3632     };
3633 
3634     /* tszimm encoding produces immediates in the range [1..esize]. */
3635     tcg_debug_assert(shift > 0);
3636     tcg_debug_assert(shift <= (8 << vece));
3637 
3638     /* Shift of esize leaves destination unchanged. */
3639     if (shift < (8 << vece)) {
3640         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3641     } else {
3642         /* Nop, but we do need to clear the tail. */
3643         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3644     }
3645 }
3646 
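/*
 * SLI (shift left and insert): the shifted element from A occupies
 * the top (esize - shift) bits of each element of D, while the low
 * 'shift' bits of D are preserved.
 */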
3647 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3648 {
3649     uint64_t mask = dup_const(MO_8, 0xff << shift);
3650     TCGv_i64 t = tcg_temp_new_i64();
3651 
3652     tcg_gen_shli_i64(t, a, shift);
3653     tcg_gen_andi_i64(t, t, mask);
3654     tcg_gen_andi_i64(d, d, ~mask);
3655     tcg_gen_or_i64(d, d, t);
3656 }
3657 
3658 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3659 {
3660     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3661     TCGv_i64 t = tcg_temp_new_i64();
3662 
3663     tcg_gen_shli_i64(t, a, shift);
3664     tcg_gen_andi_i64(t, t, mask);
3665     tcg_gen_andi_i64(d, d, ~mask);
3666     tcg_gen_or_i64(d, d, t);
3667 }
3668 
3669 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3670 {
3671     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3672 }
3673 
3674 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3675 {
3676     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3677 }
3678 
3679 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3680 {
3681     TCGv_vec t = tcg_temp_new_vec_matching(d);
3682     TCGv_vec m = tcg_temp_new_vec_matching(d);
3683 
3684     tcg_gen_shli_vec(vece, t, a, sh);
3685     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3686     tcg_gen_and_vec(vece, d, d, m);
3687     tcg_gen_or_vec(vece, d, d, t);
3688 }
3689 
3690 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3691                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3692 {
3693     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3694     static const GVecGen2i ops[4] = {
3695         { .fni8 = gen_shl8_ins_i64,
3696           .fniv = gen_shl_ins_vec,
3697           .fno = gen_helper_gvec_sli_b,
3698           .load_dest = true,
3699           .opt_opc = vecop_list,
3700           .vece = MO_8 },
3701         { .fni8 = gen_shl16_ins_i64,
3702           .fniv = gen_shl_ins_vec,
3703           .fno = gen_helper_gvec_sli_h,
3704           .load_dest = true,
3705           .opt_opc = vecop_list,
3706           .vece = MO_16 },
3707         { .fni4 = gen_shl32_ins_i32,
3708           .fniv = gen_shl_ins_vec,
3709           .fno = gen_helper_gvec_sli_s,
3710           .load_dest = true,
3711           .opt_opc = vecop_list,
3712           .vece = MO_32 },
3713         { .fni8 = gen_shl64_ins_i64,
3714           .fniv = gen_shl_ins_vec,
3715           .fno = gen_helper_gvec_sli_d,
3716           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3717           .load_dest = true,
3718           .opt_opc = vecop_list,
3719           .vece = MO_64 },
3720     };
3721 
3722     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3723     tcg_debug_assert(shift >= 0);
3724     tcg_debug_assert(shift < (8 << vece));
3725 
3726     if (shift == 0) {
3727         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3728     } else {
3729         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3730     }
3731 }
3732 
3733 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3734 {
3735     gen_helper_neon_mul_u8(a, a, b);
3736     gen_helper_neon_add_u8(d, d, a);
3737 }
3738 
3739 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3740 {
3741     gen_helper_neon_mul_u8(a, a, b);
3742     gen_helper_neon_sub_u8(d, d, a);
3743 }
3744 
3745 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3746 {
3747     gen_helper_neon_mul_u16(a, a, b);
3748     gen_helper_neon_add_u16(d, d, a);
3749 }
3750 
3751 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3752 {
3753     gen_helper_neon_mul_u16(a, a, b);
3754     gen_helper_neon_sub_u16(d, d, a);
3755 }
3756 
3757 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3758 {
3759     tcg_gen_mul_i32(a, a, b);
3760     tcg_gen_add_i32(d, d, a);
3761 }
3762 
3763 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3764 {
3765     tcg_gen_mul_i32(a, a, b);
3766     tcg_gen_sub_i32(d, d, a);
3767 }
3768 
3769 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3770 {
3771     tcg_gen_mul_i64(a, a, b);
3772     tcg_gen_add_i64(d, d, a);
3773 }
3774 
3775 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3776 {
3777     tcg_gen_mul_i64(a, a, b);
3778     tcg_gen_sub_i64(d, d, a);
3779 }
3780 
3781 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3782 {
3783     tcg_gen_mul_vec(vece, a, a, b);
3784     tcg_gen_add_vec(vece, d, d, a);
3785 }
3786 
3787 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3788 {
3789     tcg_gen_mul_vec(vece, a, a, b);
3790     tcg_gen_sub_vec(vece, d, d, a);
3791 }
3792 
3793 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3794  * these tables are shared with AArch64, which does support them.
3795  */
3796 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3797                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3798 {
3799     static const TCGOpcode vecop_list[] = {
3800         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3801     };
3802     static const GVecGen3 ops[4] = {
3803         { .fni4 = gen_mla8_i32,
3804           .fniv = gen_mla_vec,
3805           .load_dest = true,
3806           .opt_opc = vecop_list,
3807           .vece = MO_8 },
3808         { .fni4 = gen_mla16_i32,
3809           .fniv = gen_mla_vec,
3810           .load_dest = true,
3811           .opt_opc = vecop_list,
3812           .vece = MO_16 },
3813         { .fni4 = gen_mla32_i32,
3814           .fniv = gen_mla_vec,
3815           .load_dest = true,
3816           .opt_opc = vecop_list,
3817           .vece = MO_32 },
3818         { .fni8 = gen_mla64_i64,
3819           .fniv = gen_mla_vec,
3820           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3821           .load_dest = true,
3822           .opt_opc = vecop_list,
3823           .vece = MO_64 },
3824     };
3825     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3826 }
3827 
3828 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3829                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3830 {
3831     static const TCGOpcode vecop_list[] = {
3832         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3833     };
3834     static const GVecGen3 ops[4] = {
3835         { .fni4 = gen_mls8_i32,
3836           .fniv = gen_mls_vec,
3837           .load_dest = true,
3838           .opt_opc = vecop_list,
3839           .vece = MO_8 },
3840         { .fni4 = gen_mls16_i32,
3841           .fniv = gen_mls_vec,
3842           .load_dest = true,
3843           .opt_opc = vecop_list,
3844           .vece = MO_16 },
3845         { .fni4 = gen_mls32_i32,
3846           .fniv = gen_mls_vec,
3847           .load_dest = true,
3848           .opt_opc = vecop_list,
3849           .vece = MO_32 },
3850         { .fni8 = gen_mls64_i64,
3851           .fniv = gen_mls_vec,
3852           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3853           .load_dest = true,
3854           .opt_opc = vecop_list,
3855           .vece = MO_64 },
3856     };
3857     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3858 }
3859 
3860 /* CMTST: test is "if ((X & Y) != 0)". */
3861 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3862 {
3863     tcg_gen_and_i32(d, a, b);
3864     tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
3865 }
3866 
3867 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3868 {
3869     tcg_gen_and_i64(d, a, b);
3870     tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
3871 }
3872 
3873 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3874 {
3875     tcg_gen_and_vec(vece, d, a, b);
3876     tcg_gen_dupi_vec(vece, a, 0);
3877     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3878 }
3879 
3880 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3881                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3882 {
3883     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3884     static const GVecGen3 ops[4] = {
3885         { .fni4 = gen_helper_neon_tst_u8,
3886           .fniv = gen_cmtst_vec,
3887           .opt_opc = vecop_list,
3888           .vece = MO_8 },
3889         { .fni4 = gen_helper_neon_tst_u16,
3890           .fniv = gen_cmtst_vec,
3891           .opt_opc = vecop_list,
3892           .vece = MO_16 },
3893         { .fni4 = gen_cmtst_i32,
3894           .fniv = gen_cmtst_vec,
3895           .opt_opc = vecop_list,
3896           .vece = MO_32 },
3897         { .fni8 = gen_cmtst_i64,
3898           .fniv = gen_cmtst_vec,
3899           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3900           .opt_opc = vecop_list,
3901           .vece = MO_64 },
3902     };
3903     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3904 }
3905 
3906 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3907 {
3908     TCGv_i32 lval = tcg_temp_new_i32();
3909     TCGv_i32 rval = tcg_temp_new_i32();
3910     TCGv_i32 lsh = tcg_temp_new_i32();
3911     TCGv_i32 rsh = tcg_temp_new_i32();
3912     TCGv_i32 zero = tcg_constant_i32(0);
3913     TCGv_i32 max = tcg_constant_i32(32);
3914 
3915     /*
3916      * Rely on the TCG guarantee that out of range shifts produce
3917      * unspecified results, not undefined behaviour (i.e. no trap).
3918      * Discard out-of-range results after the fact.
3919      */
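    /*
     * The effective shift is the signed byte in the low 8 bits of SHIFT:
     * non-negative values select the left shift, negative values the
     * logical right shift, and any magnitude of 32 or more produces
     * zero via the movcond selects below.
     */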
3920     tcg_gen_ext8s_i32(lsh, shift);
3921     tcg_gen_neg_i32(rsh, lsh);
3922     tcg_gen_shl_i32(lval, src, lsh);
3923     tcg_gen_shr_i32(rval, src, rsh);
3924     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3925     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3926 }
3927 
3928 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3929 {
3930     TCGv_i64 lval = tcg_temp_new_i64();
3931     TCGv_i64 rval = tcg_temp_new_i64();
3932     TCGv_i64 lsh = tcg_temp_new_i64();
3933     TCGv_i64 rsh = tcg_temp_new_i64();
3934     TCGv_i64 zero = tcg_constant_i64(0);
3935     TCGv_i64 max = tcg_constant_i64(64);
3936 
3937     /*
3938      * Rely on the TCG guarantee that out of range shifts produce
3939      * unspecified results, not undefined behaviour (i.e. no trap).
3940      * Discard out-of-range results after the fact.
3941      */
3942     tcg_gen_ext8s_i64(lsh, shift);
3943     tcg_gen_neg_i64(rsh, lsh);
3944     tcg_gen_shl_i64(lval, src, lsh);
3945     tcg_gen_shr_i64(rval, src, rsh);
3946     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3947     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3948 }
3949 
3950 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3951                          TCGv_vec src, TCGv_vec shift)
3952 {
3953     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3954     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3955     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3956     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3957     TCGv_vec msk, max;
3958 
3959     tcg_gen_neg_vec(vece, rsh, shift);
3960     if (vece == MO_8) {
3961         tcg_gen_mov_vec(lsh, shift);
3962     } else {
3963         msk = tcg_temp_new_vec_matching(dst);
3964         tcg_gen_dupi_vec(vece, msk, 0xff);
3965         tcg_gen_and_vec(vece, lsh, shift, msk);
3966         tcg_gen_and_vec(vece, rsh, rsh, msk);
3967     }
3968 
3969     /*
3970      * Rely on the TCG guarantee that out of range shifts produce
3971      * unspecified results, not undefined behaviour (i.e. no trap).
3972      * Discard out-of-range results after the fact.
3973      */
3974     tcg_gen_shlv_vec(vece, lval, src, lsh);
3975     tcg_gen_shrv_vec(vece, rval, src, rsh);
3976 
3977     max = tcg_temp_new_vec_matching(dst);
3978     tcg_gen_dupi_vec(vece, max, 8 << vece);
3979 
3980     /*
3981      * The choice of LT (signed) and GEU (unsigned) is biased toward
3982      * the instructions of the x86_64 host.  For MO_8, the whole byte
3983      * is significant so we must use an unsigned compare; otherwise we
3984      * have already masked to a byte and so a signed compare works.
3985      * Other tcg hosts have a full set of comparisons and do not care.
3986      */
3987     if (vece == MO_8) {
3988         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3989         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3990         tcg_gen_andc_vec(vece, lval, lval, lsh);
3991         tcg_gen_andc_vec(vece, rval, rval, rsh);
3992     } else {
3993         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3994         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3995         tcg_gen_and_vec(vece, lval, lval, lsh);
3996         tcg_gen_and_vec(vece, rval, rval, rsh);
3997     }
3998     tcg_gen_or_vec(vece, dst, lval, rval);
3999 }
4000 
4001 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4002                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4003 {
4004     static const TCGOpcode vecop_list[] = {
4005         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4006         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4007     };
4008     static const GVecGen3 ops[4] = {
4009         { .fniv = gen_ushl_vec,
4010           .fno = gen_helper_gvec_ushl_b,
4011           .opt_opc = vecop_list,
4012           .vece = MO_8 },
4013         { .fniv = gen_ushl_vec,
4014           .fno = gen_helper_gvec_ushl_h,
4015           .opt_opc = vecop_list,
4016           .vece = MO_16 },
4017         { .fni4 = gen_ushl_i32,
4018           .fniv = gen_ushl_vec,
4019           .opt_opc = vecop_list,
4020           .vece = MO_32 },
4021         { .fni8 = gen_ushl_i64,
4022           .fniv = gen_ushl_vec,
4023           .opt_opc = vecop_list,
4024           .vece = MO_64 },
4025     };
4026     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4027 }
4028 
4029 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4030 {
4031     TCGv_i32 lval = tcg_temp_new_i32();
4032     TCGv_i32 rval = tcg_temp_new_i32();
4033     TCGv_i32 lsh = tcg_temp_new_i32();
4034     TCGv_i32 rsh = tcg_temp_new_i32();
4035     TCGv_i32 zero = tcg_constant_i32(0);
4036     TCGv_i32 max = tcg_constant_i32(31);
4037 
4038     /*
4039      * Rely on the TCG guarantee that out of range shifts produce
4040      * unspecified results, not undefined behaviour (i.e. no trap).
4041      * Discard out-of-range results after the fact.
4042      */
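    /*
     * As above, the effective shift is the signed byte in the low 8
     * bits of SHIFT.  For SSHL a right shift of 32 or more is clamped
     * (via umin) to 31 so that it yields all sign bits, while a left
     * shift of 32 or more still yields zero.
     */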
4043     tcg_gen_ext8s_i32(lsh, shift);
4044     tcg_gen_neg_i32(rsh, lsh);
4045     tcg_gen_shl_i32(lval, src, lsh);
4046     tcg_gen_umin_i32(rsh, rsh, max);
4047     tcg_gen_sar_i32(rval, src, rsh);
4048     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4049     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4050 }
4051 
4052 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4053 {
4054     TCGv_i64 lval = tcg_temp_new_i64();
4055     TCGv_i64 rval = tcg_temp_new_i64();
4056     TCGv_i64 lsh = tcg_temp_new_i64();
4057     TCGv_i64 rsh = tcg_temp_new_i64();
4058     TCGv_i64 zero = tcg_constant_i64(0);
4059     TCGv_i64 max = tcg_constant_i64(63);
4060 
4061     /*
4062      * Rely on the TCG guarantee that out of range shifts produce
4063      * unspecified results, not undefined behaviour (i.e. no trap).
4064      * Discard out-of-range results after the fact.
4065      */
4066     tcg_gen_ext8s_i64(lsh, shift);
4067     tcg_gen_neg_i64(rsh, lsh);
4068     tcg_gen_shl_i64(lval, src, lsh);
4069     tcg_gen_umin_i64(rsh, rsh, max);
4070     tcg_gen_sar_i64(rval, src, rsh);
4071     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4072     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4073 }
4074 
4075 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4076                          TCGv_vec src, TCGv_vec shift)
4077 {
4078     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4079     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4080     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4081     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4082     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4083 
4084     /*
4085      * Rely on the TCG guarantee that out of range shifts produce
4086      * unspecified results, not undefined behaviour (i.e. no trap).
4087      * Discard out-of-range results after the fact.
4088      */
4089     tcg_gen_neg_vec(vece, rsh, shift);
4090     if (vece == MO_8) {
4091         tcg_gen_mov_vec(lsh, shift);
4092     } else {
4093         tcg_gen_dupi_vec(vece, tmp, 0xff);
4094         tcg_gen_and_vec(vece, lsh, shift, tmp);
4095         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4096     }
4097 
4098     /* Bound rsh so that an out-of-range right shift yields all sign bits.  */
4099     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4100     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4101     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4102 
4103     tcg_gen_shlv_vec(vece, lval, src, lsh);
4104     tcg_gen_sarv_vec(vece, rval, src, rsh);
4105 
4106     /* Select in-bound left shift.  */
4107     tcg_gen_andc_vec(vece, lval, lval, tmp);
4108 
4109     /* Select between left and right shift.  */
4110     if (vece == MO_8) {
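    /*
     * For MO_8 the shift operand is the whole element, so a signed
     * compare against zero identifies right shifts; for wider elements
     * lsh has been masked to [0, 0xff], so "negative" shift counts are
     * exactly the values >= 0x80.
     */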
4111         tcg_gen_dupi_vec(vece, tmp, 0);
4112         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4113     } else {
4114         tcg_gen_dupi_vec(vece, tmp, 0x80);
4115         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4116     }
4117 }
4118 
4119 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4120                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4121 {
4122     static const TCGOpcode vecop_list[] = {
4123         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4124         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4125     };
4126     static const GVecGen3 ops[4] = {
4127         { .fniv = gen_sshl_vec,
4128           .fno = gen_helper_gvec_sshl_b,
4129           .opt_opc = vecop_list,
4130           .vece = MO_8 },
4131         { .fniv = gen_sshl_vec,
4132           .fno = gen_helper_gvec_sshl_h,
4133           .opt_opc = vecop_list,
4134           .vece = MO_16 },
4135         { .fni4 = gen_sshl_i32,
4136           .fniv = gen_sshl_vec,
4137           .opt_opc = vecop_list,
4138           .vece = MO_32 },
4139         { .fni8 = gen_sshl_i64,
4140           .fniv = gen_sshl_vec,
4141           .opt_opc = vecop_list,
4142           .vece = MO_64 },
4143     };
4144     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4145 }
4146 
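/*
 * Saturating add/sub with the cumulative saturation (QC) flag: compute
 * the wrapping result and the saturating result, then OR into 'sat'
 * any lanes in which the two differ, i.e. the lanes that saturated.
 */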
4147 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4148                           TCGv_vec a, TCGv_vec b)
4149 {
4150     TCGv_vec x = tcg_temp_new_vec_matching(t);
4151     tcg_gen_add_vec(vece, x, a, b);
4152     tcg_gen_usadd_vec(vece, t, a, b);
4153     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4154     tcg_gen_or_vec(vece, sat, sat, x);
4155 }
4156 
4157 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4158                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4159 {
4160     static const TCGOpcode vecop_list[] = {
4161         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4162     };
4163     static const GVecGen4 ops[4] = {
4164         { .fniv = gen_uqadd_vec,
4165           .fno = gen_helper_gvec_uqadd_b,
4166           .write_aofs = true,
4167           .opt_opc = vecop_list,
4168           .vece = MO_8 },
4169         { .fniv = gen_uqadd_vec,
4170           .fno = gen_helper_gvec_uqadd_h,
4171           .write_aofs = true,
4172           .opt_opc = vecop_list,
4173           .vece = MO_16 },
4174         { .fniv = gen_uqadd_vec,
4175           .fno = gen_helper_gvec_uqadd_s,
4176           .write_aofs = true,
4177           .opt_opc = vecop_list,
4178           .vece = MO_32 },
4179         { .fniv = gen_uqadd_vec,
4180           .fno = gen_helper_gvec_uqadd_d,
4181           .write_aofs = true,
4182           .opt_opc = vecop_list,
4183           .vece = MO_64 },
4184     };
4185     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4186                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4187 }
4188 
4189 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4190                           TCGv_vec a, TCGv_vec b)
4191 {
4192     TCGv_vec x = tcg_temp_new_vec_matching(t);
4193     tcg_gen_add_vec(vece, x, a, b);
4194     tcg_gen_ssadd_vec(vece, t, a, b);
4195     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4196     tcg_gen_or_vec(vece, sat, sat, x);
4197 }
4198 
4199 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4200                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4201 {
4202     static const TCGOpcode vecop_list[] = {
4203         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4204     };
4205     static const GVecGen4 ops[4] = {
4206         { .fniv = gen_sqadd_vec,
4207           .fno = gen_helper_gvec_sqadd_b,
4208           .opt_opc = vecop_list,
4209           .write_aofs = true,
4210           .vece = MO_8 },
4211         { .fniv = gen_sqadd_vec,
4212           .fno = gen_helper_gvec_sqadd_h,
4213           .opt_opc = vecop_list,
4214           .write_aofs = true,
4215           .vece = MO_16 },
4216         { .fniv = gen_sqadd_vec,
4217           .fno = gen_helper_gvec_sqadd_s,
4218           .opt_opc = vecop_list,
4219           .write_aofs = true,
4220           .vece = MO_32 },
4221         { .fniv = gen_sqadd_vec,
4222           .fno = gen_helper_gvec_sqadd_d,
4223           .opt_opc = vecop_list,
4224           .write_aofs = true,
4225           .vece = MO_64 },
4226     };
4227     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4228                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4229 }
4230 
4231 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4232                           TCGv_vec a, TCGv_vec b)
4233 {
4234     TCGv_vec x = tcg_temp_new_vec_matching(t);
4235     tcg_gen_sub_vec(vece, x, a, b);
4236     tcg_gen_ussub_vec(vece, t, a, b);
4237     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4238     tcg_gen_or_vec(vece, sat, sat, x);
4239 }
4240 
4241 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4242                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4243 {
4244     static const TCGOpcode vecop_list[] = {
4245         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4246     };
4247     static const GVecGen4 ops[4] = {
4248         { .fniv = gen_uqsub_vec,
4249           .fno = gen_helper_gvec_uqsub_b,
4250           .opt_opc = vecop_list,
4251           .write_aofs = true,
4252           .vece = MO_8 },
4253         { .fniv = gen_uqsub_vec,
4254           .fno = gen_helper_gvec_uqsub_h,
4255           .opt_opc = vecop_list,
4256           .write_aofs = true,
4257           .vece = MO_16 },
4258         { .fniv = gen_uqsub_vec,
4259           .fno = gen_helper_gvec_uqsub_s,
4260           .opt_opc = vecop_list,
4261           .write_aofs = true,
4262           .vece = MO_32 },
4263         { .fniv = gen_uqsub_vec,
4264           .fno = gen_helper_gvec_uqsub_d,
4265           .opt_opc = vecop_list,
4266           .write_aofs = true,
4267           .vece = MO_64 },
4268     };
4269     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4270                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4271 }
4272 
4273 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4274                           TCGv_vec a, TCGv_vec b)
4275 {
4276     TCGv_vec x = tcg_temp_new_vec_matching(t);
4277     tcg_gen_sub_vec(vece, x, a, b);
4278     tcg_gen_sssub_vec(vece, t, a, b);
4279     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4280     tcg_gen_or_vec(vece, sat, sat, x);
4281 }
4282 
4283 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4284                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4285 {
4286     static const TCGOpcode vecop_list[] = {
4287         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4288     };
4289     static const GVecGen4 ops[4] = {
4290         { .fniv = gen_sqsub_vec,
4291           .fno = gen_helper_gvec_sqsub_b,
4292           .opt_opc = vecop_list,
4293           .write_aofs = true,
4294           .vece = MO_8 },
4295         { .fniv = gen_sqsub_vec,
4296           .fno = gen_helper_gvec_sqsub_h,
4297           .opt_opc = vecop_list,
4298           .write_aofs = true,
4299           .vece = MO_16 },
4300         { .fniv = gen_sqsub_vec,
4301           .fno = gen_helper_gvec_sqsub_s,
4302           .opt_opc = vecop_list,
4303           .write_aofs = true,
4304           .vece = MO_32 },
4305         { .fniv = gen_sqsub_vec,
4306           .fno = gen_helper_gvec_sqsub_d,
4307           .opt_opc = vecop_list,
4308           .write_aofs = true,
4309           .vece = MO_64 },
4310     };
4311     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4312                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4313 }
4314 
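/*
 * Absolute difference: compute both a - b and b - a and select the
 * non-negative one with movcond; the vector forms use max - min
 * instead.  SABA/UABA below accumulate the result into D.
 */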
4315 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4316 {
4317     TCGv_i32 t = tcg_temp_new_i32();
4318 
4319     tcg_gen_sub_i32(t, a, b);
4320     tcg_gen_sub_i32(d, b, a);
4321     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4322 }
4323 
4324 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4325 {
4326     TCGv_i64 t = tcg_temp_new_i64();
4327 
4328     tcg_gen_sub_i64(t, a, b);
4329     tcg_gen_sub_i64(d, b, a);
4330     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4331 }
4332 
4333 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4334 {
4335     TCGv_vec t = tcg_temp_new_vec_matching(d);
4336 
4337     tcg_gen_smin_vec(vece, t, a, b);
4338     tcg_gen_smax_vec(vece, d, a, b);
4339     tcg_gen_sub_vec(vece, d, d, t);
4340 }
4341 
4342 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4343                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4344 {
4345     static const TCGOpcode vecop_list[] = {
4346         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4347     };
4348     static const GVecGen3 ops[4] = {
4349         { .fniv = gen_sabd_vec,
4350           .fno = gen_helper_gvec_sabd_b,
4351           .opt_opc = vecop_list,
4352           .vece = MO_8 },
4353         { .fniv = gen_sabd_vec,
4354           .fno = gen_helper_gvec_sabd_h,
4355           .opt_opc = vecop_list,
4356           .vece = MO_16 },
4357         { .fni4 = gen_sabd_i32,
4358           .fniv = gen_sabd_vec,
4359           .fno = gen_helper_gvec_sabd_s,
4360           .opt_opc = vecop_list,
4361           .vece = MO_32 },
4362         { .fni8 = gen_sabd_i64,
4363           .fniv = gen_sabd_vec,
4364           .fno = gen_helper_gvec_sabd_d,
4365           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4366           .opt_opc = vecop_list,
4367           .vece = MO_64 },
4368     };
4369     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4370 }
4371 
4372 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4373 {
4374     TCGv_i32 t = tcg_temp_new_i32();
4375 
4376     tcg_gen_sub_i32(t, a, b);
4377     tcg_gen_sub_i32(d, b, a);
4378     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4379 }
4380 
4381 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4382 {
4383     TCGv_i64 t = tcg_temp_new_i64();
4384 
4385     tcg_gen_sub_i64(t, a, b);
4386     tcg_gen_sub_i64(d, b, a);
4387     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4388 }
4389 
4390 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4391 {
4392     TCGv_vec t = tcg_temp_new_vec_matching(d);
4393 
4394     tcg_gen_umin_vec(vece, t, a, b);
4395     tcg_gen_umax_vec(vece, d, a, b);
4396     tcg_gen_sub_vec(vece, d, d, t);
4397 }
4398 
4399 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4400                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4401 {
4402     static const TCGOpcode vecop_list[] = {
4403         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4404     };
4405     static const GVecGen3 ops[4] = {
4406         { .fniv = gen_uabd_vec,
4407           .fno = gen_helper_gvec_uabd_b,
4408           .opt_opc = vecop_list,
4409           .vece = MO_8 },
4410         { .fniv = gen_uabd_vec,
4411           .fno = gen_helper_gvec_uabd_h,
4412           .opt_opc = vecop_list,
4413           .vece = MO_16 },
4414         { .fni4 = gen_uabd_i32,
4415           .fniv = gen_uabd_vec,
4416           .fno = gen_helper_gvec_uabd_s,
4417           .opt_opc = vecop_list,
4418           .vece = MO_32 },
4419         { .fni8 = gen_uabd_i64,
4420           .fniv = gen_uabd_vec,
4421           .fno = gen_helper_gvec_uabd_d,
4422           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4423           .opt_opc = vecop_list,
4424           .vece = MO_64 },
4425     };
4426     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4427 }
4428 
4429 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4430 {
4431     TCGv_i32 t = tcg_temp_new_i32();
4432     gen_sabd_i32(t, a, b);
4433     tcg_gen_add_i32(d, d, t);
4434 }
4435 
4436 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4437 {
4438     TCGv_i64 t = tcg_temp_new_i64();
4439     gen_sabd_i64(t, a, b);
4440     tcg_gen_add_i64(d, d, t);
4441 }
4442 
4443 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4444 {
4445     TCGv_vec t = tcg_temp_new_vec_matching(d);
4446     gen_sabd_vec(vece, t, a, b);
4447     tcg_gen_add_vec(vece, d, d, t);
4448 }
4449 
4450 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4451                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4452 {
4453     static const TCGOpcode vecop_list[] = {
4454         INDEX_op_sub_vec, INDEX_op_add_vec,
4455         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4456     };
4457     static const GVecGen3 ops[4] = {
4458         { .fniv = gen_saba_vec,
4459           .fno = gen_helper_gvec_saba_b,
4460           .opt_opc = vecop_list,
4461           .load_dest = true,
4462           .vece = MO_8 },
4463         { .fniv = gen_saba_vec,
4464           .fno = gen_helper_gvec_saba_h,
4465           .opt_opc = vecop_list,
4466           .load_dest = true,
4467           .vece = MO_16 },
4468         { .fni4 = gen_saba_i32,
4469           .fniv = gen_saba_vec,
4470           .fno = gen_helper_gvec_saba_s,
4471           .opt_opc = vecop_list,
4472           .load_dest = true,
4473           .vece = MO_32 },
4474         { .fni8 = gen_saba_i64,
4475           .fniv = gen_saba_vec,
4476           .fno = gen_helper_gvec_saba_d,
4477           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4478           .opt_opc = vecop_list,
4479           .load_dest = true,
4480           .vece = MO_64 },
4481     };
4482     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4483 }
4484 
4485 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4486 {
4487     TCGv_i32 t = tcg_temp_new_i32();
4488     gen_uabd_i32(t, a, b);
4489     tcg_gen_add_i32(d, d, t);
4490 }
4491 
4492 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4493 {
4494     TCGv_i64 t = tcg_temp_new_i64();
4495     gen_uabd_i64(t, a, b);
4496     tcg_gen_add_i64(d, d, t);
4497 }
4498 
4499 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4500 {
4501     TCGv_vec t = tcg_temp_new_vec_matching(d);
4502     gen_uabd_vec(vece, t, a, b);
4503     tcg_gen_add_vec(vece, d, d, t);
4504 }
4505 
4506 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4507                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4508 {
4509     static const TCGOpcode vecop_list[] = {
4510         INDEX_op_sub_vec, INDEX_op_add_vec,
4511         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4512     };
4513     static const GVecGen3 ops[4] = {
4514         { .fniv = gen_uaba_vec,
4515           .fno = gen_helper_gvec_uaba_b,
4516           .opt_opc = vecop_list,
4517           .load_dest = true,
4518           .vece = MO_8 },
4519         { .fniv = gen_uaba_vec,
4520           .fno = gen_helper_gvec_uaba_h,
4521           .opt_opc = vecop_list,
4522           .load_dest = true,
4523           .vece = MO_16 },
4524         { .fni4 = gen_uaba_i32,
4525           .fniv = gen_uaba_vec,
4526           .fno = gen_helper_gvec_uaba_s,
4527           .opt_opc = vecop_list,
4528           .load_dest = true,
4529           .vece = MO_32 },
4530         { .fni8 = gen_uaba_i64,
4531           .fniv = gen_uaba_vec,
4532           .fno = gen_helper_gvec_uaba_d,
4533           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4534           .opt_opc = vecop_list,
4535           .load_dest = true,
4536           .vece = MO_64 },
4537     };
4538     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4539 }
4540 
4541 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4542                            int opc1, int crn, int crm, int opc2,
4543                            bool isread, int rt, int rt2)
4544 {
4545     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4546     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4547     TCGv_ptr tcg_ri = NULL;
4548     bool need_exit_tb = false;
4549     uint32_t syndrome;
4550 
4551     /*
4552      * Note that since we are an implementation which takes an
4553      * exception on a trapped conditional instruction only if the
4554      * instruction passes its condition code check, we can take
4555      * advantage of the clause in the ARM ARM that allows us to set
4556      * the COND field in the instruction to 0xE in all cases.
4557      * We could fish the actual condition out of the insn (ARM)
4558      * or the condexec bits (Thumb) but it isn't necessary.
4559      */
4560     switch (cpnum) {
4561     case 14:
4562         if (is64) {
4563             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4564                                          isread, false);
4565         } else {
4566             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4567                                         rt, isread, false);
4568         }
4569         break;
4570     case 15:
4571         if (is64) {
4572             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4573                                          isread, false);
4574         } else {
4575             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4576                                         rt, isread, false);
4577         }
4578         break;
4579     default:
4580         /*
4581          * ARMv8 defines that only coprocessors 14 and 15 exist,
4582          * so this can only happen if this is an ARMv7 or earlier CPU,
4583          * in which case the syndrome information won't actually be
4584          * guest visible.
4585          */
4586         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4587         syndrome = syn_uncategorized();
4588         break;
4589     }
4590 
4591     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4592         /*
4593          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4594          * over the UNDEF for "no such register" or the UNDEF for "access
4595          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4596          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4597          * access_check_cp_reg(), after the checks for whether the access
4598          * is configurably trapped to EL1.
4599          */
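        /*
         * For example, an MCR/MRC access to a cp15 register with
         * CRn == 1 is trapped here when HSTR_EL2.T1 is set; for the
         * 64-bit MCRR/MRRC forms the CRm field selects the T bit.
         */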
4600         uint32_t maskbit = is64 ? crm : crn;
4601 
4602         if (maskbit != 4 && maskbit != 14) {
4603             /* T4 and T14 are RES0 so never cause traps */
4604             TCGv_i32 t;
4605             DisasLabel over = gen_disas_label(s);
4606 
4607             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4608             tcg_gen_andi_i32(t, t, 1u << maskbit);
4609             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4610 
4611             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4612             /*
4613              * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
4614              * but since we're conditionally branching over it, we want
4615              * to assume continue-to-next-instruction.
4616              */
4617             s->base.is_jmp = DISAS_NEXT;
4618             set_disas_label(s, over);
4619         }
4620     }
4621 
4622     if (!ri) {
4623         /*
4624          * Unknown register; this might be a guest error or a QEMU
4625          * unimplemented feature.
4626          */
4627         if (is64) {
4628             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4629                           "64 bit system register cp:%d opc1: %d crm:%d "
4630                           "(%s)\n",
4631                           isread ? "read" : "write", cpnum, opc1, crm,
4632                           s->ns ? "non-secure" : "secure");
4633         } else {
4634             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4635                           "system register cp:%d opc1:%d crn:%d crm:%d "
4636                           "opc2:%d (%s)\n",
4637                           isread ? "read" : "write", cpnum, opc1, crn,
4638                           crm, opc2, s->ns ? "non-secure" : "secure");
4639         }
4640         unallocated_encoding(s);
4641         return;
4642     }
4643 
4644     /* Check access permissions */
4645     if (!cp_access_ok(s->current_el, ri, isread)) {
4646         unallocated_encoding(s);
4647         return;
4648     }
4649 
4650     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4651         (ri->fgt && s->fgt_active) ||
4652         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4653         /*
4654          * Emit code to perform further access permissions checks at
4655          * runtime; this may result in an exception.
4656          * Note that on XScale all cp0..cp13 registers do an access check
4657          * call in order to handle c15_cpar.
4658          */
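        /*
         * Annotation: on success the access_check_cp_reg helper is expected
         * to hand back the ARMCPRegInfo pointer in tcg_ri, so the read/write
         * paths below can skip the gen_lookup_cp_reg() hashtable lookup.
         */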
4659         gen_set_condexec(s);
4660         gen_update_pc(s, 0);
4661         tcg_ri = tcg_temp_new_ptr();
4662         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4663                                        tcg_constant_i32(key),
4664                                        tcg_constant_i32(syndrome),
4665                                        tcg_constant_i32(isread));
4666     } else if (ri->type & ARM_CP_RAISES_EXC) {
4667         /*
4668          * The readfn or writefn might raise an exception;
4669          * synchronize the CPU state in case it does.
4670          */
4671         gen_set_condexec(s);
4672         gen_update_pc(s, 0);
4673     }
4674 
4675     /* Handle special cases first */
4676     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4677     case 0:
4678         break;
4679     case ARM_CP_NOP:
4680         return;
4681     case ARM_CP_WFI:
4682         if (isread) {
4683             unallocated_encoding(s);
4684         } else {
4685             gen_update_pc(s, curr_insn_len(s));
4686             s->base.is_jmp = DISAS_WFI;
4687         }
4688         return;
4689     default:
4690         g_assert_not_reached();
4691     }
4692 
4693     if (ri->type & ARM_CP_IO) {
4694         /* I/O operations must end the TB here (whether read or write) */
4695         need_exit_tb = translator_io_start(&s->base);
4696     }
4697 
4698     if (isread) {
4699         /* Read */
4700         if (is64) {
4701             TCGv_i64 tmp64;
4702             TCGv_i32 tmp;
4703             if (ri->type & ARM_CP_CONST) {
4704                 tmp64 = tcg_constant_i64(ri->resetvalue);
4705             } else if (ri->readfn) {
4706                 if (!tcg_ri) {
4707                     tcg_ri = gen_lookup_cp_reg(key);
4708                 }
4709                 tmp64 = tcg_temp_new_i64();
4710                 gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4711             } else {
4712                 tmp64 = tcg_temp_new_i64();
4713                 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4714             }
4715             tmp = tcg_temp_new_i32();
4716             tcg_gen_extrl_i64_i32(tmp, tmp64);
4717             store_reg(s, rt, tmp);
4718             tmp = tcg_temp_new_i32();
4719             tcg_gen_extrh_i64_i32(tmp, tmp64);
4720             store_reg(s, rt2, tmp);
4721         } else {
4722             TCGv_i32 tmp;
4723             if (ri->type & ARM_CP_CONST) {
4724                 tmp = tcg_constant_i32(ri->resetvalue);
4725             } else if (ri->readfn) {
4726                 if (!tcg_ri) {
4727                     tcg_ri = gen_lookup_cp_reg(key);
4728                 }
4729                 tmp = tcg_temp_new_i32();
4730                 gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4731             } else {
4732                 tmp = load_cpu_offset(ri->fieldoffset);
4733             }
4734             if (rt == 15) {
4735                 /* A destination register of r15 for 32-bit loads sets
4736                  * the condition codes from the high 4 bits of the value.
4737                  */
4738                 gen_set_nzcv(tmp);
4739             } else {
4740                 store_reg(s, rt, tmp);
4741             }
4742         }
4743     } else {
4744         /* Write */
4745         if (ri->type & ARM_CP_CONST) {
4746             /* If not forbidden by access permissions, treat as write-ignored (WI) */
4747             return;
4748         }
4749 
4750         if (is64) {
4751             TCGv_i32 tmplo, tmphi;
4752             TCGv_i64 tmp64 = tcg_temp_new_i64();
4753             tmplo = load_reg(s, rt);
4754             tmphi = load_reg(s, rt2);
4755             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4756             if (ri->writefn) {
4757                 if (!tcg_ri) {
4758                     tcg_ri = gen_lookup_cp_reg(key);
4759                 }
4760                 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4761             } else {
4762                 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4763             }
4764         } else {
4765             TCGv_i32 tmp = load_reg(s, rt);
4766             if (ri->writefn) {
4767                 if (!tcg_ri) {
4768                     tcg_ri = gen_lookup_cp_reg(key);
4769                 }
4770                 gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4771             } else {
4772                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4773             }
4774         }
4775     }
4776 
4777     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4778         /*
4779          * A write to any coprocessor register that ends a TB
4780          * must rebuild the hflags for the next TB.
4781          */
4782         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4783         /*
4784          * We default to ending the TB on a coprocessor register write,
4785          * but allow this to be suppressed by the register definition
4786          * (usually only necessary to work around guest bugs).
4787          */
4788         need_exit_tb = true;
4789     }
4790     if (need_exit_tb) {
4791         gen_lookup_tb(s);
4792     }
4793 }
4794 
4795 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4796 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4797 {
4798     int cpnum = (insn >> 8) & 0xf;
4799 
4800     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4801         unallocated_encoding(s);
4802     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4803         if (disas_iwmmxt_insn(s, insn)) {
4804             unallocated_encoding(s);
4805         }
4806     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4807         if (disas_dsp_insn(s, insn)) {
4808             unallocated_encoding(s);
4809         }
4810     }
4811 }
4812 
4813 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4814 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4815 {
4816     TCGv_i32 tmp;
4817     tmp = tcg_temp_new_i32();
4818     tcg_gen_extrl_i64_i32(tmp, val);
4819     store_reg(s, rlow, tmp);
4820     tmp = tcg_temp_new_i32();
4821     tcg_gen_extrh_i64_i32(tmp, val);
4822     store_reg(s, rhigh, tmp);
4823 }
4824 
4825 /* Load a 64-bit value from a register pair and add it to val.  */
4826 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4827 {
4828     TCGv_i64 tmp;
4829     TCGv_i32 tmpl;
4830     TCGv_i32 tmph;
4831 
4832     /* Load the 64-bit value rhigh:rlow.  */
4833     tmpl = load_reg(s, rlow);
4834     tmph = load_reg(s, rhigh);
4835     tmp = tcg_temp_new_i64();
4836     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4837     tcg_gen_add_i64(val, val, tmp);
4838 }
4839 
4840 /* Set N and Z flags from hi|lo.  */
4841 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4842 {
4843     tcg_gen_mov_i32(cpu_NF, hi);
4844     tcg_gen_or_i32(cpu_ZF, lo, hi);
4845 }
4846 
4847 /* Load/Store exclusive instructions are implemented by remembering
4848  * the value/address loaded, and seeing if these are the same
4849  * when the store is performed.  This should be sufficient to implement
4850  * the architecturally mandated semantics, and avoids having to monitor
4851  * regular stores.  The compare vs the remembered value is done during
4852  * the cmpxchg operation, but we must compare the addresses manually.  */
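/*
 * Annotation, informal guest-code sketch (not from the original source):
 *
 *     retry: ldrex  r1, [r0]
 *            add    r1, r1, #1
 *            strex  r2, r1, [r0]
 *            cmp    r2, #0
 *            bne    retry
 *
 * The LDREX maps to gen_load_exclusive(), which records the address and
 * loaded value in cpu_exclusive_addr/cpu_exclusive_val; the STREX maps to
 * gen_store_exclusive(), which cmpxchgs against the remembered value and
 * writes 0 (success) or 1 (failure) to r2.
 */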
4853 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4854                                TCGv_i32 addr, int size)
4855 {
4856     TCGv_i32 tmp = tcg_temp_new_i32();
4857     MemOp opc = size | MO_ALIGN | s->be_data;
4858 
4859     s->is_ldex = true;
4860 
4861     if (size == 3) {
4862         TCGv_i32 tmp2 = tcg_temp_new_i32();
4863         TCGv_i64 t64 = tcg_temp_new_i64();
4864 
4865         /*
4866          * For AArch32, architecturally the 32-bit word at the lowest
4867          * address is always Rt and the one at addr+4 is Rt2, even if
4868          * the CPU is big-endian. That means we don't want to do a
4869          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4870          * architecturally 64-bit access, but instead do a 64-bit access
4871          * using MO_BE if appropriate and then split the two halves.
4872          */
4873         TCGv taddr = gen_aa32_addr(s, addr, opc);
4874 
4875         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4876         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4877         if (s->be_data == MO_BE) {
4878             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4879         } else {
4880             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4881         }
4882         store_reg(s, rt2, tmp2);
4883     } else {
4884         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4885         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4886     }
4887 
4888     store_reg(s, rt, tmp);
4889     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4890 }
4891 
4892 static void gen_clrex(DisasContext *s)
4893 {
4894     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4895 }
4896 
4897 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4898                                 TCGv_i32 addr, int size)
4899 {
4900     TCGv_i32 t0, t1, t2;
4901     TCGv_i64 extaddr;
4902     TCGv taddr;
4903     TCGLabel *done_label;
4904     TCGLabel *fail_label;
4905     MemOp opc = size | MO_ALIGN | s->be_data;
4906 
4907     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4908          [addr] = {Rt};
4909          {Rd} = 0;
4910        } else {
4911          {Rd} = 1;
4912        } */
4913     fail_label = gen_new_label();
4914     done_label = gen_new_label();
4915     extaddr = tcg_temp_new_i64();
4916     tcg_gen_extu_i32_i64(extaddr, addr);
4917     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4918 
4919     taddr = gen_aa32_addr(s, addr, opc);
4920     t0 = tcg_temp_new_i32();
4921     t1 = load_reg(s, rt);
4922     if (size == 3) {
4923         TCGv_i64 o64 = tcg_temp_new_i64();
4924         TCGv_i64 n64 = tcg_temp_new_i64();
4925 
4926         t2 = load_reg(s, rt2);
4927 
4928         /*
4929          * For AArch32, architecturally the 32-bit word at the lowest
4930          * address is always Rt and the one at addr+4 is Rt2, even if
4931          * the CPU is big-endian. Since we're going to treat this as a
4932          * single 64-bit BE store, we need to put the two halves in the
4933          * opposite order for BE compared to LE, so that they end up in the right
4934          * places.  We don't want gen_aa32_st_i64, because that checks
4935          * SCTLR_B as if for an architectural 64-bit access.
4936          */
4937         if (s->be_data == MO_BE) {
4938             tcg_gen_concat_i32_i64(n64, t2, t1);
4939         } else {
4940             tcg_gen_concat_i32_i64(n64, t1, t2);
4941         }
4942 
4943         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4944                                    get_mem_index(s), opc);
4945 
4946         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4947         tcg_gen_extrl_i64_i32(t0, o64);
4948     } else {
4949         t2 = tcg_temp_new_i32();
4950         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4951         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4952         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4953     }
4954     tcg_gen_mov_i32(cpu_R[rd], t0);
4955     tcg_gen_br(done_label);
4956 
4957     gen_set_label(fail_label);
4958     tcg_gen_movi_i32(cpu_R[rd], 1);
4959     gen_set_label(done_label);
4960     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4961 }
4962 
4963 /* gen_srs:
4965  * @s: DisasContext
4966  * @mode: mode field from insn (which stack to store to)
4967  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4968  * @writeback: true if writeback bit set
4969  *
4970  * Generate code for the SRS (Store Return State) insn.
4971  */
4972 static void gen_srs(DisasContext *s,
4973                     uint32_t mode, uint32_t amode, bool writeback)
4974 {
4975     int32_t offset;
4976     TCGv_i32 addr, tmp;
4977     bool undef = false;
4978 
4979     /* SRS is:
4980      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4981      *   and specified mode is monitor mode
4982      * - UNDEFINED in Hyp mode
4983      * - UNPREDICTABLE in User or System mode
4984      * - UNPREDICTABLE if the specified mode is:
4985      * -- not implemented
4986      * -- not a valid mode number
4987      * -- a mode that's at a higher exception level
4988      * -- Monitor, if we are Non-secure
4989      * For the UNPREDICTABLE cases we choose to UNDEF.
4990      */
4991     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
4992         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
4993         return;
4994     }
4995 
4996     if (s->current_el == 0 || s->current_el == 2) {
4997         undef = true;
4998     }
4999 
5000     switch (mode) {
5001     case ARM_CPU_MODE_USR:
5002     case ARM_CPU_MODE_FIQ:
5003     case ARM_CPU_MODE_IRQ:
5004     case ARM_CPU_MODE_SVC:
5005     case ARM_CPU_MODE_ABT:
5006     case ARM_CPU_MODE_UND:
5007     case ARM_CPU_MODE_SYS:
5008         break;
5009     case ARM_CPU_MODE_HYP:
5010         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5011             undef = true;
5012         }
5013         break;
5014     case ARM_CPU_MODE_MON:
5015         /* No need to check specifically for "are we non-secure" because
5016          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5017          * so if this isn't EL3 then we must be non-secure.
5018          */
5019         if (s->current_el != 3) {
5020             undef = true;
5021         }
5022         break;
5023     default:
5024         undef = true;
5025     }
5026 
5027     if (undef) {
5028         unallocated_encoding(s);
5029         return;
5030     }
5031 
5032     addr = tcg_temp_new_i32();
5033     /* get_r13_banked() will raise an exception if called from System mode */
5034     gen_set_condexec(s);
5035     gen_update_pc(s, 0);
5036     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5037     switch (amode) {
5038     case 0: /* DA */
5039         offset = -4;
5040         break;
5041     case 1: /* IA */
5042         offset = 0;
5043         break;
5044     case 2: /* DB */
5045         offset = -8;
5046         break;
5047     case 3: /* IB */
5048         offset = 4;
5049         break;
5050     default:
5051         g_assert_not_reached();
5052     }
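    /*
     * Annotation: SRS stores LR at the lower address and SPSR at addr + 4,
     * so e.g. for DA (decrement-after) the pair occupies [SP - 4, SP],
     * giving the initial offset of -4 here and the writeback offset of -8
     * in the block below.
     */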
5053     tcg_gen_addi_i32(addr, addr, offset);
5054     tmp = load_reg(s, 14);
5055     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5056     tmp = load_cpu_field(spsr);
5057     tcg_gen_addi_i32(addr, addr, 4);
5058     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5059     if (writeback) {
5060         switch (amode) {
5061         case 0:
5062             offset = -8;
5063             break;
5064         case 1:
5065             offset = 4;
5066             break;
5067         case 2:
5068             offset = -4;
5069             break;
5070         case 3:
5071             offset = 0;
5072             break;
5073         default:
5074             g_assert_not_reached();
5075         }
5076         tcg_gen_addi_i32(addr, addr, offset);
5077         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5078     }
5079     s->base.is_jmp = DISAS_UPDATE_EXIT;
5080 }
5081 
5082 /* Skip this instruction if the ARM condition is false */
5083 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5084 {
5085     arm_gen_condlabel(s);
5086     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5087 }
5088 
5089 
5090 /*
5091  * Constant expanders used by T16/T32 decode
5092  */
5093 
5094 /* Return only the rotation part of T32ExpandImm.  */
5095 static int t32_expandimm_rot(DisasContext *s, int x)
5096 {
5097     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5098 }
5099 
5100 /* Return the unrotated immediate from T32ExpandImm.  */
5101 static int t32_expandimm_imm(DisasContext *s, int x)
5102 {
5103     int imm = extract32(x, 0, 8);
5104 
5105     switch (extract32(x, 8, 4)) {
5106     case 0: /* XY */
5107         /* Nothing to do.  */
5108         break;
5109     case 1: /* 00XY00XY */
5110         imm *= 0x00010001;
5111         break;
5112     case 2: /* XY00XY00 */
5113         imm *= 0x01000100;
5114         break;
5115     case 3: /* XYXYXYXY */
5116         imm *= 0x01010101;
5117         break;
5118     default:
5119         /* Rotated constant.  */
5120         imm |= 0x80;
5121         break;
5122     }
5123     return imm;
5124 }
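/*
 * Annotation, worked examples (not from the original source):
 *  - imm12 = 0x1AB: x[11:8] == 1, so the immediate expands to the
 *    duplicated pattern 0x00AB00AB with no rotation.
 *  - imm12 = 0x4FF: x[11:10] != 0, so this is a rotated constant; the
 *    unrotated value is 0xFF (bit 7 forced to 1) and the rotation is
 *    x[11:7] = 9, giving ror32(0xFF, 9) = 0x7F800000.
 */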
5125 
5126 static int t32_branch24(DisasContext *s, int x)
5127 {
5128     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
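    /*
     * Annotation: x arrives sign-extended from the decode (the S bit is
     * the sign), so "x < 0" tests S.  When S == 1, I = J ^ ~S = J and the
     * bits are already correct; when S == 0 both J bits must be inverted,
     * hence the conditional XOR with 3 << 21.
     */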
5129     x ^= !(x < 0) * (3 << 21);
5130     /* Append the final zero.  */
5131     return x << 1;
5132 }
5133 
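/*
 * Annotation: the 16-bit ALU insns set flags only when executed outside
 * an IT block, i.e. when condexec_mask is zero.
 */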
5134 static int t16_setflags(DisasContext *s)
5135 {
5136     return s->condexec_mask == 0;
5137 }
5138 
5139 static int t16_push_list(DisasContext *s, int x)
5140 {
5141     return (x & 0xff) | (x & 0x100) << (14 - 8);
5142 }
5143 
5144 static int t16_pop_list(DisasContext *s, int x)
5145 {
5146     return (x & 0xff) | (x & 0x100) << (15 - 8);
5147 }
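/*
 * Annotation: in the T16 PUSH/POP encodings bit 8 of the register list is
 * the "extra" register -- LR for PUSH and PC for POP -- so the expanders
 * above shift that bit up to position 14 or 15 respectively.
 */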
5148 
5149 /*
5150  * Include the generated decoders.
5151  */
5152 
5153 #include "decode-a32.c.inc"
5154 #include "decode-a32-uncond.c.inc"
5155 #include "decode-t32.c.inc"
5156 #include "decode-t16.c.inc"
5157 
5158 static bool valid_cp(DisasContext *s, int cp)
5159 {
5160     /*
5161      * Return true if this coprocessor field indicates something
5162      * that's really a possible coprocessor.
5163      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5164      * and of those only cp14 and cp15 were used for registers.
5165      * cp10 and cp11 were used for VFP and Neon, whose decode is
5166      * dealt with elsewhere. With the advent of fp16, cp9 is also
5167      * now part of VFP.
5168      * For v8A and later, the encoding has been tightened so that
5169      * only cp14 and cp15 are valid, and other values aren't considered
5170      * to be in the coprocessor-instruction space at all. v8M still
5171      * permits coprocessors 0..7.
5172      * For XScale, we must not decode the XScale cp0, cp1 space as
5173      * a standard coprocessor insn, because we want to fall through to
5174      * the legacy disas_xscale_insn() decoder after decodetree is done.
5175      */
5176     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5177         return false;
5178     }
5179 
5180     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5181         !arm_dc_feature(s, ARM_FEATURE_M)) {
5182         return cp >= 14;
5183     }
5184     return cp < 8 || cp >= 14;
5185 }
5186 
5187 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5188 {
5189     if (!valid_cp(s, a->cp)) {
5190         return false;
5191     }
5192     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5193                    false, a->rt, 0);
5194     return true;
5195 }
5196 
5197 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5198 {
5199     if (!valid_cp(s, a->cp)) {
5200         return false;
5201     }
5202     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5203                    true, a->rt, 0);
5204     return true;
5205 }
5206 
5207 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5208 {
5209     if (!valid_cp(s, a->cp)) {
5210         return false;
5211     }
5212     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5213                    false, a->rt, a->rt2);
5214     return true;
5215 }
5216 
5217 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5218 {
5219     if (!valid_cp(s, a->cp)) {
5220         return false;
5221     }
5222     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5223                    true, a->rt, a->rt2);
5224     return true;
5225 }
5226 
5227 /* Helpers to swap operands for reverse-subtract.  */
5228 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5229 {
5230     tcg_gen_sub_i32(dst, b, a);
5231 }
5232 
5233 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5234 {
5235     gen_sub_CC(dst, b, a);
5236 }
5237 
5238 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5239 {
5240     gen_sub_carry(dest, b, a);
5241 }
5242 
5243 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5244 {
5245     gen_sbc_CC(dest, b, a);
5246 }
5247 
5248 /*
5249  * Helpers for the data processing routines.
5250  *
5251  * After the computation, store the results back.  This may be
5252  * suppressed altogether (STREG_NONE), require a runtime check against
5253  * the stack limits (STREG_SP_CHECK), generate an exception return
5254  * (STREG_EXC_RET), or simply store into a register (STREG_NORMAL).
5255  *
5256  * Always return true, indicating success for a trans_* function.
5257  */
5258 typedef enum {
5259    STREG_NONE,
5260    STREG_NORMAL,
5261    STREG_SP_CHECK,
5262    STREG_EXC_RET,
5263 } StoreRegKind;
5264 
5265 static bool store_reg_kind(DisasContext *s, int rd,
5266                             TCGv_i32 val, StoreRegKind kind)
5267 {
5268     switch (kind) {
5269     case STREG_NONE:
5270         return true;
5271     case STREG_NORMAL:
5272         /* See ALUWritePC: Interworking only from a32 mode. */
5273         if (s->thumb) {
5274             store_reg(s, rd, val);
5275         } else {
5276             store_reg_bx(s, rd, val);
5277         }
5278         return true;
5279     case STREG_SP_CHECK:
5280         store_sp_checked(s, val);
5281         return true;
5282     case STREG_EXC_RET:
5283         gen_exception_return(s, val);
5284         return true;
5285     }
5286     g_assert_not_reached();
5287 }
5288 
5289 /*
5290  * Data Processing (register)
5291  *
5292  * Operate, with set flags, one register source,
5293  * one immediate shifted register source, and a destination.
5294  */
5295 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5296                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5297                          int logic_cc, StoreRegKind kind)
5298 {
5299     TCGv_i32 tmp1, tmp2;
5300 
5301     tmp2 = load_reg(s, a->rm);
5302     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5303     tmp1 = load_reg(s, a->rn);
5304 
5305     gen(tmp1, tmp1, tmp2);
5306 
5307     if (logic_cc) {
5308         gen_logic_CC(tmp1);
5309     }
5310     return store_reg_kind(s, a->rd, tmp1, kind);
5311 }
5312 
5313 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5314                          void (*gen)(TCGv_i32, TCGv_i32),
5315                          int logic_cc, StoreRegKind kind)
5316 {
5317     TCGv_i32 tmp;
5318 
5319     tmp = load_reg(s, a->rm);
5320     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5321 
5322     gen(tmp, tmp);
5323     if (logic_cc) {
5324         gen_logic_CC(tmp);
5325     }
5326     return store_reg_kind(s, a->rd, tmp, kind);
5327 }
5328 
5329 /*
5330  * Data-processing (register-shifted register)
5331  *
5332  * Operate, with set flags, one register source,
5333  * one register shifted register source, and a destination.
5334  */
5335 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5336                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5337                          int logic_cc, StoreRegKind kind)
5338 {
5339     TCGv_i32 tmp1, tmp2;
5340 
5341     tmp1 = load_reg(s, a->rs);
5342     tmp2 = load_reg(s, a->rm);
5343     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5344     tmp1 = load_reg(s, a->rn);
5345 
5346     gen(tmp1, tmp1, tmp2);
5347 
5348     if (logic_cc) {
5349         gen_logic_CC(tmp1);
5350     }
5351     return store_reg_kind(s, a->rd, tmp1, kind);
5352 }
5353 
5354 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5355                          void (*gen)(TCGv_i32, TCGv_i32),
5356                          int logic_cc, StoreRegKind kind)
5357 {
5358     TCGv_i32 tmp1, tmp2;
5359 
5360     tmp1 = load_reg(s, a->rs);
5361     tmp2 = load_reg(s, a->rm);
5362     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5363 
5364     gen(tmp2, tmp2);
5365     if (logic_cc) {
5366         gen_logic_CC(tmp2);
5367     }
5368     return store_reg_kind(s, a->rd, tmp2, kind);
5369 }
5370 
5371 /*
5372  * Data-processing (immediate)
5373  *
5374  * Operate, with set flags, one register source,
5375  * one rotated immediate, and a destination.
5376  *
5377  * Note that logic_cc && a->rot setting CF based on the msb of the
5378  * immediate is the reason why we must pass in the unrotated form
5379  * of the immediate.
5380  */
5381 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5382                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5383                          int logic_cc, StoreRegKind kind)
5384 {
5385     TCGv_i32 tmp1;
5386     uint32_t imm;
5387 
5388     imm = ror32(a->imm, a->rot);
5389     if (logic_cc && a->rot) {
5390         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5391     }
5392     tmp1 = load_reg(s, a->rn);
5393 
5394     gen(tmp1, tmp1, tcg_constant_i32(imm));
5395 
5396     if (logic_cc) {
5397         gen_logic_CC(tmp1);
5398     }
5399     return store_reg_kind(s, a->rd, tmp1, kind);
5400 }
5401 
5402 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5403                          void (*gen)(TCGv_i32, TCGv_i32),
5404                          int logic_cc, StoreRegKind kind)
5405 {
5406     TCGv_i32 tmp;
5407     uint32_t imm;
5408 
5409     imm = ror32(a->imm, a->rot);
5410     if (logic_cc && a->rot) {
5411         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5412     }
5413 
5414     tmp = tcg_temp_new_i32();
5415     gen(tmp, tcg_constant_i32(imm));
5416 
5417     if (logic_cc) {
5418         gen_logic_CC(tmp);
5419     }
5420     return store_reg_kind(s, a->rd, tmp, kind);
5421 }
5422 
5423 #define DO_ANY3(NAME, OP, L, K)                                         \
5424     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5425     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5426     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5427     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5428     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5429     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5430 
5431 #define DO_ANY2(NAME, OP, L, K)                                         \
5432     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5433     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5434     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5435     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5436     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5437     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5438 
5439 #define DO_CMP2(NAME, OP, L)                                            \
5440     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5441     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5442     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5443     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5444     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5445     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
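/*
 * Annotation, illustrative expansion (not from the original source):
 * DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL) below produces
 *
 *   static bool trans_AND_rrri(DisasContext *s, arg_s_rrr_shi *a)
 *   { StoreRegKind k = (STREG_NORMAL); return op_s_rrr_shi(s, a, tcg_gen_and_i32, a->s, k); }
 *
 * plus the matching _rrrr and _rri variants, one per operand form that
 * the decodetree patterns dispatch to.
 */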
5446 
5447 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5448 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5449 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5450 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5451 
5452 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5453 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5454 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5455 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5456 
5457 DO_CMP2(TST, tcg_gen_and_i32, true)
5458 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5459 DO_CMP2(CMN, gen_add_CC, false)
5460 DO_CMP2(CMP, gen_sub_CC, false)
5461 
5462 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5463         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5464 
5465 /*
5466  * Note for the computation of StoreRegKind we return out of the
5467  * middle of the functions that are expanded by DO_ANY3, and that
5468  * we modify a->s via that parameter before it is used by OP.
5469  */
5470 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5471         ({
5472             StoreRegKind ret = STREG_NORMAL;
5473             if (a->rd == 15 && a->s) {
5474                 /*
5475                  * See ALUExceptionReturn:
5476                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5477                  * In Hyp mode, UNDEFINED.
5478                  */
5479                 if (IS_USER(s) || s->current_el == 2) {
5480                     unallocated_encoding(s);
5481                     return true;
5482                 }
5483                 /* There is no writeback of nzcv to PSTATE.  */
5484                 a->s = 0;
5485                 ret = STREG_EXC_RET;
5486             } else if (a->rd == 13 && a->rn == 13) {
5487                 ret = STREG_SP_CHECK;
5488             }
5489             ret;
5490         }))
5491 
5492 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5493         ({
5494             StoreRegKind ret = STREG_NORMAL;
5495             if (a->rd == 15 && a->s) {
5496                 /*
5497                  * See ALUExceptionReturn:
5498                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5499                  * In Hyp mode, UNDEFINED.
5500                  */
5501                 if (IS_USER(s) || s->current_el == 2) {
5502                     unallocated_encoding(s);
5503                     return true;
5504                 }
5505                 /* There is no writeback of nzcv to PSTATE.  */
5506                 a->s = 0;
5507                 ret = STREG_EXC_RET;
5508             } else if (a->rd == 13) {
5509                 ret = STREG_SP_CHECK;
5510             }
5511             ret;
5512         }))
5513 
5514 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5515 
5516 /*
5517  * ORN is only available with T32, so there is no register-shifted-register
5518  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5519  */
5520 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5521 {
5522     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5523 }
5524 
5525 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5526 {
5527     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5528 }
5529 
5530 #undef DO_ANY3
5531 #undef DO_ANY2
5532 #undef DO_CMP2
5533 
5534 static bool trans_ADR(DisasContext *s, arg_ri *a)
5535 {
5536     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5537     return true;
5538 }
5539 
5540 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5541 {
5542     if (!ENABLE_ARCH_6T2) {
5543         return false;
5544     }
5545 
5546     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5547     return true;
5548 }
5549 
5550 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5551 {
5552     TCGv_i32 tmp;
5553 
5554     if (!ENABLE_ARCH_6T2) {
5555         return false;
5556     }
5557 
5558     tmp = load_reg(s, a->rd);
5559     tcg_gen_ext16u_i32(tmp, tmp);
5560     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5561     store_reg(s, a->rd, tmp);
5562     return true;
5563 }
5564 
5565 /*
5566  * v8.1M MVE wide-shifts
5567  */
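/*
 * Annotation: these are the v8.1M scalar "long shift" insns (ASRL, LSLL,
 * UQSHLL, SQSHLL, ...) which treat the GPR pair RdaLo:RdaHi as a single
 * 64-bit value; they sit in the MVE encoding space but only touch
 * general-purpose registers.
 */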
5568 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5569                           WideShiftImmFn *fn)
5570 {
5571     TCGv_i64 rda;
5572     TCGv_i32 rdalo, rdahi;
5573 
5574     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5575         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5576         return false;
5577     }
5578     if (a->rdahi == 15) {
5579         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5580         return false;
5581     }
5582     if (!dc_isar_feature(aa32_mve, s) ||
5583         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5584         a->rdahi == 13) {
5585         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5586         unallocated_encoding(s);
5587         return true;
5588     }
5589 
5590     if (a->shim == 0) {
5591         a->shim = 32;
5592     }
5593 
5594     rda = tcg_temp_new_i64();
5595     rdalo = load_reg(s, a->rdalo);
5596     rdahi = load_reg(s, a->rdahi);
5597     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5598 
5599     fn(rda, rda, a->shim);
5600 
5601     tcg_gen_extrl_i64_i32(rdalo, rda);
5602     tcg_gen_extrh_i64_i32(rdahi, rda);
5603     store_reg(s, a->rdalo, rdalo);
5604     store_reg(s, a->rdahi, rdahi);
5605 
5606     return true;
5607 }
5608 
5609 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5610 {
5611     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5612 }
5613 
5614 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5615 {
5616     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5617 }
5618 
5619 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5620 {
5621     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5622 }
5623 
5624 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5625 {
5626     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5627 }
5628 
5629 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5630 {
5631     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5632 }
5633 
5634 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5635 {
5636     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5637 }
5638 
5639 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5640 {
5641     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5642 }
5643 
5644 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5645 {
5646     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5647 }
5648 
5649 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5650 {
5651     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5652 }
5653 
5654 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5655 {
5656     TCGv_i64 rda;
5657     TCGv_i32 rdalo, rdahi;
5658 
5659     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5660         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5661         return false;
5662     }
5663     if (a->rdahi == 15) {
5664         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5665         return false;
5666     }
5667     if (!dc_isar_feature(aa32_mve, s) ||
5668         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5669         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5670         a->rm == a->rdahi || a->rm == a->rdalo) {
5671         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5672         unallocated_encoding(s);
5673         return true;
5674     }
5675 
5676     rda = tcg_temp_new_i64();
5677     rdalo = load_reg(s, a->rdalo);
5678     rdahi = load_reg(s, a->rdahi);
5679     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5680 
5681     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5682     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5683 
5684     tcg_gen_extrl_i64_i32(rdalo, rda);
5685     tcg_gen_extrh_i64_i32(rdahi, rda);
5686     store_reg(s, a->rdalo, rdalo);
5687     store_reg(s, a->rdahi, rdahi);
5688 
5689     return true;
5690 }
5691 
5692 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5693 {
5694     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5695 }
5696 
5697 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5698 {
5699     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5700 }
5701 
5702 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5703 {
5704     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5705 }
5706 
5707 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5708 {
5709     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5710 }
5711 
5712 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5713 {
5714     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5715 }
5716 
5717 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5718 {
5719     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5720 }
5721 
5722 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5723 {
5724     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5725         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5726         return false;
5727     }
5728     if (!dc_isar_feature(aa32_mve, s) ||
5729         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5730         a->rda == 13 || a->rda == 15) {
5731         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5732         unallocated_encoding(s);
5733         return true;
5734     }
5735 
5736     if (a->shim == 0) {
5737         a->shim = 32;
5738     }
5739     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5740 
5741     return true;
5742 }
5743 
5744 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5745 {
5746     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5747 }
5748 
5749 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5750 {
5751     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5752 }
5753 
5754 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5755 {
5756     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5757 }
5758 
5759 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5760 {
5761     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5762 }
5763 
5764 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5765 {
5766     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5767 }
5768 
5769 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5770 {
5771     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5772 }
5773 
5774 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5775 {
5776     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5777         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5778         return false;
5779     }
5780     if (!dc_isar_feature(aa32_mve, s) ||
5781         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5782         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5783         a->rm == a->rda) {
5784         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5785         unallocated_encoding(s);
5786         return true;
5787     }
5788 
5789     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5790     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5791     return true;
5792 }
5793 
5794 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5795 {
5796     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5797 }
5798 
5799 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5800 {
5801     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5802 }
5803 
5804 /*
5805  * Multiply and multiply accumulate
5806  */
5807 
5808 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5809 {
5810     TCGv_i32 t1, t2;
5811 
5812     t1 = load_reg(s, a->rn);
5813     t2 = load_reg(s, a->rm);
5814     tcg_gen_mul_i32(t1, t1, t2);
5815     if (add) {
5816         t2 = load_reg(s, a->ra);
5817         tcg_gen_add_i32(t1, t1, t2);
5818     }
5819     if (a->s) {
5820         gen_logic_CC(t1);
5821     }
5822     store_reg(s, a->rd, t1);
5823     return true;
5824 }
5825 
5826 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5827 {
5828     return op_mla(s, a, false);
5829 }
5830 
5831 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5832 {
5833     return op_mla(s, a, true);
5834 }
5835 
5836 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5837 {
5838     TCGv_i32 t1, t2;
5839 
5840     if (!ENABLE_ARCH_6T2) {
5841         return false;
5842     }
5843     t1 = load_reg(s, a->rn);
5844     t2 = load_reg(s, a->rm);
5845     tcg_gen_mul_i32(t1, t1, t2);
5846     t2 = load_reg(s, a->ra);
5847     tcg_gen_sub_i32(t1, t2, t1);
5848     store_reg(s, a->rd, t1);
5849     return true;
5850 }
5851 
5852 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5853 {
5854     TCGv_i32 t0, t1, t2, t3;
5855 
5856     t0 = load_reg(s, a->rm);
5857     t1 = load_reg(s, a->rn);
5858     if (uns) {
5859         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5860     } else {
5861         tcg_gen_muls2_i32(t0, t1, t0, t1);
5862     }
5863     if (add) {
5864         t2 = load_reg(s, a->ra);
5865         t3 = load_reg(s, a->rd);
5866         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5867     }
5868     if (a->s) {
5869         gen_logicq_cc(t0, t1);
5870     }
5871     store_reg(s, a->ra, t0);
5872     store_reg(s, a->rd, t1);
5873     return true;
5874 }
5875 
5876 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5877 {
5878     return op_mlal(s, a, true, false);
5879 }
5880 
5881 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5882 {
5883     return op_mlal(s, a, false, false);
5884 }
5885 
5886 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5887 {
5888     return op_mlal(s, a, true, true);
5889 }
5890 
5891 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5892 {
5893     return op_mlal(s, a, false, true);
5894 }
5895 
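/*
 * Annotation: UMAAL computes RdHi:RdLo = Rn * Rm + RdLo + RdHi, which is
 * why the 64-bit product below gets two add2 steps, each with a zero
 * high word.
 */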
5896 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5897 {
5898     TCGv_i32 t0, t1, t2, zero;
5899 
5900     if (s->thumb
5901         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5902         : !ENABLE_ARCH_6) {
5903         return false;
5904     }
5905 
5906     t0 = load_reg(s, a->rm);
5907     t1 = load_reg(s, a->rn);
5908     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5909     zero = tcg_constant_i32(0);
5910     t2 = load_reg(s, a->ra);
5911     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5912     t2 = load_reg(s, a->rd);
5913     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5914     store_reg(s, a->ra, t0);
5915     store_reg(s, a->rd, t1);
5916     return true;
5917 }
5918 
5919 /*
5920  * Saturating addition and subtraction
5921  */
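/*
 * Annotation: for QDADD/QDSUB the 'doub' step saturates the doubling of
 * Rn before the saturating add/subtract with Rm; both helper calls set
 * CPSR.Q on saturation.
 */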
5922 
5923 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5924 {
5925     TCGv_i32 t0, t1;
5926 
5927     if (s->thumb
5928         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5929         : !ENABLE_ARCH_5TE) {
5930         return false;
5931     }
5932 
5933     t0 = load_reg(s, a->rm);
5934     t1 = load_reg(s, a->rn);
5935     if (doub) {
5936         gen_helper_add_saturate(t1, cpu_env, t1, t1);
5937     }
5938     if (add) {
5939         gen_helper_add_saturate(t0, cpu_env, t0, t1);
5940     } else {
5941         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5942     }
5943     store_reg(s, a->rd, t0);
5944     return true;
5945 }
5946 
5947 #define DO_QADDSUB(NAME, ADD, DOUB) \
5948 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5949 {                                                        \
5950     return op_qaddsub(s, a, ADD, DOUB);                  \
5951 }
5952 
5953 DO_QADDSUB(QADD, true, false)
5954 DO_QADDSUB(QSUB, false, false)
5955 DO_QADDSUB(QDADD, true, true)
5956 DO_QADDSUB(QDSUB, false, true)
5957 
5958 #undef DO_QADDSUB
5959 
5960 /*
5961  * Halfword multiply and multiply accumulate
5962  */
5963 
5964 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5965                        int add_long, bool nt, bool mt)
5966 {
5967     TCGv_i32 t0, t1, tl, th;
5968 
5969     if (s->thumb
5970         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5971         : !ENABLE_ARCH_5TE) {
5972         return false;
5973     }
5974 
5975     t0 = load_reg(s, a->rn);
5976     t1 = load_reg(s, a->rm);
5977     gen_mulxy(t0, t1, nt, mt);
5978 
5979     switch (add_long) {
5980     case 0:
5981         store_reg(s, a->rd, t0);
5982         break;
5983     case 1:
5984         t1 = load_reg(s, a->ra);
5985         gen_helper_add_setq(t0, cpu_env, t0, t1);
5986         store_reg(s, a->rd, t0);
5987         break;
5988     case 2:
5989         tl = load_reg(s, a->ra);
5990         th = load_reg(s, a->rd);
5991         /* Sign-extend the 32-bit product to 64 bits.  */
5992         t1 = tcg_temp_new_i32();
5993         tcg_gen_sari_i32(t1, t0, 31);
5994         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5995         store_reg(s, a->ra, tl);
5996         store_reg(s, a->rd, th);
5997         break;
5998     default:
5999         g_assert_not_reached();
6000     }
6001     return true;
6002 }
6003 
6004 #define DO_SMLAX(NAME, add, nt, mt) \
6005 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6006 {                                                          \
6007     return op_smlaxxx(s, a, add, nt, mt);                  \
6008 }
6009 
6010 DO_SMLAX(SMULBB, 0, 0, 0)
6011 DO_SMLAX(SMULBT, 0, 0, 1)
6012 DO_SMLAX(SMULTB, 0, 1, 0)
6013 DO_SMLAX(SMULTT, 0, 1, 1)
6014 
6015 DO_SMLAX(SMLABB, 1, 0, 0)
6016 DO_SMLAX(SMLABT, 1, 0, 1)
6017 DO_SMLAX(SMLATB, 1, 1, 0)
6018 DO_SMLAX(SMLATT, 1, 1, 1)
6019 
6020 DO_SMLAX(SMLALBB, 2, 0, 0)
6021 DO_SMLAX(SMLALBT, 2, 0, 1)
6022 DO_SMLAX(SMLALTB, 2, 1, 0)
6023 DO_SMLAX(SMLALTT, 2, 1, 1)
6024 
6025 #undef DO_SMLAX
6026 
6027 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6028 {
6029     TCGv_i32 t0, t1;
6030 
6031     if (!ENABLE_ARCH_5TE) {
6032         return false;
6033     }
6034 
6035     t0 = load_reg(s, a->rn);
6036     t1 = load_reg(s, a->rm);
6037     /*
6038      * Since the nominal result is product<47:16>, shift the 16-bit
6039      * input up by 16 bits, so that the result is at product<63:32>.
6040      */
6041     if (mt) {
6042         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6043     } else {
6044         tcg_gen_shli_i32(t1, t1, 16);
6045     }
6046     tcg_gen_muls2_i32(t0, t1, t0, t1);
6047     if (add) {
6048         t0 = load_reg(s, a->ra);
6049         gen_helper_add_setq(t1, cpu_env, t1, t0);
6050     }
6051     store_reg(s, a->rd, t1);
6052     return true;
6053 }
6054 
6055 #define DO_SMLAWX(NAME, add, mt) \
6056 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6057 {                                                          \
6058     return op_smlawx(s, a, add, mt);                       \
6059 }
6060 
6061 DO_SMLAWX(SMULWB, 0, 0)
6062 DO_SMLAWX(SMULWT, 0, 1)
6063 DO_SMLAWX(SMLAWB, 1, 0)
6064 DO_SMLAWX(SMLAWT, 1, 1)
6065 
6066 #undef DO_SMLAWX
6067 
6068 /*
6069  * MSR (immediate) and hints
6070  */
6071 
6072 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6073 {
6074     /*
6075      * When running single-threaded TCG code, use the helper to ensure that
6076      * the next round-robin scheduled vCPU gets a crack.  When running in
6077      * MTTCG we don't generate jumps to the helper as it won't affect the
6078      * scheduling of other vCPUs.
6079      */
6080     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6081         gen_update_pc(s, curr_insn_len(s));
6082         s->base.is_jmp = DISAS_YIELD;
6083     }
6084     return true;
6085 }
6086 
6087 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6088 {
6089     /*
6090      * When running single-threaded TCG code, use the helper to ensure that
6091      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6092      * just skip this instruction.  Currently the SEV/SEVL instructions,
6093      * which are *one* of many ways to wake the CPU from WFE, are not
6094      * implemented so we can't sleep like WFI does.
6095      */
6096     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6097         gen_update_pc(s, curr_insn_len(s));
6098         s->base.is_jmp = DISAS_WFE;
6099     }
6100     return true;
6101 }
6102 
6103 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6104 {
6105     /* For WFI, halt the vCPU until an IRQ. */
6106     gen_update_pc(s, curr_insn_len(s));
6107     s->base.is_jmp = DISAS_WFI;
6108     return true;
6109 }
6110 
6111 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6112 {
6113     /*
6114      * For M-profile, minimal-RAS ESB can be a NOP.
6115      * Without RAS, we must implement this as NOP.
6116      */
6117     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6118         /*
6119          * QEMU does not have a source of physical SErrors,
6120          * so we are only concerned with virtual SErrors.
6121          * The pseudocode in the ARM for this case is
6122          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6123          *      AArch32.vESBOperation();
6124          * Most of the condition can be evaluated at translation time.
6125          * Test for EL2 present, and defer test for SEL2 to runtime.
6126          */
6127         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6128             gen_helper_vesb(cpu_env);
6129         }
6130     }
6131     return true;
6132 }
6133 
6134 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6135 {
6136     return true;
6137 }
6138 
6139 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6140 {
6141     uint32_t val = ror32(a->imm, a->rot * 2);
6142     uint32_t mask = msr_mask(s, a->mask, a->r);
6143 
6144     if (gen_set_psr_im(s, mask, a->r, val)) {
6145         unallocated_encoding(s);
6146     }
6147     return true;
6148 }
6149 
6150 /*
6151  * Cyclic Redundancy Check
6152  */
6153 
6154 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6155 {
6156     TCGv_i32 t1, t2, t3;
6157 
6158     if (!dc_isar_feature(aa32_crc32, s)) {
6159         return false;
6160     }
6161 
6162     t1 = load_reg(s, a->rn);
6163     t2 = load_reg(s, a->rm);
6164     switch (sz) {
6165     case MO_8:
6166         gen_uxtb(t2);
6167         break;
6168     case MO_16:
6169         gen_uxth(t2);
6170         break;
6171     case MO_32:
6172         break;
6173     default:
6174         g_assert_not_reached();
6175     }
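    /* Annotation: the crc32 helpers take the operand width in bytes (1, 2 or 4). */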
6176     t3 = tcg_constant_i32(1 << sz);
6177     if (c) {
6178         gen_helper_crc32c(t1, t1, t2, t3);
6179     } else {
6180         gen_helper_crc32(t1, t1, t2, t3);
6181     }
6182     store_reg(s, a->rd, t1);
6183     return true;
6184 }
6185 
6186 #define DO_CRC32(NAME, c, sz) \
6187 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6188     { return op_crc32(s, a, c, sz); }
6189 
6190 DO_CRC32(CRC32B, false, MO_8)
6191 DO_CRC32(CRC32H, false, MO_16)
6192 DO_CRC32(CRC32W, false, MO_32)
6193 DO_CRC32(CRC32CB, true, MO_8)
6194 DO_CRC32(CRC32CH, true, MO_16)
6195 DO_CRC32(CRC32CW, true, MO_32)
6196 
6197 #undef DO_CRC32
6198 
6199 /*
6200  * Miscellaneous instructions
6201  */
6202 
6203 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6204 {
6205     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6206         return false;
6207     }
6208     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6209     return true;
6210 }
6211 
6212 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6213 {
6214     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6215         return false;
6216     }
6217     gen_msr_banked(s, a->r, a->sysm, a->rn);
6218     return true;
6219 }
6220 
6221 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6222 {
6223     TCGv_i32 tmp;
6224 
6225     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6226         return false;
6227     }
6228     if (a->r) {
6229         if (IS_USER(s)) {
6230             unallocated_encoding(s);
6231             return true;
6232         }
6233         tmp = load_cpu_field(spsr);
6234     } else {
6235         tmp = tcg_temp_new_i32();
6236         gen_helper_cpsr_read(tmp, cpu_env);
6237     }
6238     store_reg(s, a->rd, tmp);
6239     return true;
6240 }
6241 
6242 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6243 {
6244     TCGv_i32 tmp;
6245     uint32_t mask = msr_mask(s, a->mask, a->r);
6246 
6247     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6248         return false;
6249     }
6250     tmp = load_reg(s, a->rn);
6251     if (gen_set_psr(s, mask, a->r, tmp)) {
6252         unallocated_encoding(s);
6253     }
6254     return true;
6255 }
6256 
6257 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6258 {
6259     TCGv_i32 tmp;
6260 
6261     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6262         return false;
6263     }
6264     tmp = tcg_temp_new_i32();
6265     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6266     store_reg(s, a->rd, tmp);
6267     return true;
6268 }
6269 
6270 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6271 {
6272     TCGv_i32 addr, reg;
6273 
6274     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6275         return false;
6276     }
6277     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6278     reg = load_reg(s, a->rn);
6279     gen_helper_v7m_msr(cpu_env, addr, reg);
6280     /* If we wrote to CONTROL, the EL might have changed */
6281     gen_rebuild_hflags(s, true);
6282     gen_lookup_tb(s);
6283     return true;
6284 }
6285 
6286 static bool trans_BX(DisasContext *s, arg_BX *a)
6287 {
6288     if (!ENABLE_ARCH_4T) {
6289         return false;
6290     }
6291     gen_bx_excret(s, load_reg(s, a->rm));
6292     return true;
6293 }
6294 
6295 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6296 {
6297     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6298         return false;
6299     }
6300     /*
6301      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6302      * TBFLAGS bit on a basically-never-happens case, so call a helper
6303      * function to check for the trap and raise the exception if needed
6304      * (passing it the register number for the syndrome value).
6305      * v8A doesn't have this HSTR bit.
6306      */
6307     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6308         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6309         s->current_el < 2 && s->ns) {
6310         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6311     }
6312     /* Trivial implementation equivalent to bx.  */
6313     gen_bx(s, load_reg(s, a->rm));
6314     return true;
6315 }
6316 
6317 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6318 {
6319     TCGv_i32 tmp;
6320 
6321     if (!ENABLE_ARCH_5) {
6322         return false;
6323     }
6324     tmp = load_reg(s, a->rm);
6325     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6326     gen_bx(s, tmp);
6327     return true;
6328 }
6329 
6330 /*
6331  * BXNS/BLXNS: only exist for v8M with the security extensions,
6332  * and always UNDEF if NonSecure.  We don't implement these in
6333  * the user-only mode either (in theory you can use them from
6334  * Secure User mode but they are too tied in to system emulation).
6335  */
6336 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6337 {
6338     if (!s->v8m_secure || IS_USER_ONLY) {
6339         unallocated_encoding(s);
6340     } else {
6341         gen_bxns(s, a->rm);
6342     }
6343     return true;
6344 }
6345 
6346 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6347 {
6348     if (!s->v8m_secure || IS_USER_ONLY) {
6349         unallocated_encoding(s);
6350     } else {
6351         gen_blxns(s, a->rm);
6352     }
6353     return true;
6354 }
6355 
6356 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6357 {
6358     TCGv_i32 tmp;
6359 
6360     if (!ENABLE_ARCH_5) {
6361         return false;
6362     }
6363     tmp = load_reg(s, a->rm);
6364     tcg_gen_clzi_i32(tmp, tmp, 32);
6365     store_reg(s, a->rd, tmp);
6366     return true;
6367 }
6368 
6369 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6370 {
6371     TCGv_i32 tmp;
6372 
6373     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6374         return false;
6375     }
6376     if (IS_USER(s)) {
6377         unallocated_encoding(s);
6378         return true;
6379     }
6380     if (s->current_el == 2) {
6381         /* ERET from Hyp uses ELR_Hyp, not LR */
6382         tmp = load_cpu_field_low32(elr_el[2]);
6383     } else {
6384         tmp = load_reg(s, 14);
6385     }
6386     gen_exception_return(s, tmp);
6387     return true;
6388 }
6389 
6390 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6391 {
6392     gen_hlt(s, a->imm);
6393     return true;
6394 }
6395 
6396 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6397 {
6398     if (!ENABLE_ARCH_5) {
6399         return false;
6400     }
6401     /* BKPT is OK with ECI set and leaves it untouched */
6402     s->eci_handled = true;
6403     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6404         semihosting_enabled(s->current_el == 0) &&
6405         (a->imm == 0xab)) {
6406         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6407     } else {
6408         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6409     }
6410     return true;
6411 }
6412 
6413 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6414 {
6415     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6416         return false;
6417     }
6418     if (IS_USER(s)) {
6419         unallocated_encoding(s);
6420     } else {
6421         gen_hvc(s, a->imm);
6422     }
6423     return true;
6424 }
6425 
6426 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6427 {
6428     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6429         return false;
6430     }
6431     if (IS_USER(s)) {
6432         unallocated_encoding(s);
6433     } else {
6434         gen_smc(s);
6435     }
6436     return true;
6437 }
6438 
6439 static bool trans_SG(DisasContext *s, arg_SG *a)
6440 {
6441     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6442         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6443         return false;
6444     }
6445     /*
6446      * SG (v8M only)
6447      * The bulk of the behaviour for this instruction is implemented
6448      * in v7m_handle_execute_nsc(), which deals with the insn when
6449      * it is executed by a CPU in non-secure state from memory
6450      * which is Secure & NonSecure-Callable.
6451      * Here we only need to handle the remaining cases:
6452      *  * in NS memory (including the "security extension not
6453      *    implemented" case): NOP
6454      *  * in S memory but CPU already secure (clear IT bits)
6455      * We know that the attribute for the memory this insn is
6456      * in must match the current CPU state, because otherwise
6457      * get_phys_addr_pmsav8 would have generated an exception.
6458      */
6459     if (s->v8m_secure) {
6460         /* Like the IT insn, we don't need to generate any code */
6461         s->condexec_cond = 0;
6462         s->condexec_mask = 0;
6463     }
6464     return true;
6465 }
6466 
6467 static bool trans_TT(DisasContext *s, arg_TT *a)
6468 {
6469     TCGv_i32 addr, tmp;
6470 
6471     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6472         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6473         return false;
6474     }
6475     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6476         /* We UNDEF for these UNPREDICTABLE cases */
6477         unallocated_encoding(s);
6478         return true;
6479     }
6480     if (a->A && !s->v8m_secure) {
6481         /* This case is UNDEFINED.  */
6482         unallocated_encoding(s);
6483         return true;
6484     }
6485 
6486     addr = load_reg(s, a->rn);
6487     tmp = tcg_temp_new_i32();
6488     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6489     store_reg(s, a->rd, tmp);
6490     return true;
6491 }
6492 
6493 /*
6494  * Load/store register index
6495  */
6496 
6497 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6498 {
6499     ISSInfo ret;
6500 
6501     /* ISS not valid if writeback */
6502     if (p && !w) {
6503         ret = rd;
6504         if (curr_insn_len(s) == 2) {
6505             ret |= ISSIs16Bit;
6506         }
6507     } else {
6508         ret = ISSInvalid;
6509     }
6510     return ret;
6511 }
6512 
6513 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6514 {
6515     TCGv_i32 addr = load_reg(s, a->rn);
6516 
6517     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6518         gen_helper_v8m_stackcheck(cpu_env, addr);
6519     }
6520 
6521     if (a->p) {
6522         TCGv_i32 ofs = load_reg(s, a->rm);
6523         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6524         if (a->u) {
6525             tcg_gen_add_i32(addr, addr, ofs);
6526         } else {
6527             tcg_gen_sub_i32(addr, addr, ofs);
6528         }
6529     }
6530     return addr;
6531 }
6532 
6533 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6534                             TCGv_i32 addr, int address_offset)
6535 {
6536     if (!a->p) {
6537         TCGv_i32 ofs = load_reg(s, a->rm);
6538         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6539         if (a->u) {
6540             tcg_gen_add_i32(addr, addr, ofs);
6541         } else {
6542             tcg_gen_sub_i32(addr, addr, ofs);
6543         }
6544     } else if (!a->w) {
6545         return;
6546     }
6547     tcg_gen_addi_i32(addr, addr, address_offset);
6548     store_reg(s, a->rn, addr);
6549 }
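
/*
 * Illustrative mapping of the p/w flags to the A32 register-offset
 * addressing modes (register numbers are examples only):
 *     LDR r0, [r1, r2]        p=1 w=0   offset, no writeback
 *     LDR r0, [r1, r2]!       p=1 w=1   pre-indexed, writeback
 *     LDR r0, [r1], r2        p=0       post-indexed, always writes back
 * op_addr_rr_pre() applies the (shifted, signed by 'u') offset only for
 * p=1; op_addr_rr_post() applies it for p=0 and stores the updated base
 * whenever writeback occurs.
 */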
6550 
6551 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6552                        MemOp mop, int mem_idx)
6553 {
6554     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6555     TCGv_i32 addr, tmp;
6556 
6557     addr = op_addr_rr_pre(s, a);
6558 
6559     tmp = tcg_temp_new_i32();
6560     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6561     disas_set_da_iss(s, mop, issinfo);
6562 
6563     /*
6564      * Perform base writeback before the loaded value to
6565      * ensure correct behavior with overlapping index registers.
6566      */
6567     op_addr_rr_post(s, a, addr, 0);
6568     store_reg_from_load(s, a->rt, tmp);
6569     return true;
6570 }
6571 
6572 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6573                         MemOp mop, int mem_idx)
6574 {
6575     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6576     TCGv_i32 addr, tmp;
6577 
6578     /*
6579      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6580      * is either UNPREDICTABLE or has defined behaviour
6581      */
6582     if (s->thumb && a->rn == 15) {
6583         return false;
6584     }
6585 
6586     addr = op_addr_rr_pre(s, a);
6587 
6588     tmp = load_reg(s, a->rt);
6589     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6590     disas_set_da_iss(s, mop, issinfo);
6591 
6592     op_addr_rr_post(s, a, addr, 0);
6593     return true;
6594 }
6595 
6596 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6597 {
6598     int mem_idx = get_mem_index(s);
6599     TCGv_i32 addr, tmp;
6600 
6601     if (!ENABLE_ARCH_5TE) {
6602         return false;
6603     }
6604     if (a->rt & 1) {
6605         unallocated_encoding(s);
6606         return true;
6607     }
6608     addr = op_addr_rr_pre(s, a);
6609 
6610     tmp = tcg_temp_new_i32();
6611     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6612     store_reg(s, a->rt, tmp);
6613 
6614     tcg_gen_addi_i32(addr, addr, 4);
6615 
6616     tmp = tcg_temp_new_i32();
6617     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6618     store_reg(s, a->rt + 1, tmp);
6619 
6620     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6621     op_addr_rr_post(s, a, addr, -4);
6622     return true;
6623 }
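
/*
 * Note on the -4 passed to op_addr_rr_post() above: 'addr' was advanced
 * by 4 to access the second word, so the writeback step is given an
 * extra offset of -4 to compensate, leaving the written-back base
 * exactly where the addressing mode says it should be.  The same idiom
 * is used by the other LDRD/STRD helpers below.
 */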
6624 
6625 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6626 {
6627     int mem_idx = get_mem_index(s);
6628     TCGv_i32 addr, tmp;
6629 
6630     if (!ENABLE_ARCH_5TE) {
6631         return false;
6632     }
6633     if (a->rt & 1) {
6634         unallocated_encoding(s);
6635         return true;
6636     }
6637     addr = op_addr_rr_pre(s, a);
6638 
6639     tmp = load_reg(s, a->rt);
6640     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6641 
6642     tcg_gen_addi_i32(addr, addr, 4);
6643 
6644     tmp = load_reg(s, a->rt + 1);
6645     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6646 
6647     op_addr_rr_post(s, a, addr, -4);
6648     return true;
6649 }
6650 
6651 /*
6652  * Load/store immediate index
6653  */
6654 
6655 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6656 {
6657     int ofs = a->imm;
6658 
6659     if (!a->u) {
6660         ofs = -ofs;
6661     }
6662 
6663     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6664         /*
6665          * Stackcheck. Here we know 'addr' is the current SP;
6666          * U is set if we're moving SP up, else down. It is
6667          * UNKNOWN whether the limit check triggers when SP starts
6668          * below the limit and ends up above it; we chose to do so.
6669          */
6670         if (!a->u) {
6671             TCGv_i32 newsp = tcg_temp_new_i32();
6672             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6673             gen_helper_v8m_stackcheck(cpu_env, newsp);
6674         } else {
6675             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6676         }
6677     }
6678 
6679     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6680 }
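
/*
 * Illustrative example of the stack-limit check above (register and
 * immediate values are examples only): for a pre-indexed store that
 * moves SP down, such as STR r0, [sp, #-8]!, the check is applied to
 * the candidate new SP (sp - 8); for an update that moves SP up, the
 * current SP is checked, since the final value can only move further
 * away from the limit.
 */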
6681 
6682 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6683                             TCGv_i32 addr, int address_offset)
6684 {
6685     if (!a->p) {
6686         if (a->u) {
6687             address_offset += a->imm;
6688         } else {
6689             address_offset -= a->imm;
6690         }
6691     } else if (!a->w) {
6692         return;
6693     }
6694     tcg_gen_addi_i32(addr, addr, address_offset);
6695     store_reg(s, a->rn, addr);
6696 }
6697 
6698 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6699                        MemOp mop, int mem_idx)
6700 {
6701     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6702     TCGv_i32 addr, tmp;
6703 
6704     addr = op_addr_ri_pre(s, a);
6705 
6706     tmp = tcg_temp_new_i32();
6707     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6708     disas_set_da_iss(s, mop, issinfo);
6709 
6710     /*
6711      * Perform base writeback before the loaded value to
6712      * ensure correct behavior with overlapping index registers.
6713      */
6714     op_addr_ri_post(s, a, addr, 0);
6715     store_reg_from_load(s, a->rt, tmp);
6716     return true;
6717 }
6718 
6719 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6720                         MemOp mop, int mem_idx)
6721 {
6722     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6723     TCGv_i32 addr, tmp;
6724 
6725     /*
6726      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6727      * is either UNPREDICTABLE or has defined behaviour
6728      */
6729     if (s->thumb && a->rn == 15) {
6730         return false;
6731     }
6732 
6733     addr = op_addr_ri_pre(s, a);
6734 
6735     tmp = load_reg(s, a->rt);
6736     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6737     disas_set_da_iss(s, mop, issinfo);
6738 
6739     op_addr_ri_post(s, a, addr, 0);
6740     return true;
6741 }
6742 
6743 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6744 {
6745     int mem_idx = get_mem_index(s);
6746     TCGv_i32 addr, tmp;
6747 
6748     addr = op_addr_ri_pre(s, a);
6749 
6750     tmp = tcg_temp_new_i32();
6751     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6752     store_reg(s, a->rt, tmp);
6753 
6754     tcg_gen_addi_i32(addr, addr, 4);
6755 
6756     tmp = tcg_temp_new_i32();
6757     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6758     store_reg(s, rt2, tmp);
6759 
6760     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6761     op_addr_ri_post(s, a, addr, -4);
6762     return true;
6763 }
6764 
6765 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6766 {
6767     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6768         return false;
6769     }
6770     return op_ldrd_ri(s, a, a->rt + 1);
6771 }
6772 
6773 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6774 {
6775     arg_ldst_ri b = {
6776         .u = a->u, .w = a->w, .p = a->p,
6777         .rn = a->rn, .rt = a->rt, .imm = a->imm
6778     };
6779     return op_ldrd_ri(s, &b, a->rt2);
6780 }
6781 
6782 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6783 {
6784     int mem_idx = get_mem_index(s);
6785     TCGv_i32 addr, tmp;
6786 
6787     addr = op_addr_ri_pre(s, a);
6788 
6789     tmp = load_reg(s, a->rt);
6790     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6791 
6792     tcg_gen_addi_i32(addr, addr, 4);
6793 
6794     tmp = load_reg(s, rt2);
6795     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6796 
6797     op_addr_ri_post(s, a, addr, -4);
6798     return true;
6799 }
6800 
6801 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6802 {
6803     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6804         return false;
6805     }
6806     return op_strd_ri(s, a, a->rt + 1);
6807 }
6808 
6809 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6810 {
6811     arg_ldst_ri b = {
6812         .u = a->u, .w = a->w, .p = a->p,
6813         .rn = a->rn, .rt = a->rt, .imm = a->imm
6814     };
6815     return op_strd_ri(s, &b, a->rt2);
6816 }
6817 
6818 #define DO_LDST(NAME, WHICH, MEMOP) \
6819 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6820 {                                                                     \
6821     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6822 }                                                                     \
6823 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6824 {                                                                     \
6825     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6826 }                                                                     \
6827 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6828 {                                                                     \
6829     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6830 }                                                                     \
6831 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6832 {                                                                     \
6833     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6834 }
6835 
6836 DO_LDST(LDR, load, MO_UL)
6837 DO_LDST(LDRB, load, MO_UB)
6838 DO_LDST(LDRH, load, MO_UW)
6839 DO_LDST(LDRSB, load, MO_SB)
6840 DO_LDST(LDRSH, load, MO_SW)
6841 
6842 DO_LDST(STR, store, MO_UL)
6843 DO_LDST(STRB, store, MO_UB)
6844 DO_LDST(STRH, store, MO_UW)
6845 
6846 #undef DO_LDST
6847 
6848 /*
6849  * Synchronization primitives
6850  */
6851 
6852 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6853 {
6854     TCGv_i32 addr, tmp;
6855     TCGv taddr;
6856 
6857     opc |= s->be_data;
6858     addr = load_reg(s, a->rn);
6859     taddr = gen_aa32_addr(s, addr, opc);
6860 
6861     tmp = load_reg(s, a->rt2);
6862     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6863 
6864     store_reg(s, a->rt, tmp);
6865     return true;
6866 }
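
/*
 * Note (explanatory only): SWP{B} Rt, Rt2, [Rn] is implemented as a
 * single atomic exchange: the value of Rt2 is written to [Rn] and the
 * old memory contents end up in Rt, with no window in between.
 */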
6867 
6868 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6869 {
6870     return op_swp(s, a, MO_UL | MO_ALIGN);
6871 }
6872 
6873 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6874 {
6875     return op_swp(s, a, MO_UB);
6876 }
6877 
6878 /*
6879  * Load/Store Exclusive and Load-Acquire/Store-Release
6880  */
6881 
6882 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6883 {
6884     TCGv_i32 addr;
6885     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6886     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6887 
6888     /* We UNDEF for these UNPREDICTABLE cases.  */
6889     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6890         || a->rd == a->rn || a->rd == a->rt
6891         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6892         || (mop == MO_64
6893             && (a->rt2 == 15
6894                 || a->rd == a->rt2
6895                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6896         unallocated_encoding(s);
6897         return true;
6898     }
6899 
6900     if (rel) {
6901         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6902     }
6903 
6904     addr = tcg_temp_new_i32();
6905     load_reg_var(s, addr, a->rn);
6906     tcg_gen_addi_i32(addr, addr, a->imm);
6907 
6908     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6909     return true;
6910 }
6911 
6912 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6913 {
6914     if (!ENABLE_ARCH_6) {
6915         return false;
6916     }
6917     return op_strex(s, a, MO_32, false);
6918 }
6919 
6920 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6921 {
6922     if (!ENABLE_ARCH_6K) {
6923         return false;
6924     }
6925     /* We UNDEF for these UNPREDICTABLE cases.  */
6926     if (a->rt & 1) {
6927         unallocated_encoding(s);
6928         return true;
6929     }
6930     a->rt2 = a->rt + 1;
6931     return op_strex(s, a, MO_64, false);
6932 }
6933 
6934 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6935 {
6936     return op_strex(s, a, MO_64, false);
6937 }
6938 
6939 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6940 {
6941     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6942         return false;
6943     }
6944     return op_strex(s, a, MO_8, false);
6945 }
6946 
6947 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6948 {
6949     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6950         return false;
6951     }
6952     return op_strex(s, a, MO_16, false);
6953 }
6954 
6955 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6956 {
6957     if (!ENABLE_ARCH_8) {
6958         return false;
6959     }
6960     return op_strex(s, a, MO_32, true);
6961 }
6962 
6963 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6964 {
6965     if (!ENABLE_ARCH_8) {
6966         return false;
6967     }
6968     /* We UNDEF for these UNPREDICTABLE cases.  */
6969     if (a->rt & 1) {
6970         unallocated_encoding(s);
6971         return true;
6972     }
6973     a->rt2 = a->rt + 1;
6974     return op_strex(s, a, MO_64, true);
6975 }
6976 
6977 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6978 {
6979     if (!ENABLE_ARCH_8) {
6980         return false;
6981     }
6982     return op_strex(s, a, MO_64, true);
6983 }
6984 
6985 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6986 {
6987     if (!ENABLE_ARCH_8) {
6988         return false;
6989     }
6990     return op_strex(s, a, MO_8, true);
6991 }
6992 
6993 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6994 {
6995     if (!ENABLE_ARCH_8) {
6996         return false;
6997     }
6998     return op_strex(s, a, MO_16, true);
6999 }
7000 
7001 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7002 {
7003     TCGv_i32 addr, tmp;
7004 
7005     if (!ENABLE_ARCH_8) {
7006         return false;
7007     }
7008     /* We UNDEF for these UNPREDICTABLE cases.  */
7009     if (a->rn == 15 || a->rt == 15) {
7010         unallocated_encoding(s);
7011         return true;
7012     }
7013 
7014     addr = load_reg(s, a->rn);
7015     tmp = load_reg(s, a->rt);
7016     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7017     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7018     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7019 
7020     return true;
7021 }
7022 
7023 static bool trans_STL(DisasContext *s, arg_STL *a)
7024 {
7025     return op_stl(s, a, MO_UL);
7026 }
7027 
7028 static bool trans_STLB(DisasContext *s, arg_STL *a)
7029 {
7030     return op_stl(s, a, MO_UB);
7031 }
7032 
7033 static bool trans_STLH(DisasContext *s, arg_STL *a)
7034 {
7035     return op_stl(s, a, MO_UW);
7036 }
7037 
7038 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7039 {
7040     TCGv_i32 addr;
7041     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7042     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7043 
7044     /* We UNDEF for these UNPREDICTABLE cases.  */
7045     if (a->rn == 15 || a->rt == 15
7046         || (!v8a && s->thumb && a->rt == 13)
7047         || (mop == MO_64
7048             && (a->rt2 == 15 || a->rt == a->rt2
7049                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7050         unallocated_encoding(s);
7051         return true;
7052     }
7053 
7054     addr = tcg_temp_new_i32();
7055     load_reg_var(s, addr, a->rn);
7056     tcg_gen_addi_i32(addr, addr, a->imm);
7057 
7058     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7059 
7060     if (acq) {
7061         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7062     }
7063     return true;
7064 }
7065 
7066 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7067 {
7068     if (!ENABLE_ARCH_6) {
7069         return false;
7070     }
7071     return op_ldrex(s, a, MO_32, false);
7072 }
7073 
7074 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7075 {
7076     if (!ENABLE_ARCH_6K) {
7077         return false;
7078     }
7079     /* We UNDEF for these UNPREDICTABLE cases.  */
7080     if (a->rt & 1) {
7081         unallocated_encoding(s);
7082         return true;
7083     }
7084     a->rt2 = a->rt + 1;
7085     return op_ldrex(s, a, MO_64, false);
7086 }
7087 
7088 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7089 {
7090     return op_ldrex(s, a, MO_64, false);
7091 }
7092 
7093 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7094 {
7095     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7096         return false;
7097     }
7098     return op_ldrex(s, a, MO_8, false);
7099 }
7100 
7101 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7102 {
7103     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7104         return false;
7105     }
7106     return op_ldrex(s, a, MO_16, false);
7107 }
7108 
7109 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7110 {
7111     if (!ENABLE_ARCH_8) {
7112         return false;
7113     }
7114     return op_ldrex(s, a, MO_32, true);
7115 }
7116 
7117 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7118 {
7119     if (!ENABLE_ARCH_8) {
7120         return false;
7121     }
7122     /* We UNDEF for these UNPREDICTABLE cases.  */
7123     if (a->rt & 1) {
7124         unallocated_encoding(s);
7125         return true;
7126     }
7127     a->rt2 = a->rt + 1;
7128     return op_ldrex(s, a, MO_64, true);
7129 }
7130 
7131 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7132 {
7133     if (!ENABLE_ARCH_8) {
7134         return false;
7135     }
7136     return op_ldrex(s, a, MO_64, true);
7137 }
7138 
7139 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7140 {
7141     if (!ENABLE_ARCH_8) {
7142         return false;
7143     }
7144     return op_ldrex(s, a, MO_8, true);
7145 }
7146 
7147 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7148 {
7149     if (!ENABLE_ARCH_8) {
7150         return false;
7151     }
7152     return op_ldrex(s, a, MO_16, true);
7153 }
7154 
7155 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7156 {
7157     TCGv_i32 addr, tmp;
7158 
7159     if (!ENABLE_ARCH_8) {
7160         return false;
7161     }
7162     /* We UNDEF for these UNPREDICTABLE cases.  */
7163     if (a->rn == 15 || a->rt == 15) {
7164         unallocated_encoding(s);
7165         return true;
7166     }
7167 
7168     addr = load_reg(s, a->rn);
7169     tmp = tcg_temp_new_i32();
7170     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7171     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7172 
7173     store_reg(s, a->rt, tmp);
7174     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7175     return true;
7176 }
7177 
7178 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7179 {
7180     return op_lda(s, a, MO_UL);
7181 }
7182 
7183 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7184 {
7185     return op_lda(s, a, MO_UB);
7186 }
7187 
7188 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7189 {
7190     return op_lda(s, a, MO_UW);
7191 }
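
/*
 * Ordering note (explanatory only): the load-acquire forms emit their
 * barrier after the load (op_lda, and op_ldrex with acq set), while the
 * store-release forms emit it before the store (op_stl, and op_strex
 * with rel set).  This matches acquire/release semantics: an acquire
 * must not be reordered with later accesses, a release must not be
 * reordered with earlier ones.
 */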
7192 
7193 /*
7194  * Media instructions
7195  */
7196 
7197 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7198 {
7199     TCGv_i32 t1, t2;
7200 
7201     if (!ENABLE_ARCH_6) {
7202         return false;
7203     }
7204 
7205     t1 = load_reg(s, a->rn);
7206     t2 = load_reg(s, a->rm);
7207     gen_helper_usad8(t1, t1, t2);
7208     if (a->ra != 15) {
7209         t2 = load_reg(s, a->ra);
7210         tcg_gen_add_i32(t1, t1, t2);
7211     }
7212     store_reg(s, a->rd, t1);
7213     return true;
7214 }
7215 
7216 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7217 {
7218     TCGv_i32 tmp;
7219     int width = a->widthm1 + 1;
7220     int shift = a->lsb;
7221 
7222     if (!ENABLE_ARCH_6T2) {
7223         return false;
7224     }
7225     if (shift + width > 32) {
7226         /* UNPREDICTABLE; we choose to UNDEF */
7227         unallocated_encoding(s);
7228         return true;
7229     }
7230 
7231     tmp = load_reg(s, a->rn);
7232     if (u) {
7233         tcg_gen_extract_i32(tmp, tmp, shift, width);
7234     } else {
7235         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7236     }
7237     store_reg(s, a->rd, tmp);
7238     return true;
7239 }
7240 
7241 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7242 {
7243     return op_bfx(s, a, false);
7244 }
7245 
7246 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7247 {
7248     return op_bfx(s, a, true);
7249 }
7250 
7251 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7252 {
7253     int msb = a->msb, lsb = a->lsb;
7254     TCGv_i32 t_in, t_rd;
7255     int width;
7256 
7257     if (!ENABLE_ARCH_6T2) {
7258         return false;
7259     }
7260     if (msb < lsb) {
7261         /* UNPREDICTABLE; we choose to UNDEF */
7262         unallocated_encoding(s);
7263         return true;
7264     }
7265 
7266     width = msb + 1 - lsb;
7267     if (a->rn == 15) {
7268         /* BFC */
7269         t_in = tcg_constant_i32(0);
7270     } else {
7271         /* BFI */
7272         t_in = load_reg(s, a->rn);
7273     }
7274     t_rd = load_reg(s, a->rd);
7275     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7276     store_reg(s, a->rd, t_rd);
7277     return true;
7278 }
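
/*
 * Worked example (values illustrative only): BFI r0, r1, #8, #4 with
 * r0 = 0xffffffff and r1 = 0x00000005 deposits the low 4 bits of r1
 * into bits [11:8] of r0, giving 0xfffff5ff.  BFC is the same deposit
 * with an all-zeroes source, which is the rn == 15 case above.
 */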
7279 
7280 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7281 {
7282     unallocated_encoding(s);
7283     return true;
7284 }
7285 
7286 /*
7287  * Parallel addition and subtraction
7288  */
7289 
7290 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7291                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7292 {
7293     TCGv_i32 t0, t1;
7294 
7295     if (s->thumb
7296         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7297         : !ENABLE_ARCH_6) {
7298         return false;
7299     }
7300 
7301     t0 = load_reg(s, a->rn);
7302     t1 = load_reg(s, a->rm);
7303 
7304     gen(t0, t0, t1);
7305 
7306     store_reg(s, a->rd, t0);
7307     return true;
7308 }
7309 
7310 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7311                              void (*gen)(TCGv_i32, TCGv_i32,
7312                                          TCGv_i32, TCGv_ptr))
7313 {
7314     TCGv_i32 t0, t1;
7315     TCGv_ptr ge;
7316 
7317     if (s->thumb
7318         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7319         : !ENABLE_ARCH_6) {
7320         return false;
7321     }
7322 
7323     t0 = load_reg(s, a->rn);
7324     t1 = load_reg(s, a->rm);
7325 
7326     ge = tcg_temp_new_ptr();
7327     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7328     gen(t0, t0, t1, ge);
7329 
7330     store_reg(s, a->rd, t0);
7331     return true;
7332 }
7333 
7334 #define DO_PAR_ADDSUB(NAME, helper) \
7335 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7336 {                                                       \
7337     return op_par_addsub(s, a, helper);                 \
7338 }
7339 
7340 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7341 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7342 {                                                       \
7343     return op_par_addsub_ge(s, a, helper);              \
7344 }
7345 
7346 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7347 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7348 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7349 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7350 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7351 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7352 
7353 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7354 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7355 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7356 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7357 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7358 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7359 
7360 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7361 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7362 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7363 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7364 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7365 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7366 
7367 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7368 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7369 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7370 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7371 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7372 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7373 
7374 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7375 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7376 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7377 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7378 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7379 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7380 
7381 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7382 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7383 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7384 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7385 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7386 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7387 
7388 #undef DO_PAR_ADDSUB
7389 #undef DO_PAR_ADDSUB_GE
7390 
7391 /*
7392  * Packing, unpacking, saturation, and reversal
7393  */
7394 
7395 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7396 {
7397     TCGv_i32 tn, tm;
7398     int shift = a->imm;
7399 
7400     if (s->thumb
7401         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7402         : !ENABLE_ARCH_6) {
7403         return false;
7404     }
7405 
7406     tn = load_reg(s, a->rn);
7407     tm = load_reg(s, a->rm);
7408     if (a->tb) {
7409         /* PKHTB */
7410         if (shift == 0) {
7411             shift = 31;
7412         }
7413         tcg_gen_sari_i32(tm, tm, shift);
7414         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7415     } else {
7416         /* PKHBT */
7417         tcg_gen_shli_i32(tm, tm, shift);
7418         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7419     }
7420     store_reg(s, a->rd, tn);
7421     return true;
7422 }
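
/*
 * Worked example (values illustrative only), with r1 = 0x11112222 and
 * r2 = 0x33334444:
 *     PKHBT r0, r1, r2, LSL #16   ->  r0 = 0x44442222
 *         (bottom half from Rn, top half from the shifted Rm)
 *     PKHTB r0, r1, r2, ASR #16   ->  r0 = 0x11113333
 *         (top half from Rn, bottom half from the shifted Rm)
 */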
7423 
7424 static bool op_sat(DisasContext *s, arg_sat *a,
7425                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7426 {
7427     TCGv_i32 tmp;
7428     int shift = a->imm;
7429 
7430     if (!ENABLE_ARCH_6) {
7431         return false;
7432     }
7433 
7434     tmp = load_reg(s, a->rn);
7435     if (a->sh) {
7436         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7437     } else {
7438         tcg_gen_shli_i32(tmp, tmp, shift);
7439     }
7440 
7441     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7442 
7443     store_reg(s, a->rd, tmp);
7444     return true;
7445 }
7446 
7447 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7448 {
7449     return op_sat(s, a, gen_helper_ssat);
7450 }
7451 
7452 static bool trans_USAT(DisasContext *s, arg_sat *a)
7453 {
7454     return op_sat(s, a, gen_helper_usat);
7455 }
7456 
7457 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7458 {
7459     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7460         return false;
7461     }
7462     return op_sat(s, a, gen_helper_ssat16);
7463 }
7464 
7465 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7466 {
7467     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7468         return false;
7469     }
7470     return op_sat(s, a, gen_helper_usat16);
7471 }
7472 
7473 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7474                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7475                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7476 {
7477     TCGv_i32 tmp;
7478 
7479     if (!ENABLE_ARCH_6) {
7480         return false;
7481     }
7482 
7483     tmp = load_reg(s, a->rm);
7484     /*
7485      * TODO: In many cases we could do a shift instead of a rotate.
7486      * Combined with a simple extend, that becomes an extract.
7487      */
7488     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7489     gen_extract(tmp, tmp);
7490 
7491     if (a->rn != 15) {
7492         TCGv_i32 tmp2 = load_reg(s, a->rn);
7493         gen_add(tmp, tmp, tmp2);
7494     }
7495     store_reg(s, a->rd, tmp);
7496     return true;
7497 }
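
/*
 * Worked example (values illustrative only): UXTAB r0, r1, r2, ROR #8
 * rotates r2 right by 8, zero-extends its low byte and adds r1; with
 * r2 = 0x11223344 the rotated value is 0x44112233, so r0 = r1 + 0x33.
 * When rn == 15 the addition is skipped and the insn is the plain
 * extend form (UXTB, SXTH, ...).
 */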
7498 
7499 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7500 {
7501     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7502 }
7503 
7504 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7505 {
7506     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7507 }
7508 
7509 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7510 {
7511     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7512         return false;
7513     }
7514     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7515 }
7516 
7517 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7518 {
7519     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7520 }
7521 
7522 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7523 {
7524     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7525 }
7526 
7527 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7528 {
7529     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7530         return false;
7531     }
7532     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7533 }
7534 
7535 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7536 {
7537     TCGv_i32 t1, t2, t3;
7538 
7539     if (s->thumb
7540         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7541         : !ENABLE_ARCH_6) {
7542         return false;
7543     }
7544 
7545     t1 = load_reg(s, a->rn);
7546     t2 = load_reg(s, a->rm);
7547     t3 = tcg_temp_new_i32();
7548     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7549     gen_helper_sel_flags(t1, t3, t1, t2);
7550     store_reg(s, a->rd, t1);
7551     return true;
7552 }
7553 
7554 static bool op_rr(DisasContext *s, arg_rr *a,
7555                   void (*gen)(TCGv_i32, TCGv_i32))
7556 {
7557     TCGv_i32 tmp;
7558 
7559     tmp = load_reg(s, a->rm);
7560     gen(tmp, tmp);
7561     store_reg(s, a->rd, tmp);
7562     return true;
7563 }
7564 
7565 static bool trans_REV(DisasContext *s, arg_rr *a)
7566 {
7567     if (!ENABLE_ARCH_6) {
7568         return false;
7569     }
7570     return op_rr(s, a, tcg_gen_bswap32_i32);
7571 }
7572 
7573 static bool trans_REV16(DisasContext *s, arg_rr *a)
7574 {
7575     if (!ENABLE_ARCH_6) {
7576         return false;
7577     }
7578     return op_rr(s, a, gen_rev16);
7579 }
7580 
7581 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7582 {
7583     if (!ENABLE_ARCH_6) {
7584         return false;
7585     }
7586     return op_rr(s, a, gen_revsh);
7587 }
7588 
7589 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7590 {
7591     if (!ENABLE_ARCH_6T2) {
7592         return false;
7593     }
7594     return op_rr(s, a, gen_helper_rbit);
7595 }
7596 
7597 /*
7598  * Signed multiply, signed and unsigned divide
7599  */
7600 
7601 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7602 {
7603     TCGv_i32 t1, t2;
7604 
7605     if (!ENABLE_ARCH_6) {
7606         return false;
7607     }
7608 
7609     t1 = load_reg(s, a->rn);
7610     t2 = load_reg(s, a->rm);
7611     if (m_swap) {
7612         gen_swap_half(t2, t2);
7613     }
7614     gen_smul_dual(t1, t2);
7615 
7616     if (sub) {
7617         /*
7618          * This subtraction cannot overflow, so we can do a simple
7619          * 32-bit subtraction and then a possible 32-bit saturating
7620          * addition of Ra.
7621          */
7622         tcg_gen_sub_i32(t1, t1, t2);
7623 
7624         if (a->ra != 15) {
7625             t2 = load_reg(s, a->ra);
7626             gen_helper_add_setq(t1, cpu_env, t1, t2);
7627         }
7628     } else if (a->ra == 15) {
7629         /* Single saturation-checking addition */
7630         gen_helper_add_setq(t1, cpu_env, t1, t2);
7631     } else {
7632         /*
7633          * We need to add the products and Ra together and then
7634          * determine whether the final result overflowed. Doing
7635          * this as two separate add-and-check-overflow steps incorrectly
7636          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7637          * Do all the arithmetic at 64-bits and then check for overflow.
7638          */
7639         TCGv_i64 p64, q64;
7640         TCGv_i32 t3, qf, one;
7641 
7642         p64 = tcg_temp_new_i64();
7643         q64 = tcg_temp_new_i64();
7644         tcg_gen_ext_i32_i64(p64, t1);
7645         tcg_gen_ext_i32_i64(q64, t2);
7646         tcg_gen_add_i64(p64, p64, q64);
7647         load_reg_var(s, t2, a->ra);
7648         tcg_gen_ext_i32_i64(q64, t2);
7649         tcg_gen_add_i64(p64, p64, q64);
7650 
7651         tcg_gen_extr_i64_i32(t1, t2, p64);
7652         /*
7653          * t1 is the low half of the result which goes into Rd.
7654          * We have overflow and must set Q if the high half (t2)
7655          * is different from the sign-extension of t1.
7656          */
7657         t3 = tcg_temp_new_i32();
7658         tcg_gen_sari_i32(t3, t1, 31);
7659         qf = load_cpu_field(QF);
7660         one = tcg_constant_i32(1);
7661         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7662         store_cpu_field(qf, QF);
7663     }
7664     store_reg(s, a->rd, t1);
7665     return true;
7666 }
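
/*
 * Worked example for the 64-bit path above (values illustrative only):
 * with Rn = Rm = 0x80008000 and Ra = 0xffffffff, both 16x16 products
 * are 0x40000000, so the exact sum is
 *     0x40000000 + 0x40000000 + (-1) = 0x7fffffff
 * which fits in 32 bits and must not set Q.  Chaining two 32-bit
 * saturating additions would wrongly set Q on the intermediate
 * 0x80000000, which is why the sum is formed at 64 bits and the high
 * word compared against the sign extension of the low word.
 */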
7667 
7668 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7669 {
7670     return op_smlad(s, a, false, false);
7671 }
7672 
7673 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7674 {
7675     return op_smlad(s, a, true, false);
7676 }
7677 
7678 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7679 {
7680     return op_smlad(s, a, false, true);
7681 }
7682 
7683 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7684 {
7685     return op_smlad(s, a, true, true);
7686 }
7687 
7688 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7689 {
7690     TCGv_i32 t1, t2;
7691     TCGv_i64 l1, l2;
7692 
7693     if (!ENABLE_ARCH_6) {
7694         return false;
7695     }
7696 
7697     t1 = load_reg(s, a->rn);
7698     t2 = load_reg(s, a->rm);
7699     if (m_swap) {
7700         gen_swap_half(t2, t2);
7701     }
7702     gen_smul_dual(t1, t2);
7703 
7704     l1 = tcg_temp_new_i64();
7705     l2 = tcg_temp_new_i64();
7706     tcg_gen_ext_i32_i64(l1, t1);
7707     tcg_gen_ext_i32_i64(l2, t2);
7708 
7709     if (sub) {
7710         tcg_gen_sub_i64(l1, l1, l2);
7711     } else {
7712         tcg_gen_add_i64(l1, l1, l2);
7713     }
7714 
7715     gen_addq(s, l1, a->ra, a->rd);
7716     gen_storeq_reg(s, a->ra, a->rd, l1);
7717     return true;
7718 }
7719 
7720 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7721 {
7722     return op_smlald(s, a, false, false);
7723 }
7724 
7725 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7726 {
7727     return op_smlald(s, a, true, false);
7728 }
7729 
7730 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7731 {
7732     return op_smlald(s, a, false, true);
7733 }
7734 
7735 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7736 {
7737     return op_smlald(s, a, true, true);
7738 }
7739 
7740 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7741 {
7742     TCGv_i32 t1, t2;
7743 
7744     if (s->thumb
7745         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7746         : !ENABLE_ARCH_6) {
7747         return false;
7748     }
7749 
7750     t1 = load_reg(s, a->rn);
7751     t2 = load_reg(s, a->rm);
7752     tcg_gen_muls2_i32(t2, t1, t1, t2);
7753 
7754     if (a->ra != 15) {
7755         TCGv_i32 t3 = load_reg(s, a->ra);
7756         if (sub) {
7757             /*
7758              * For SMMLS, we need a 64-bit subtract: it yields both the
7759              * borrow caused by a non-zero multiplicand lowpart and the
7760              * correct result lowpart for rounding.
7761              */
7762             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7763         } else {
7764             tcg_gen_add_i32(t1, t1, t3);
7765         }
7766     }
7767     if (round) {
7768         /*
7769          * Adding 0x80000000 to the 64-bit quantity means that we have
7770          * carry in to the high word when the low word has the msb set.
7771          */
7772         tcg_gen_shri_i32(t2, t2, 31);
7773         tcg_gen_add_i32(t1, t1, t2);
7774     }
7775     store_reg(s, a->rd, t1);
7776     return true;
7777 }
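
/*
 * Rounding note (explanatory only): SMMLAR/SMMLSR conceptually add
 * 0x80000000 to the 64-bit value before keeping the top word.  Since
 * only the high word is retained, that is the same as adding the msb
 * of the low word into the high word, i.e. result_hi += result_lo >> 31,
 * which is what the shri/add pair above does.
 */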
7778 
7779 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7780 {
7781     return op_smmla(s, a, false, false);
7782 }
7783 
7784 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7785 {
7786     return op_smmla(s, a, true, false);
7787 }
7788 
7789 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7790 {
7791     return op_smmla(s, a, false, true);
7792 }
7793 
7794 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7795 {
7796     return op_smmla(s, a, true, true);
7797 }
7798 
7799 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7800 {
7801     TCGv_i32 t1, t2;
7802 
7803     if (s->thumb
7804         ? !dc_isar_feature(aa32_thumb_div, s)
7805         : !dc_isar_feature(aa32_arm_div, s)) {
7806         return false;
7807     }
7808 
7809     t1 = load_reg(s, a->rn);
7810     t2 = load_reg(s, a->rm);
7811     if (u) {
7812         gen_helper_udiv(t1, cpu_env, t1, t2);
7813     } else {
7814         gen_helper_sdiv(t1, cpu_env, t1, t2);
7815     }
7816     store_reg(s, a->rd, t1);
7817     return true;
7818 }
7819 
7820 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7821 {
7822     return op_div(s, a, false);
7823 }
7824 
7825 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7826 {
7827     return op_div(s, a, true);
7828 }
7829 
7830 /*
7831  * Block data transfer
7832  */
7833 
7834 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7835 {
7836     TCGv_i32 addr = load_reg(s, a->rn);
7837 
7838     if (a->b) {
7839         if (a->i) {
7840             /* pre increment */
7841             tcg_gen_addi_i32(addr, addr, 4);
7842         } else {
7843             /* pre decrement */
7844             tcg_gen_addi_i32(addr, addr, -(n * 4));
7845         }
7846     } else if (!a->i && n != 1) {
7847         /* post decrement */
7848         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7849     }
7850 
7851     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7852         /*
7853          * If the writeback is incrementing SP rather than
7854          * decrementing it, and the initial SP is below the
7855          * stack limit but the final written-back SP would
7856          * be above, then we must not perform any memory
7857          * accesses, but it is IMPDEF whether we generate
7858          * an exception. We choose to do so in this case.
7859          * At this point 'addr' is the lowest address, so
7860          * either the original SP (if incrementing) or our
7861          * final SP (if decrementing), so that's what we check.
7862          */
7863         gen_helper_v8m_stackcheck(cpu_env, addr);
7864     }
7865 
7866     return addr;
7867 }
7868 
7869 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7870                                TCGv_i32 addr, int n)
7871 {
7872     if (a->w) {
7873         /* write back */
7874         if (!a->b) {
7875             if (a->i) {
7876                 /* post increment */
7877                 tcg_gen_addi_i32(addr, addr, 4);
7878             } else {
7879                 /* post decrement */
7880                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7881             }
7882         } else if (!a->i && n != 1) {
7883             /* pre decrement */
7884             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7885         }
7886         store_reg(s, a->rn, addr);
7887     }
7888 }
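
/*
 * Illustrative example (register choice is an example only): for
 *     STMDB sp!, {r0-r3}
 * n = 4, so op_addr_block_pre() lowers the address to sp - 16 (the
 * lowest address written), the stores then walk upwards in steps of 4,
 * and op_addr_block_post() writes back sp - 16 as the new SP.
 */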
7889 
7890 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7891 {
7892     int i, j, n, list, mem_idx;
7893     bool user = a->u;
7894     TCGv_i32 addr, tmp;
7895 
7896     if (user) {
7897         /* STM (user) */
7898         if (IS_USER(s)) {
7899             /* Only usable in supervisor mode.  */
7900             unallocated_encoding(s);
7901             return true;
7902         }
7903     }
7904 
7905     list = a->list;
7906     n = ctpop16(list);
7907     if (n < min_n || a->rn == 15) {
7908         unallocated_encoding(s);
7909         return true;
7910     }
7911 
7912     s->eci_handled = true;
7913 
7914     addr = op_addr_block_pre(s, a, n);
7915     mem_idx = get_mem_index(s);
7916 
7917     for (i = j = 0; i < 16; i++) {
7918         if (!(list & (1 << i))) {
7919             continue;
7920         }
7921 
7922         if (user && i != 15) {
7923             tmp = tcg_temp_new_i32();
7924             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
7925         } else {
7926             tmp = load_reg(s, i);
7927         }
7928         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7929 
7930         /* No need to add after the last transfer.  */
7931         if (++j != n) {
7932             tcg_gen_addi_i32(addr, addr, 4);
7933         }
7934     }
7935 
7936     op_addr_block_post(s, a, addr, n);
7937     clear_eci_state(s);
7938     return true;
7939 }
7940 
7941 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7942 {
7943     /* BitCount(list) < 1 is UNPREDICTABLE */
7944     return op_stm(s, a, 1);
7945 }
7946 
7947 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7948 {
7949     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7950     if (a->w && (a->list & (1 << a->rn))) {
7951         unallocated_encoding(s);
7952         return true;
7953     }
7954     /* BitCount(list) < 2 is UNPREDICTABLE */
7955     return op_stm(s, a, 2);
7956 }
7957 
7958 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7959 {
7960     int i, j, n, list, mem_idx;
7961     bool loaded_base;
7962     bool user = a->u;
7963     bool exc_return = false;
7964     TCGv_i32 addr, tmp, loaded_var;
7965 
7966     if (user) {
7967         /* LDM (user), LDM (exception return) */
7968         if (IS_USER(s)) {
7969             /* Only usable in supervisor mode.  */
7970             unallocated_encoding(s);
7971             return true;
7972         }
7973         if (extract32(a->list, 15, 1)) {
7974             exc_return = true;
7975             user = false;
7976         } else {
7977             /* LDM (user) does not allow writeback.  */
7978             if (a->w) {
7979                 unallocated_encoding(s);
7980                 return true;
7981             }
7982         }
7983     }
7984 
7985     list = a->list;
7986     n = ctpop16(list);
7987     if (n < min_n || a->rn == 15) {
7988         unallocated_encoding(s);
7989         return true;
7990     }
7991 
7992     s->eci_handled = true;
7993 
7994     addr = op_addr_block_pre(s, a, n);
7995     mem_idx = get_mem_index(s);
7996     loaded_base = false;
7997     loaded_var = NULL;
7998 
7999     for (i = j = 0; i < 16; i++) {
8000         if (!(list & (1 << i))) {
8001             continue;
8002         }
8003 
8004         tmp = tcg_temp_new_i32();
8005         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8006         if (user) {
8007             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8008         } else if (i == a->rn) {
8009             loaded_var = tmp;
8010             loaded_base = true;
8011         } else if (i == 15 && exc_return) {
8012             store_pc_exc_ret(s, tmp);
8013         } else {
8014             store_reg_from_load(s, i, tmp);
8015         }
8016 
8017         /* No need to add after the last transfer.  */
8018         if (++j != n) {
8019             tcg_gen_addi_i32(addr, addr, 4);
8020         }
8021     }
8022 
8023     op_addr_block_post(s, a, addr, n);
8024 
8025     if (loaded_base) {
8026         /* Note that we reject base == pc above.  */
8027         store_reg(s, a->rn, loaded_var);
8028     }
8029 
8030     if (exc_return) {
8031         /* Restore CPSR from SPSR.  */
8032         tmp = load_cpu_field(spsr);
8033         translator_io_start(&s->base);
8034         gen_helper_cpsr_write_eret(cpu_env, tmp);
8035         /* Must exit loop to check un-masked IRQs */
8036         s->base.is_jmp = DISAS_EXIT;
8037     }
8038     clear_eci_state(s);
8039     return true;
8040 }
8041 
8042 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8043 {
8044     /*
8045      * Writeback register in register list is UNPREDICTABLE
8046      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8047      * an UNKNOWN value to the base register.
8048      */
8049     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8050         unallocated_encoding(s);
8051         return true;
8052     }
8053     /* BitCount(list) < 1 is UNPREDICTABLE */
8054     return do_ldm(s, a, 1);
8055 }
8056 
8057 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8058 {
8059     /* Writeback register in register list is UNPREDICTABLE for T32. */
8060     if (a->w && (a->list & (1 << a->rn))) {
8061         unallocated_encoding(s);
8062         return true;
8063     }
8064     /* BitCount(list) < 2 is UNPREDICTABLE */
8065     return do_ldm(s, a, 2);
8066 }
8067 
8068 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8069 {
8070     /* Writeback is conditional on the base register not being loaded.  */
8071     a->w = !(a->list & (1 << a->rn));
8072     /* BitCount(list) < 1 is UNPREDICTABLE */
8073     return do_ldm(s, a, 1);
8074 }
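
/*
 * Example (illustrative only): the 16-bit LDM r3!, {r2, r4} writes back
 * the updated base because r3 is not in the list, whereas
 * LDM r3, {r2, r3} does not; r3 simply receives the loaded value.
 */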
8075 
8076 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8077 {
8078     int i;
8079     TCGv_i32 zero;
8080 
8081     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8082         return false;
8083     }
8084 
8085     if (extract32(a->list, 13, 1)) {
8086         return false;
8087     }
8088 
8089     if (!a->list) {
8090         /* UNPREDICTABLE; we choose to UNDEF */
8091         return false;
8092     }
8093 
8094     s->eci_handled = true;
8095 
8096     zero = tcg_constant_i32(0);
8097     for (i = 0; i < 15; i++) {
8098         if (extract32(a->list, i, 1)) {
8099             /* Clear R[i] */
8100             tcg_gen_mov_i32(cpu_R[i], zero);
8101         }
8102     }
8103     if (extract32(a->list, 15, 1)) {
8104         /*
8105          * Clear APSR (by calling the MSR helper with the same argument
8106          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8107          */
8108         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8109     }
8110     clear_eci_state(s);
8111     return true;
8112 }
8113 
8114 /*
8115  * Branch, branch with link
8116  */
8117 
8118 static bool trans_B(DisasContext *s, arg_i *a)
8119 {
8120     gen_jmp(s, jmp_diff(s, a->imm));
8121     return true;
8122 }
8123 
8124 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8125 {
8126     /* This has cond from encoding, required to be outside IT block.  */
8127     /* Condition comes from the encoding; must be outside an IT block. */
8128         return false;
8129     }
8130     if (s->condexec_mask) {
8131         unallocated_encoding(s);
8132         return true;
8133     }
8134     arm_skip_unless(s, a->cond);
8135     gen_jmp(s, jmp_diff(s, a->imm));
8136     return true;
8137 }
8138 
8139 static bool trans_BL(DisasContext *s, arg_i *a)
8140 {
8141     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8142     gen_jmp(s, jmp_diff(s, a->imm));
8143     return true;
8144 }
8145 
8146 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8147 {
8148     /*
8149      * BLX <imm> would be useless on M-profile; the encoding space
8150      * is used for other insns from v8.1M onward, and UNDEFs before that.
8151      */
8152     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8153         return false;
8154     }
8155 
8156     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8157     if (s->thumb && (a->imm & 2)) {
8158         return false;
8159     }
8160     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8161     store_cpu_field_constant(!s->thumb, thumb);
8162     /* This jump is computed from an aligned PC: subtract off the low bits. */
8163     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8164     return true;
8165 }
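
/*
 * Worked example of the alignment adjustment above (addresses are
 * examples only): a Thumb BLX <imm> at 0x1002 has PC = 0x1006, which
 * the architecture aligns down to 0x1004 before adding the offset;
 * subtracting (s->pc_curr & 3) from the immediate expresses that same
 * aligned target as a difference from the unaligned PC.
 */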
8166 
8167 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8168 {
8169     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8170     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8171     return true;
8172 }
8173 
8174 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8175 {
8176     TCGv_i32 tmp = tcg_temp_new_i32();
8177 
8178     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8179     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8180     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8181     gen_bx(s, tmp);
8182     return true;
8183 }
8184 
8185 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8186 {
8187     TCGv_i32 tmp;
8188 
8189     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8190     if (!ENABLE_ARCH_5) {
8191         return false;
8192     }
8193     tmp = tcg_temp_new_i32();
8194     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8195     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8196     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8197     gen_bx(s, tmp);
8198     return true;
8199 }
8200 
8201 static bool trans_BF(DisasContext *s, arg_BF *a)
8202 {
8203     /*
8204      * M-profile branch future insns. The architecture permits an
8205      * implementation to implement these as NOPs (equivalent to
8206      * discarding the LO_BRANCH_INFO cache immediately), and we
8207      * take that IMPDEF option because for QEMU a "real" implementation
8208      * would be complicated and wouldn't execute any faster.
8209      */
8210     if (!dc_isar_feature(aa32_lob, s)) {
8211         return false;
8212     }
8213     if (a->boff == 0) {
8214         /* SEE "Related encodings" (loop insns) */
8215         return false;
8216     }
8217     /* Handle as NOP */
8218     return true;
8219 }
8220 
8221 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8222 {
8223     /* M-profile low-overhead loop start */
8224     TCGv_i32 tmp;
8225 
8226     if (!dc_isar_feature(aa32_lob, s)) {
8227         return false;
8228     }
8229     if (a->rn == 13 || a->rn == 15) {
8230         /*
8231          * For DLSTP rn == 15 is a related encoding (LCTP); the
8232          * other cases caught by this condition are all
8233          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8234          */
8235         return false;
8236     }
8237 
8238     if (a->size != 4) {
8239         /* DLSTP */
8240         if (!dc_isar_feature(aa32_mve, s)) {
8241             return false;
8242         }
8243         if (!vfp_access_check(s)) {
8244             return true;
8245         }
8246     }
8247 
8248     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8249     tmp = load_reg(s, a->rn);
8250     store_reg(s, 14, tmp);
8251     if (a->size != 4) {
8252         /* DLSTP: set FPSCR.LTPSIZE */
8253         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8254         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8255     }
8256     return true;
8257 }
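
/*
 * Illustrative use of the low-overhead-loop insns (assembly is an
 * example only):
 *     DLS     lr, r0          @ lr = iteration count
 * loop:
 *     ...                     @ loop body
 *     LE      lr, loop        @ count down and branch back while
 *                             @ iterations remain
 * DLS itself only copies the count into lr (and, for DLSTP, sets
 * FPSCR.LTPSIZE); all of the looping behaviour lives in LE/LETP.
 */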
8258 
8259 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8260 {
8261     /* M-profile low-overhead while-loop start */
8262     TCGv_i32 tmp;
8263     DisasLabel nextlabel;
8264 
8265     if (!dc_isar_feature(aa32_lob, s)) {
8266         return false;
8267     }
8268     if (a->rn == 13 || a->rn == 15) {
8269         /*
8270          * For WLSTP rn == 15 is a related encoding (LE); the
8271          * other cases caught by this condition are all
8272          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8273          */
8274         return false;
8275     }
8276     if (s->condexec_mask) {
8277         /*
8278          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8279          * we choose to UNDEF, because otherwise our use of
8280          * gen_goto_tb(1) would clash with the use of TB exit 1
8281          * in the dc->condjmp condition-failed codepath in
8282          * arm_tr_tb_stop() and we'd get an assertion.
8283          */
8284         return false;
8285     }
8286     if (a->size != 4) {
8287         /* WLSTP */
8288         if (!dc_isar_feature(aa32_mve, s)) {
8289             return false;
8290         }
8291         /*
8292          * We need to check that the FPU is enabled here, but mustn't
8293          * call vfp_access_check() to do that because we don't want to
8294          * do the lazy state preservation in the "loop count is zero" case.
8295          * Do the check-and-raise-exception by hand.
8296          */
8297         if (s->fp_excp_el) {
8298             gen_exception_insn_el(s, 0, EXCP_NOCP,
8299                                   syn_uncategorized(), s->fp_excp_el);
8300             return true;
8301         }
8302     }
8303 
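         /*
          * If the loop count in Rn is zero, branch to the WLS target
          * (past the loop body); otherwise fall through to the loop body
          * after setting LR to the count (and LTPSIZE for WLSTP).
          */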
8304     nextlabel = gen_disas_label(s);
8305     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8306     tmp = load_reg(s, a->rn);
8307     store_reg(s, 14, tmp);
8308     if (a->size != 4) {
8309         /*
8310          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8311          * lazy state preservation, new FP context creation, etc,
8312          * that vfp_access_check() does. We know that the actual
8313          * access check will succeed (ie it won't generate code that
8314          * throws an exception) because we did that check by hand earlier.
8315          */
8316         bool ok = vfp_access_check(s);
8317         assert(ok);
8318         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8319         /*
8320          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8321          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8322          */
8323     }
8324     gen_jmp_tb(s, curr_insn_len(s), 1);
8325 
8326     set_disas_label(s, nextlabel);
8327     gen_jmp(s, jmp_diff(s, a->imm));
8328     return true;
8329 }
8330 
8331 static bool trans_LE(DisasContext *s, arg_LE *a)
8332 {
8333     /*
8334      * M-profile low-overhead loop end. The architecture permits an
8335      * implementation to discard the LO_BRANCH_INFO cache at any time,
8336      * and we take the IMPDEF option to never set it in the first place
8337      * (equivalent to always discarding it immediately), because for QEMU
8338      * a "real" implementation would be complicated and wouldn't execute
8339      * any faster.
8340      */
8341     TCGv_i32 tmp;
8342     DisasLabel loopend;
8343     bool fpu_active;
8344 
8345     if (!dc_isar_feature(aa32_lob, s)) {
8346         return false;
8347     }
8348     if (a->f && a->tp) {
8349         return false;
8350     }
8351     if (s->condexec_mask) {
8352         /*
8353          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8354          * we choose to UNDEF, because otherwise our use of
8355          * gen_goto_tb(1) would clash with the use of TB exit 1
8356          * in the dc->condjmp condition-failed codepath in
8357          * arm_tr_tb_stop() and we'd get an assertion.
8358          */
8359         return false;
8360     }
8361     if (a->tp) {
8362         /* LETP */
8363         if (!dc_isar_feature(aa32_mve, s)) {
8364             return false;
8365         }
8366         if (!vfp_access_check(s)) {
8367             s->eci_handled = true;
8368             return true;
8369         }
8370     }
8371 
8372     /* LE/LETP is OK with ECI set and leaves it untouched */
8373     s->eci_handled = true;
8374 
8375     /*
8376      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8377      * UsageFault exception for the LE insn in that case. Note that we
8378      * are not directly checking FPSCR.LTPSIZE but instead check the
8379      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8380      * not currently active (ie ActiveFPState() returns false). We
8381      * can identify not-active purely from our TB state flags, as the
8382      * FPU is active only if:
8383      *  the FPU is enabled
8384      *  AND lazy state preservation is not active
8385      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8386      *
8387      * Usually we don't need to care about this distinction between
8388      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8389      * will either take an exception or clear the conditions that make
8390      * the FPU not active. But LE is an unusual case of a non-FP insn
8391      * that looks at LTPSIZE.
8392      */
8393     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8394 
8395     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8396         /* Need to do a runtime check for LTPSIZE != 4 */
8397         DisasLabel skipexc = gen_disas_label(s);
8398         tmp = load_cpu_field(v7m.ltpsize);
8399         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8400         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8401         set_disas_label(s, skipexc);
8402     }
8403 
8404     if (a->f) {
8405         /* Loop-forever: just jump back to the loop start */
8406         gen_jmp(s, jmp_diff(s, -a->imm));
8407         return true;
8408     }
8409 
8410     /*
8411      * Not loop-forever. If LR <= loop-decrement-value this is the final iteration.
8412      * For LE, we know at this point that LTPSIZE must be 4 and the
8413      * loop decrement value is 1. For LETP we need to calculate the decrement
8414      * value from LTPSIZE.
8415      */
8416     loopend = gen_disas_label(s);
8417     if (!a->tp) {
8418         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8419         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8420     } else {
8421         /*
8422          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8423          * so that decr stays live after the brcondi.
8424          */
8425         TCGv_i32 decr = tcg_temp_new_i32();
8426         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8427         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8428         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8429 
8430         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8431 
8432         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8433     }
8434     /* Jump back to the loop start */
8435     gen_jmp(s, jmp_diff(s, -a->imm));
8436 
8437     set_disas_label(s, loopend);
8438     if (a->tp) {
8439         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8440         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8441     }
8442     /* End TB, continuing to following insn */
8443     gen_jmp_tb(s, curr_insn_len(s), 1);
8444     return true;
8445 }
8446 
8447 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8448 {
8449     /*
8450      * M-profile Loop Clear with Tail Predication. Since our implementation
8451      * doesn't cache branch information, all we need to do is reset
8452      * FPSCR.LTPSIZE to 4.
8453      */
8454 
8455     if (!dc_isar_feature(aa32_lob, s) ||
8456         !dc_isar_feature(aa32_mve, s)) {
8457         return false;
8458     }
8459 
8460     if (!vfp_access_check(s)) {
8461         return true;
8462     }
8463 
8464     store_cpu_field_constant(4, v7m.ltpsize);
8465     return true;
8466 }
8467 
8468 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8469 {
8470     /*
8471      * M-profile Create Vector Tail Predicate. This insn is itself
8472      * predicated and is subject to beatwise execution.
8473      */
8474     TCGv_i32 rn_shifted, masklen;
8475 
8476     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8477         return false;
8478     }
8479 
8480     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8481         return true;
8482     }
8483 
8484     /*
8485      * We pre-calculate the mask length here to avoid needing
8486      * multiple helpers specialized for element size.
8487      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8488      */
8489     rn_shifted = tcg_temp_new_i32();
8490     masklen = load_reg(s, a->rn);
8491     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8492     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8493                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8494                         rn_shifted, tcg_constant_i32(16));
8495     gen_helper_mve_vctp(cpu_env, masklen);
8496     /* This insn updates predication bits */
8497     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8498     mve_update_eci(s);
8499     return true;
8500 }
8501 
8502 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8503 {
8504     TCGv_i32 addr, tmp;
8505 
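         /*
          * TBB/TBH: load a byte (TBB) or halfword (TBH) offset from the
          * table at Rn + Rm (Rm scaled by 2 for TBH), double it (branch
          * offsets are counted in halfwords) and add it to the PC value
          * (the address of this insn plus 4) to form the branch target.
          */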
8506     tmp = load_reg(s, a->rm);
8507     if (half) {
8508         tcg_gen_add_i32(tmp, tmp, tmp);
8509     }
8510     addr = load_reg(s, a->rn);
8511     tcg_gen_add_i32(addr, addr, tmp);
8512 
8513     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8514 
8515     tcg_gen_add_i32(tmp, tmp, tmp);
8516     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8517     tcg_gen_add_i32(tmp, tmp, addr);
8518     store_reg(s, 15, tmp);
8519     return true;
8520 }
8521 
8522 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8523 {
8524     return op_tbranch(s, a, false);
8525 }
8526 
8527 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8528 {
8529     return op_tbranch(s, a, true);
8530 }
8531 
8532 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8533 {
8534     TCGv_i32 tmp = load_reg(s, a->rn);
8535 
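         /*
          * Note the TCG condition is the *skip* condition: for CBNZ
          * (a->nz set) we branch to condlabel, skipping the jump, when
          * Rn is zero, and vice versa for CBZ.
          */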
8536     arm_gen_condlabel(s);
8537     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8538                         tmp, 0, s->condlabel.label);
8539     gen_jmp(s, jmp_diff(s, a->imm));
8540     return true;
8541 }
8542 
8543 /*
8544  * Supervisor call - both T32 & A32 come here so we need to check
8545  * which mode we are in when checking for semihosting.
8546  */
8547 
8548 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8549 {
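         /*
          * SVC 0xab (Thumb) and SVC 0x123456 (A32) are the standard
          * AArch32 semihosting call immediates.
          */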
8550     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8551 
8552     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8553         semihosting_enabled(s->current_el == 0) &&
8554         (a->imm == semihost_imm)) {
8555         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8556     } else {
8557         if (s->fgt_svc) {
8558             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8559             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8560         } else {
8561             gen_update_pc(s, curr_insn_len(s));
8562             s->svc_imm = a->imm;
8563             s->base.is_jmp = DISAS_SWI;
8564         }
8565     }
8566     return true;
8567 }
8568 
8569 /*
8570  * Unconditional system instructions
8571  */
8572 
8573 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8574 {
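         /*
          * RFE loads two words: the return PC from the lower address and
          * the SPSR to restore from the word above it. pre_offset selects
          * that lower address relative to Rn; post_offset is the remaining
          * adjustment applied for base writeback.
          */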
8575     static const int8_t pre_offset[4] = {
8576         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8577     };
8578     static const int8_t post_offset[4] = {
8579         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8580     };
8581     TCGv_i32 addr, t1, t2;
8582 
8583     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8584         return false;
8585     }
8586     if (IS_USER(s)) {
8587         unallocated_encoding(s);
8588         return true;
8589     }
8590 
8591     addr = load_reg(s, a->rn);
8592     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8593 
8594     /* Load the PC into t1 and the CPSR into t2.  */
8595     t1 = tcg_temp_new_i32();
8596     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8597     tcg_gen_addi_i32(addr, addr, 4);
8598     t2 = tcg_temp_new_i32();
8599     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8600 
8601     if (a->w) {
8602         /* Base writeback.  */
8603         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8604         store_reg(s, a->rn, addr);
8605     }
8606     gen_rfe(s, t1, t2);
8607     return true;
8608 }
8609 
8610 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8611 {
8612     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8613         return false;
8614     }
8615     gen_srs(s, a->mode, a->pu, a->w);
8616     return true;
8617 }
8618 
8619 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8620 {
8621     uint32_t mask, val;
8622 
8623     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8624         return false;
8625     }
8626     if (IS_USER(s)) {
8627         /* Implemented as NOP in user mode.  */
8628         return true;
8629     }
8630     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8631 
8632     mask = val = 0;
8633     if (a->imod & 2) {
8634         if (a->A) {
8635             mask |= CPSR_A;
8636         }
8637         if (a->I) {
8638             mask |= CPSR_I;
8639         }
8640         if (a->F) {
8641             mask |= CPSR_F;
8642         }
8643         if (a->imod & 1) {
8644             val |= mask;
8645         }
8646     }
8647     if (a->M) {
8648         mask |= CPSR_M;
8649         val |= a->mode;
8650     }
8651     if (mask) {
8652         gen_set_psr_im(s, mask, 0, val);
8653     }
8654     return true;
8655 }
8656 
8657 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8658 {
8659     TCGv_i32 tmp, addr;
8660 
8661     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8662         return false;
8663     }
8664     if (IS_USER(s)) {
8665         /* Implemented as NOP in user mode.  */
8666         return true;
8667     }
8668 
8669     tmp = tcg_constant_i32(a->im);
8670     /* FAULTMASK */
8671     if (a->F) {
8672         addr = tcg_constant_i32(19);
8673         gen_helper_v7m_msr(cpu_env, addr, tmp);
8674     }
8675     /* PRIMASK */
8676     if (a->I) {
8677         addr = tcg_constant_i32(16);
8678         gen_helper_v7m_msr(cpu_env, addr, tmp);
8679     }
8680     gen_rebuild_hflags(s, false);
8681     gen_lookup_tb(s);
8682     return true;
8683 }
8684 
8685 /*
8686  * Clear-Exclusive, Barriers
8687  */
8688 
8689 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8690 {
8691     if (s->thumb
8692         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8693         : !ENABLE_ARCH_6K) {
8694         return false;
8695     }
8696     gen_clrex(s);
8697     return true;
8698 }
8699 
8700 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8701 {
8702     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8703         return false;
8704     }
8705     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8706     return true;
8707 }
8708 
8709 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8710 {
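         /* DSB is a strictly stronger barrier than DMB, so just reuse it. */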
8711     return trans_DSB(s, NULL);
8712 }
8713 
8714 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8715 {
8716     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8717         return false;
8718     }
8719     /*
8720      * We need to break the TB after this insn to execute
8721      * self-modifying code correctly and also to take
8722      * any pending interrupts immediately.
8723      */
8724     s->base.is_jmp = DISAS_TOO_MANY;
8725     return true;
8726 }
8727 
8728 static bool trans_SB(DisasContext *s, arg_SB *a)
8729 {
8730     if (!dc_isar_feature(aa32_sb, s)) {
8731         return false;
8732     }
8733     /*
8734      * TODO: There is no speculation barrier opcode
8735      * for TCG; MB and end the TB instead.
8736      */
8737     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8738     s->base.is_jmp = DISAS_TOO_MANY;
8739     return true;
8740 }
8741 
8742 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8743 {
8744     if (!ENABLE_ARCH_6) {
8745         return false;
8746     }
8747     if (a->E != (s->be_data == MO_BE)) {
8748         gen_helper_setend(cpu_env);
8749         s->base.is_jmp = DISAS_UPDATE_EXIT;
8750     }
8751     return true;
8752 }
8753 
8754 /*
8755  * Preload instructions
8756  * All are nops, contingent on the appropriate arch level.
8757  */
8758 
8759 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8760 {
8761     return ENABLE_ARCH_5TE;
8762 }
8763 
8764 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8765 {
8766     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8767 }
8768 
8769 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8770 {
8771     return ENABLE_ARCH_7;
8772 }
8773 
8774 /*
8775  * If-then
8776  */
8777 
8778 static bool trans_IT(DisasContext *s, arg_IT *a)
8779 {
8780     int cond_mask = a->cond_mask;
8781 
8782     /*
8783      * No actual code generated for this insn, just setup state.
8784      *
8785      * Combinations of firstcond and mask which set up an 0b1111
8786      * condition are UNPREDICTABLE; we take the CONSTRAINED
8787      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8788      * i.e. both meaning "execute always".
8789      */
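         /*
          * condexec_cond holds firstcond[3:1]; firstcond[0] sits in bit 4
          * of condexec_mask alongside the four mask bits, matching the
          * ITSTATE[4:0] layout used when advancing through the block.
          */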
8790     s->condexec_cond = (cond_mask >> 4) & 0xe;
8791     s->condexec_mask = cond_mask & 0x1f;
8792     return true;
8793 }
8794 
8795 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8796 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8797 {
8798     TCGv_i32 rn, rm;
8799     DisasCompare c;
8800 
8801     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8802         return false;
8803     }
8804 
8805     if (a->rm == 13) {
8806         /* SEE "Related encodings" (MVE shifts) */
8807         return false;
8808     }
8809 
8810     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8811         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8812         return false;
8813     }
8814 
8815     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8816     rn = tcg_temp_new_i32();
8817     rm = tcg_temp_new_i32();
8818     if (a->rn == 15) {
8819         tcg_gen_movi_i32(rn, 0);
8820     } else {
8821         load_reg_var(s, rn, a->rn);
8822     }
8823     if (a->rm == 15) {
8824         tcg_gen_movi_i32(rm, 0);
8825     } else {
8826         load_reg_var(s, rm, a->rm);
8827     }
8828 
8829     switch (a->op) {
8830     case 0: /* CSEL */
8831         break;
8832     case 1: /* CSINC */
8833         tcg_gen_addi_i32(rm, rm, 1);
8834         break;
8835     case 2: /* CSINV */
8836         tcg_gen_not_i32(rm, rm);
8837         break;
8838     case 3: /* CSNEG */
8839         tcg_gen_neg_i32(rm, rm);
8840         break;
8841     default:
8842         g_assert_not_reached();
8843     }
8844 
8845     arm_test_cc(&c, a->fcond);
8846     tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8847 
8848     store_reg(s, a->rd, rn);
8849     return true;
8850 }
8851 
8852 /*
8853  * Legacy decoder.
8854  */
8855 
8856 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8857 {
8858     unsigned int cond = insn >> 28;
8859 
8860     /* M variants do not implement ARM mode; this must raise the INVSTATE
8861      * UsageFault exception.
8862      */
8863     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8864         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8865         return;
8866     }
8867 
8868     if (s->pstate_il) {
8869         /*
8870          * Illegal execution state. This has priority over BTI
8871          * exceptions, but comes after instruction abort exceptions.
8872          */
8873         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8874         return;
8875     }
8876 
8877     if (cond == 0xf) {
8878         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8879          * choose to UNDEF. In ARMv5 and above the space is used
8880          * for miscellaneous unconditional instructions.
8881          */
8882         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8883             unallocated_encoding(s);
8884             return;
8885         }
8886 
8887         /* Unconditional instructions.  */
8888         /* TODO: Perhaps merge these into one decodetree output file.  */
8889         if (disas_a32_uncond(s, insn) ||
8890             disas_vfp_uncond(s, insn) ||
8891             disas_neon_dp(s, insn) ||
8892             disas_neon_ls(s, insn) ||
8893             disas_neon_shared(s, insn)) {
8894             return;
8895         }
8896         /* fall back to legacy decoder */
8897 
8898         if ((insn & 0x0e000f00) == 0x0c000100) {
8899             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8900                 /* iWMMXt register transfer.  */
8901                 if (extract32(s->c15_cpar, 1, 1)) {
8902                     if (!disas_iwmmxt_insn(s, insn)) {
8903                         return;
8904                     }
8905                 }
8906             }
8907         }
8908         goto illegal_op;
8909     }
8910     if (cond != 0xe) {
8911         /* If the condition is not "always", generate a conditional
8912            jump to the next instruction.  */
8913         arm_skip_unless(s, cond);
8914     }
8915 
8916     /* TODO: Perhaps merge these into one decodetree output file.  */
8917     if (disas_a32(s, insn) ||
8918         disas_vfp(s, insn)) {
8919         return;
8920     }
8921     /* fall back to legacy decoder */
8922     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8923     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8924         if (((insn & 0x0c000e00) == 0x0c000000)
8925             && ((insn & 0x03000000) != 0x03000000)) {
8926             /* Coprocessor insn, coprocessor 0 or 1 */
8927             disas_xscale_insn(s, insn);
8928             return;
8929         }
8930     }
8931 
8932 illegal_op:
8933     unallocated_encoding(s);
8934 }
8935 
8936 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8937 {
8938     /*
8939      * Return true if this is a 16-bit instruction. We must be precise
8940      * about this (matching the decode).
8941      */
8942     if ((insn >> 11) < 0x1d) {
8943         /* Definitely a 16-bit instruction */
8944         return true;
8945     }
8946 
8947     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8948      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8949      * end up actually treating this as two 16-bit insns, though,
8950      * if it's half of a bl/blx pair that might span a page boundary.
8951      */
8952     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8953         arm_dc_feature(s, ARM_FEATURE_M)) {
8954         /* Thumb2 cores (including all M profile ones) always treat
8955          * 32-bit insns as 32-bit.
8956          */
8957         return false;
8958     }
8959 
8960     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8961         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8962          * is not on the next page; we merge this into a 32-bit
8963          * insn.
8964          */
8965         return false;
8966     }
8967     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8968      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8969      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8970      *  -- handle as single 16 bit insn
8971      */
8972     return true;
8973 }
8974 
8975 /* Translate a 32-bit thumb instruction. */
8976 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8977 {
8978     /*
8979      * ARMv6-M supports a limited subset of Thumb2 instructions.
8980      * Other Thumb-1 architectures have no 32-bit insns other than
8981      * the combined BL/BLX prefix and suffix.
8982      */
8983     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8984         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8985         int i;
8986         bool found = false;
8987         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8988                                                0xf3b08040 /* dsb */,
8989                                                0xf3b08050 /* dmb */,
8990                                                0xf3b08060 /* isb */,
8991                                                0xf3e08000 /* mrs */,
8992                                                0xf000d000 /* bl */};
8993         static const uint32_t armv6m_mask[] = {0xffe0d000,
8994                                                0xfff0d0f0,
8995                                                0xfff0d0f0,
8996                                                0xfff0d0f0,
8997                                                0xffe0d000,
8998                                                0xf800d000};
8999 
9000         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9001             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9002                 found = true;
9003                 break;
9004             }
9005         }
9006         if (!found) {
9007             goto illegal_op;
9008         }
9009     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9010         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9011             unallocated_encoding(s);
9012             return;
9013         }
9014     }
9015 
9016     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9017         /*
9018          * NOCP takes precedence over any UNDEF for (almost) the
9019          * entire wide range of coprocessor-space encodings, so check
9020          * for it first before proceeding to actually decode eg VFP
9021          * insns. This decode also handles the few insns which are
9022          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9023          */
9024         if (disas_m_nocp(s, insn)) {
9025             return;
9026         }
9027     }
9028 
9029     if ((insn & 0xef000000) == 0xef000000) {
9030         /*
9031          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9032          * transform into
9033          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9034          */
9035         uint32_t a32_insn = (insn & 0xe2ffffff) |
9036             ((insn & (1 << 28)) >> 4) | (1 << 28);
9037 
9038         if (disas_neon_dp(s, a32_insn)) {
9039             return;
9040         }
9041     }
9042 
9043     if ((insn & 0xff100000) == 0xf9000000) {
9044         /*
9045          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9046          * transform into
9047          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9048          */
9049         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9050 
9051         if (disas_neon_ls(s, a32_insn)) {
9052             return;
9053         }
9054     }
9055 
9056     /*
9057      * TODO: Perhaps merge these into one decodetree output file.
9058      * Note disas_vfp is written for a32 with cond field in the
9059      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9060      */
9061     if (disas_t32(s, insn) ||
9062         disas_vfp_uncond(s, insn) ||
9063         disas_neon_shared(s, insn) ||
9064         disas_mve(s, insn) ||
9065         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9066         return;
9067     }
9068 
9069 illegal_op:
9070     unallocated_encoding(s);
9071 }
9072 
9073 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9074 {
9075     if (!disas_t16(s, insn)) {
9076         unallocated_encoding(s);
9077     }
9078 }
9079 
9080 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9081 {
9082     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9083      * (False positives are OK, false negatives are not.)
9084      * We know this is a Thumb insn, and our caller ensures we are
9085      * only called if dc->base.pc_next is less than 4 bytes from the page
9086      * boundary, so we cross the page if the first 16 bits indicate
9087      * that this is a 32 bit insn.
9088      */
9089     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9090 
9091     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9092 }
9093 
9094 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9095 {
9096     DisasContext *dc = container_of(dcbase, DisasContext, base);
9097     CPUARMState *env = cs->env_ptr;
9098     ARMCPU *cpu = env_archcpu(env);
9099     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9100     uint32_t condexec, core_mmu_idx;
9101 
9102     dc->isar = &cpu->isar;
9103     dc->condjmp = 0;
9104     dc->pc_save = dc->base.pc_first;
9105     dc->aarch64 = false;
9106     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9107     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9108     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9109     /*
9110      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9111      * is always the IT bits. On M-profile, some of the reserved encodings
9112      * of IT are used instead to indicate either ICI or ECI, which
9113      * indicate partial progress of a restartable insn that was interrupted
9114      * partway through by an exception:
9115      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9116      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9117      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9118      * insn, behave normally".
9119      */
9120     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9121     dc->eci_handled = false;
9122     if (condexec & 0xf) {
9123         dc->condexec_mask = (condexec & 0xf) << 1;
9124         dc->condexec_cond = condexec >> 4;
9125     } else {
9126         if (arm_feature(env, ARM_FEATURE_M)) {
9127             dc->eci = condexec >> 4;
9128         }
9129     }
9130 
9131     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9132     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9133     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9134 #if !defined(CONFIG_USER_ONLY)
9135     dc->user = (dc->current_el == 0);
9136 #endif
9137     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9138     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9139     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9140     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9141     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9142 
9143     if (arm_feature(env, ARM_FEATURE_M)) {
9144         dc->vfp_enabled = 1;
9145         dc->be_data = MO_TE;
9146         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9147         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9148         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9149         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9150         dc->v7m_new_fp_ctxt_needed =
9151             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9152         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9153         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9154     } else {
9155         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9156         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9157         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9158         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9159         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9160             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9161         } else {
9162             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9163             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9164         }
9165         dc->sme_trap_nonstreaming =
9166             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9167     }
9168     dc->lse2 = false; /* applies only to aarch64 */
9169     dc->cp_regs = cpu->cp_regs;
9170     dc->features = env->features;
9171 
9172     /* Single step state. The code-generation logic here is:
9173      *  SS_ACTIVE == 0:
9174      *   generate code with no special handling for single-stepping (except
9175      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9176      *   this happens anyway because those changes are all system register or
9177      *   PSTATE writes).
9178      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9179      *   emit code for one insn
9180      *   emit code to clear PSTATE.SS
9181      *   emit code to generate software step exception for completed step
9182      *   end TB (as usual for having generated an exception)
9183      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9184      *   emit code to generate a software step exception
9185      *   end the TB
9186      */
9187     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9188     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9189     dc->is_ldex = false;
9190 
9191     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9192 
9193     /* If architectural single step active, limit to 1.  */
9194     if (dc->ss_active) {
9195         dc->base.max_insns = 1;
9196     }
9197 
9198     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9199        to those left on the page.  */
9200     if (!dc->thumb) {
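             /*
              * (pc_first | TARGET_PAGE_MASK), taken as a signed value, is
              * the offset into the page minus the page size, so negating it
              * gives the number of bytes left on the page; each A32 insn
              * is 4 bytes.
              */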
9201         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9202         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9203     }
9204 
9205     cpu_V0 = tcg_temp_new_i64();
9206     cpu_V1 = tcg_temp_new_i64();
9207     cpu_M0 = tcg_temp_new_i64();
9208 }
9209 
9210 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9211 {
9212     DisasContext *dc = container_of(dcbase, DisasContext, base);
9213 
9214     /* A note on handling of the condexec (IT) bits:
9215      *
9216      * We want to avoid the overhead of having to write the updated condexec
9217      * bits back to the CPUARMState for every instruction in an IT block. So:
9218      * (1) if the condexec bits are not already zero then we write
9219      * zero back into the CPUARMState now. This avoids complications trying
9220      * to do it at the end of the block. (For example if we don't do this
9221      * it's hard to identify whether we can safely skip writing condexec
9222      * at the end of the TB, which we definitely want to do for the case
9223      * where a TB doesn't do anything with the IT state at all.)
9224      * (2) if we are going to leave the TB then we call gen_set_condexec()
9225      * which will write the correct value into CPUARMState if zero is wrong.
9226      * This is done both for leaving the TB at the end, and for leaving
9227      * it because of an exception we know will happen, which is done in
9228      * gen_exception_insn(). The latter is necessary because we need to
9229      * leave the TB with the PC/IT state just prior to execution of the
9230      * instruction which caused the exception.
9231      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9232      * then the CPUARMState will be wrong and we need to reset it.
9233      * This is handled in the same way as restoration of the
9234      * PC in these situations; we save the value of the condexec bits
9235      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9236      * then uses this to restore them after an exception.
9237      *
9238      * Note that there are no instructions which can read the condexec
9239      * bits, and none which can write non-static values to them, so
9240      * we don't need to care about whether CPUARMState is correct in the
9241      * middle of a TB.
9242      */
9243 
9244     /* Reset the conditional execution bits immediately. This avoids
9245        complications trying to do it at the end of the block.  */
9246     if (dc->condexec_mask || dc->condexec_cond) {
9247         store_cpu_field_constant(0, condexec_bits);
9248     }
9249 }
9250 
9251 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9252 {
9253     DisasContext *dc = container_of(dcbase, DisasContext, base);
9254     /*
9255      * The ECI/ICI bits share PSR bits with the IT bits, so we
9256      * need to reconstitute the bits from the split-out DisasContext
9257      * fields here.
9258      */
9259     uint32_t condexec_bits;
9260     target_ulong pc_arg = dc->base.pc_next;
9261 
9262     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9263         pc_arg &= ~TARGET_PAGE_MASK;
9264     }
9265     if (dc->eci) {
9266         condexec_bits = dc->eci << 4;
9267     } else {
9268         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9269     }
9270     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9271     dc->insn_start = tcg_last_op();
9272 }
9273 
9274 static bool arm_check_kernelpage(DisasContext *dc)
9275 {
9276 #ifdef CONFIG_USER_ONLY
9277     /* Intercept jump to the magic kernel page.  */
9278     if (dc->base.pc_next >= 0xffff0000) {
9279         /* We always get here via a jump, so we know we are not in a
9280            conditional execution block.  */
9281         gen_exception_internal(EXCP_KERNEL_TRAP);
9282         dc->base.is_jmp = DISAS_NORETURN;
9283         return true;
9284     }
9285 #endif
9286     return false;
9287 }
9288 
9289 static bool arm_check_ss_active(DisasContext *dc)
9290 {
9291     if (dc->ss_active && !dc->pstate_ss) {
9292         /* Singlestep state is Active-pending.
9293          * If we're in this state at the start of a TB then either
9294          *  a) we just took an exception to an EL which is being debugged
9295          *     and this is the first insn in the exception handler
9296          *  b) debug exceptions were masked and we just unmasked them
9297          *     without changing EL (eg by clearing PSTATE.D)
9298          * In either case we're going to take a swstep exception in the
9299          * "did not step an insn" case, and so the syndrome ISV and EX
9300          * bits should be zero.
9301          */
9302         assert(dc->base.num_insns == 1);
9303         gen_swstep_exception(dc, 0, 0);
9304         dc->base.is_jmp = DISAS_NORETURN;
9305         return true;
9306     }
9307 
9308     return false;
9309 }
9310 
9311 static void arm_post_translate_insn(DisasContext *dc)
9312 {
9313     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9314         if (dc->pc_save != dc->condlabel.pc_save) {
9315             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9316         }
9317         gen_set_label(dc->condlabel.label);
9318         dc->condjmp = 0;
9319     }
9320 }
9321 
9322 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9323 {
9324     DisasContext *dc = container_of(dcbase, DisasContext, base);
9325     CPUARMState *env = cpu->env_ptr;
9326     uint32_t pc = dc->base.pc_next;
9327     unsigned int insn;
9328 
9329     /* Singlestep exceptions have the highest priority. */
9330     if (arm_check_ss_active(dc)) {
9331         dc->base.pc_next = pc + 4;
9332         return;
9333     }
9334 
9335     if (pc & 3) {
9336         /*
9337          * PC alignment fault.  This has priority over the instruction abort
9338          * that we would receive from a translation fault via arm_ldl_code
9339          * (or the execution of the kernelpage entrypoint). This should only
9340          * be possible after an indirect branch, at the start of the TB.
9341          */
9342         assert(dc->base.num_insns == 1);
9343         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9344         dc->base.is_jmp = DISAS_NORETURN;
9345         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9346         return;
9347     }
9348 
9349     if (arm_check_kernelpage(dc)) {
9350         dc->base.pc_next = pc + 4;
9351         return;
9352     }
9353 
9354     dc->pc_curr = pc;
9355     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9356     dc->insn = insn;
9357     dc->base.pc_next = pc + 4;
9358     disas_arm_insn(dc, insn);
9359 
9360     arm_post_translate_insn(dc);
9361 
9362     /* ARM is a fixed-length ISA.  We performed the cross-page check
9363        in init_disas_context by adjusting max_insns.  */
9364 }
9365 
9366 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9367 {
9368     /* Return true if this Thumb insn is always unconditional,
9369      * even inside an IT block. This is true of only a very few
9370      * instructions: BKPT, HLT, and SG.
9371      *
9372      * A larger class of instructions are UNPREDICTABLE if used
9373      * inside an IT block; we do not need to detect those here, because
9374      * what we do by default (perform the cc check and update the IT
9375      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9376      * choice for those situations.
9377      *
9378      * insn is either a 16-bit or a 32-bit instruction; the two are
9379      * distinguishable because for the 16-bit case the top 16 bits
9380      * are zeroes, and that isn't a valid 32-bit encoding.
9381      */
9382     if ((insn & 0xffffff00) == 0xbe00) {
9383         /* BKPT */
9384         return true;
9385     }
9386 
9387     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9388         !arm_dc_feature(s, ARM_FEATURE_M)) {
9389         /* HLT: v8A only. This is unconditional even when it is going to
9390          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9391          * For v7 cores this was a plain old undefined encoding and so
9392          * honours its cc check. (We might be using the encoding as
9393          * a semihosting trap, but we don't change the cc check behaviour
9394          * on that account, because a debugger connected to a real v7A
9395          * core and emulating semihosting traps by catching the UNDEF
9396          * exception would also only see cases where the cc check passed.
9397          * No guest code should be trying to do a HLT semihosting trap
9398          * in an IT block anyway.
9399          */
9400         return true;
9401     }
9402 
9403     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9404         arm_dc_feature(s, ARM_FEATURE_M)) {
9405         /* SG: v8M only */
9406         return true;
9407     }
9408 
9409     return false;
9410 }
9411 
9412 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9413 {
9414     DisasContext *dc = container_of(dcbase, DisasContext, base);
9415     CPUARMState *env = cpu->env_ptr;
9416     uint32_t pc = dc->base.pc_next;
9417     uint32_t insn;
9418     bool is_16bit;
9419     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9420     TCGOp *insn_eci_rewind = NULL;
9421     target_ulong insn_eci_pc_save = -1;
9422 
9423     /* Misaligned Thumb PC is architecturally impossible. */
9424     assert((dc->base.pc_next & 1) == 0);
9425 
9426     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9427         dc->base.pc_next = pc + 2;
9428         return;
9429     }
9430 
9431     dc->pc_curr = pc;
9432     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9433     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9434     pc += 2;
9435     if (!is_16bit) {
9436         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9437         insn = insn << 16 | insn2;
9438         pc += 2;
9439     }
9440     dc->base.pc_next = pc;
9441     dc->insn = insn;
9442 
9443     if (dc->pstate_il) {
9444         /*
9445          * Illegal execution state. This has priority over BTI
9446          * exceptions, but comes after instruction abort exceptions.
9447          */
9448         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9449         return;
9450     }
9451 
9452     if (dc->eci) {
9453         /*
9454          * For M-profile continuable instructions, ECI/ICI handling
9455          * falls into these cases:
9456          *  - interrupt-continuable instructions
9457          *     These are the various load/store multiple insns (both
9458          *     integer and fp). The ICI bits indicate the register
9459          *     where the load/store can resume. We make the IMPDEF
9460          *     choice to always do "instruction restart", ie ignore
9461          *     the ICI value and always execute the ldm/stm from the
9462          *     start. So all we need to do is zero PSR.ICI if the
9463          *     insn executes.
9464          *  - MVE instructions subject to beat-wise execution
9465          *     Here the ECI bits indicate which beats have already been
9466          *     executed, and we must honour this. Each insn of this
9467          *     type will handle it correctly. We will update PSR.ECI
9468          *     in the helper function for the insn (some ECI values
9469          *     mean that the following insn also has been partially
9470          *     executed).
9471          *  - Special cases which don't advance ECI
9472          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9473          *     bits untouched.
9474          *  - all other insns (the common case)
9475          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9476          *     We place a rewind-marker here. Insns in the previous
9477          *     three categories will set a flag in the DisasContext.
9478          *     If the flag isn't set after we call disas_thumb_insn()
9479          *     or disas_thumb2_insn() then we know we have a "some other
9480          *     insn" case. We will rewind to the marker (ie throwing away
9481          *     all the generated code) and instead emit "take exception".
9482          */
9483         insn_eci_rewind = tcg_last_op();
9484         insn_eci_pc_save = dc->pc_save;
9485     }
9486 
9487     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9488         uint32_t cond = dc->condexec_cond;
9489 
9490         /*
9491          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9492          * "always"; 0xf is not "never".
9493          */
9494         if (cond < 0x0e) {
9495             arm_skip_unless(dc, cond);
9496         }
9497     }
9498 
9499     if (is_16bit) {
9500         disas_thumb_insn(dc, insn);
9501     } else {
9502         disas_thumb2_insn(dc, insn);
9503     }
9504 
9505     /* Advance the Thumb condexec condition.  */
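         /*
          * This mirrors the architectural ITAdvance(): the next condition
          * bit comes from the top bit of the mask, the mask shifts left
          * one place, and a mask of zero means we have left the IT block.
          */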
9506     if (dc->condexec_mask) {
9507         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9508                              ((dc->condexec_mask >> 4) & 1));
9509         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9510         if (dc->condexec_mask == 0) {
9511             dc->condexec_cond = 0;
9512         }
9513     }
9514 
9515     if (dc->eci && !dc->eci_handled) {
9516         /*
9517          * Insn wasn't valid for ECI/ICI at all: undo what we
9518          * just generated and instead emit an exception
9519          */
9520         tcg_remove_ops_after(insn_eci_rewind);
9521         dc->pc_save = insn_eci_pc_save;
9522         dc->condjmp = 0;
9523         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9524     }
9525 
9526     arm_post_translate_insn(dc);
9527 
9528     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9529      * will touch a new page.  This ensures that prefetch aborts occur at
9530      * the right place.
9531      *
9532      * We want to stop the TB if the next insn starts in a new page,
9533      * or if it spans between this page and the next. This means that
9534      * if we're looking at the last halfword in the page we need to
9535      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9536      * or a 32-bit Thumb insn (which won't).
9537      * This is to avoid generating a silly TB with a single 16-bit insn
9538      * in it at the end of this page (which would execute correctly
9539      * but isn't very efficient).
9540      */
9541     if (dc->base.is_jmp == DISAS_NEXT
9542         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9543             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9544                 && insn_crosses_page(env, dc)))) {
9545         dc->base.is_jmp = DISAS_TOO_MANY;
9546     }
9547 }
9548 
9549 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9550 {
9551     DisasContext *dc = container_of(dcbase, DisasContext, base);
9552 
9553     /* At this stage dc->condjmp will only be set when the skipped
9554        instruction was a conditional branch or trap, and the PC has
9555        already been written.  */
9556     gen_set_condexec(dc);
9557     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9558         /* Exception return branches need some special case code at the
9559          * end of the TB, which is complex enough that it has to
9560          * handle the single-step vs not and the condition-failed
9561          * insn codepath itself.
9562          */
9563         gen_bx_excret_final_code(dc);
9564     } else if (unlikely(dc->ss_active)) {
9565         /* Unconditional and "condition passed" instruction codepath. */
9566         switch (dc->base.is_jmp) {
9567         case DISAS_SWI:
9568             gen_ss_advance(dc);
9569             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9570             break;
9571         case DISAS_HVC:
9572             gen_ss_advance(dc);
9573             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9574             break;
9575         case DISAS_SMC:
9576             gen_ss_advance(dc);
9577             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9578             break;
9579         case DISAS_NEXT:
9580         case DISAS_TOO_MANY:
9581         case DISAS_UPDATE_EXIT:
9582         case DISAS_UPDATE_NOCHAIN:
9583             gen_update_pc(dc, curr_insn_len(dc));
9584             /* fall through */
9585         default:
9586             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9587             gen_singlestep_exception(dc);
9588             break;
9589         case DISAS_NORETURN:
9590             break;
9591         }
9592     } else {
9593         /* While branches must always occur at the end of an IT block,
9594            there are a few other things that can cause us to terminate
9595            the TB in the middle of an IT block:
9596             - Exception generating instructions (bkpt, swi, undefined).
9597             - Page boundaries.
9598             - Hardware watchpoints.
9599            Hardware breakpoints have already been handled and skip this code.
9600          */
9601         switch (dc->base.is_jmp) {
9602         case DISAS_NEXT:
9603         case DISAS_TOO_MANY:
9604             gen_goto_tb(dc, 1, curr_insn_len(dc));
9605             break;
9606         case DISAS_UPDATE_NOCHAIN:
9607             gen_update_pc(dc, curr_insn_len(dc));
9608             /* fall through */
9609         case DISAS_JUMP:
9610             gen_goto_ptr();
9611             break;
9612         case DISAS_UPDATE_EXIT:
9613             gen_update_pc(dc, curr_insn_len(dc));
9614             /* fall through */
9615         default:
9616             /* indicate that the hash table must be used to find the next TB */
9617             tcg_gen_exit_tb(NULL, 0);
9618             break;
9619         case DISAS_NORETURN:
9620             /* nothing more to generate */
9621             break;
9622         case DISAS_WFI:
9623             gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9624             /*
9625              * The helper doesn't necessarily throw an exception, but we
9626              * must go back to the main loop to check for interrupts anyway.
9627              */
9628             tcg_gen_exit_tb(NULL, 0);
9629             break;
9630         case DISAS_WFE:
9631             gen_helper_wfe(cpu_env);
9632             break;
9633         case DISAS_YIELD:
9634             gen_helper_yield(cpu_env);
9635             break;
9636         case DISAS_SWI:
9637             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9638             break;
9639         case DISAS_HVC:
9640             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9641             break;
9642         case DISAS_SMC:
9643             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9644             break;
9645         }
9646     }
9647 
9648     if (dc->condjmp) {
9649         /* "Condition failed" instruction codepath for the branch/trap insn */
9650         set_disas_label(dc, dc->condlabel);
9651         gen_set_condexec(dc);
9652         if (unlikely(dc->ss_active)) {
9653             gen_update_pc(dc, curr_insn_len(dc));
9654             gen_singlestep_exception(dc);
9655         } else {
9656             gen_goto_tb(dc, 1, curr_insn_len(dc));
9657         }
9658     }
9659 }
9660 
9661 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9662                              CPUState *cpu, FILE *logfile)
9663 {
9664     DisasContext *dc = container_of(dcbase, DisasContext, base);
9665 
9666     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9667     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9668 }
9669 
9670 static const TranslatorOps arm_translator_ops = {
9671     .init_disas_context = arm_tr_init_disas_context,
9672     .tb_start           = arm_tr_tb_start,
9673     .insn_start         = arm_tr_insn_start,
9674     .translate_insn     = arm_tr_translate_insn,
9675     .tb_stop            = arm_tr_tb_stop,
9676     .disas_log          = arm_tr_disas_log,
9677 };
9678 
9679 static const TranslatorOps thumb_translator_ops = {
9680     .init_disas_context = arm_tr_init_disas_context,
9681     .tb_start           = arm_tr_tb_start,
9682     .insn_start         = arm_tr_insn_start,
9683     .translate_insn     = thumb_tr_translate_insn,
9684     .tb_stop            = arm_tr_tb_stop,
9685     .disas_log          = arm_tr_disas_log,
9686 };
9687 
9688 /* generate intermediate code for basic block 'tb'.  */
9689 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9690                            target_ulong pc, void *host_pc)
9691 {
9692     DisasContext dc = { };
9693     const TranslatorOps *ops = &arm_translator_ops;
9694     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9695 
9696     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9697         ops = &thumb_translator_ops;
9698     }
9699 #ifdef TARGET_AARCH64
9700     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9701         ops = &aarch64_translator_ops;
9702     }
9703 #endif
9704 
9705     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9706 }
9707