xref: /openbmc/qemu/target/arm/tcg/translate.c (revision 19ed42e8)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "translate.h"
24 #include "translate-a32.h"
25 #include "qemu/log.h"
26 #include "arm_ldst.h"
27 #include "semihosting/semihost.h"
28 #include "cpregs.h"
29 #include "exec/helper-proto.h"
30 
31 #define HELPER_H "helper.h"
32 #include "exec/helper-info.c.inc"
33 #undef  HELPER_H
34 
/*
 * Architecture-level feature tests.
 * Every macro below expands to an expression that references a variable
 * named "s", so it can only be used where such a variable is in scope.
 */
35 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
36 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
37 /* currently all emulated v5 cores are also v5TE, so don't bother */
38 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
/* Unlike the others, Jazelle is tested through dc_isar_feature(). */
39 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
40 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
41 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
/* The "6T2" test is keyed on the THUMB2 feature bit. */
42 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
43 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
44 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
45 
46 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
47 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
48 /* These are TCG globals which alias CPUARMState fields */
/* cpu_R[n] aliases CPUARMState.regs[n]; see arm_translate_init(). */
49 static TCGv_i32 cpu_R[16];
/* Flag globals; not static, so they are visible outside this file. */
50 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
51 TCGv_i64 cpu_exclusive_addr;
52 TCGv_i64 cpu_exclusive_val;
53 
/*
 * Names given to the cpu_R[] globals when they are created, indexed by
 * register number (index 15 is named "pc").
 */
54 static const char * const regnames[] =
55     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
56       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
57 
58 
59 /* initialize TCG globals.  */
60 void arm_translate_init(void)
61 {
62     int i;
63 
64     for (i = 0; i < 16; i++) {
65         cpu_R[i] = tcg_global_mem_new_i32(tcg_env,
66                                           offsetof(CPUARMState, regs[i]),
67                                           regnames[i]);
68     }
69     cpu_CF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, CF), "CF");
70     cpu_NF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, NF), "NF");
71     cpu_VF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, VF), "VF");
72     cpu_ZF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, ZF), "ZF");
73 
74     cpu_exclusive_addr = tcg_global_mem_new_i64(tcg_env,
75         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
76     cpu_exclusive_val = tcg_global_mem_new_i64(tcg_env,
77         offsetof(CPUARMState, exclusive_val), "exclusive_val");
78 
79     a64_translate_init();
80 }
81 
82 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
83 {
84     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
85     switch (cmode) {
86     case 0: case 1:
87         /* no-op */
88         break;
89     case 2: case 3:
90         imm <<= 8;
91         break;
92     case 4: case 5:
93         imm <<= 16;
94         break;
95     case 6: case 7:
96         imm <<= 24;
97         break;
98     case 8: case 9:
99         imm |= imm << 16;
100         break;
101     case 10: case 11:
102         imm = (imm << 8) | (imm << 24);
103         break;
104     case 12:
105         imm = (imm << 8) | 0xff;
106         break;
107     case 13:
108         imm = (imm << 16) | 0xffff;
109         break;
110     case 14:
111         if (op) {
112             /*
113              * This and cmode == 15 op == 1 are the only cases where
114              * the top and bottom 32 bits of the encoded constant differ.
115              */
116             uint64_t imm64 = 0;
117             int n;
118 
119             for (n = 0; n < 8; n++) {
120                 if (imm & (1 << n)) {
121                     imm64 |= (0xffULL << (n * 8));
122                 }
123             }
124             return imm64;
125         }
126         imm |= (imm << 8) | (imm << 16) | (imm << 24);
127         break;
128     case 15:
129         if (op) {
130             /* Reserved encoding for AArch32; valid for AArch64 */
131             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
132             if (imm & 0x80) {
133                 imm64 |= 0x8000000000000000ULL;
134             }
135             if (imm & 0x40) {
136                 imm64 |= 0x3fc0000000000000ULL;
137             } else {
138                 imm64 |= 0x4000000000000000ULL;
139             }
140             return imm64;
141         }
142         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
143             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
144         break;
145     }
146     if (op) {
147         imm = ~imm;
148     }
149     return dup_const(MO_32, imm);
150 }
151 
152 /* Generate a label used for skipping this instruction */
153 void arm_gen_condlabel(DisasContext *s)
154 {
155     if (!s->condjmp) {
156         s->condlabel = gen_disas_label(s);
157         s->condjmp = 1;
158     }
159 }
160 
/*
 * Encoding of the "info" argument of disas_set_da_iss():
 * bits [4:0] carry the Rt register number, the bits above are flags.
 */
typedef enum ISSInfo {
    ISSNone     = 0x000,
    ISSRegMask  = 0x01f,    /* mask extracting the Rt register number */
    ISSInvalid  = 0x020,    /* bit 5 */
    ISSIsAcqRel = 0x040,    /* bit 6 */
    ISSIsWrite  = 0x080,    /* bit 7 */
    ISSIs16Bit  = 0x100,    /* bit 8 */
} ISSInfo;
172 
173 /*
174  * Store var into env + offset to a member with size bytes.
175  * Free var after use.
176  */
177 void store_cpu_offset(TCGv_i32 var, int offset, int size)
178 {
179     switch (size) {
180     case 1:
181         tcg_gen_st8_i32(var, tcg_env, offset);
182         break;
183     case 4:
184         tcg_gen_st_i32(var, tcg_env, offset);
185         break;
186     default:
187         g_assert_not_reached();
188     }
189 }
190 
191 /* Save the syndrome information for a Data Abort */
192 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
193 {
194     uint32_t syn;
195     int sas = memop & MO_SIZE;
196     bool sse = memop & MO_SIGN;
197     bool is_acqrel = issinfo & ISSIsAcqRel;
198     bool is_write = issinfo & ISSIsWrite;
199     bool is_16bit = issinfo & ISSIs16Bit;
200     int srt = issinfo & ISSRegMask;
201 
202     if (issinfo & ISSInvalid) {
203         /* Some callsites want to conditionally provide ISS info,
204          * eg "only if this was not a writeback"
205          */
206         return;
207     }
208 
209     if (srt == 15) {
210         /* For AArch32, insns where the src/dest is R15 never generate
211          * ISS information. Catching that here saves checking at all
212          * the call sites.
213          */
214         return;
215     }
216 
217     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
218                                   0, 0, 0, is_write, 0, is_16bit);
219     disas_set_insn_syndrome(s, syn);
220 }
221 
222 static inline int get_a32_user_mem_index(DisasContext *s)
223 {
224     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
225      * insns:
226      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
227      *  otherwise, access as if at PL0.
228      */
229     switch (s->mmu_idx) {
230     case ARMMMUIdx_E3:
231     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
232     case ARMMMUIdx_E10_0:
233     case ARMMMUIdx_E10_1:
234     case ARMMMUIdx_E10_1_PAN:
235         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
236     case ARMMMUIdx_MUser:
237     case ARMMMUIdx_MPriv:
238         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
239     case ARMMMUIdx_MUserNegPri:
240     case ARMMMUIdx_MPrivNegPri:
241         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
242     case ARMMMUIdx_MSUser:
243     case ARMMMUIdx_MSPriv:
244         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
245     case ARMMMUIdx_MSUserNegPri:
246     case ARMMMUIdx_MSPrivNegPri:
247         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
248     default:
249         g_assert_not_reached();
250     }
251 }
252 
253 /* The pc_curr difference for an architectural jump. */
254 static target_long jmp_diff(DisasContext *s, target_long diff)
255 {
256     return diff + (s->thumb ? 4 : 8);
257 }
258 
259 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
260 {
261     assert(s->pc_save != -1);
262     if (tb_cflags(s->base.tb) & CF_PCREL) {
263         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
264     } else {
265         tcg_gen_movi_i32(var, s->pc_curr + diff);
266     }
267 }
268 
269 /* Set a variable to the value of a CPU register.  */
270 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
271 {
272     if (reg == 15) {
273         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
274     } else {
275         tcg_gen_mov_i32(var, cpu_R[reg]);
276     }
277 }
278 
279 /*
280  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
281  * This is used for load/store for which use of PC implies (literal),
282  * or ADD that implies ADR.
283  */
284 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
285 {
286     TCGv_i32 tmp = tcg_temp_new_i32();
287 
288     if (reg == 15) {
289         /*
290          * This address is computed from an aligned PC:
291          * subtract off the low bits.
292          */
293         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
294     } else {
295         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
296     }
297     return tmp;
298 }
299 
300 /* Set a CPU register.  The source must be a temporary and will be
301    marked as dead.  */
302 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
303 {
304     if (reg == 15) {
305         /* In Thumb mode, we must ignore bit 0.
306          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
307          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
308          * We choose to ignore [1:0] in ARM mode for all architecture versions.
309          */
310         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
311         s->base.is_jmp = DISAS_JUMP;
312         s->pc_save = -1;
313     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
314         /* For M-profile SP bits [1:0] are always zero */
315         tcg_gen_andi_i32(var, var, ~3);
316     }
317     tcg_gen_mov_i32(cpu_R[reg], var);
318 }
319 
320 /*
321  * Variant of store_reg which applies v8M stack-limit checks before updating
322  * SP. If the check fails this will result in an exception being taken.
323  * We disable the stack checks for CONFIG_USER_ONLY because we have
324  * no idea what the stack limits should be in that case.
325  * If stack checking is not being done this just acts like store_reg().
326  */
327 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
328 {
329 #ifndef CONFIG_USER_ONLY
330     if (s->v8m_stackcheck) {
331         gen_helper_v8m_stackcheck(tcg_env, var);
332     }
333 #endif
334     store_reg(s, 13, var);
335 }
336 
337 /* Value extensions.  */
/* Each macro extends var in place: var is both source and destination. */
338 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
339 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
340 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
341 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
342 
/* The 16-bit-lane variants are implemented by out-of-line helpers. */
343 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
344 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
345 
346 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
347 {
348     gen_helper_cpsr_write(tcg_env, var, tcg_constant_i32(mask));
349 }
350 
351 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
352 {
353     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
354 
355     if (new_el) {
356         if (m_profile) {
357             gen_helper_rebuild_hflags_m32_newel(tcg_env);
358         } else {
359             gen_helper_rebuild_hflags_a32_newel(tcg_env);
360         }
361     } else {
362         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
363         if (m_profile) {
364             gen_helper_rebuild_hflags_m32(tcg_env, tcg_el);
365         } else {
366             gen_helper_rebuild_hflags_a32(tcg_env, tcg_el);
367         }
368     }
369 }
370 
371 static void gen_exception_internal(int excp)
372 {
373     assert(excp_is_internal(excp));
374     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
375 }
376 
377 static void gen_singlestep_exception(DisasContext *s)
378 {
379     /* We just completed step of an insn. Move from Active-not-pending
380      * to Active-pending, and then also take the swstep exception.
381      * This corresponds to making the (IMPDEF) choice to prioritize
382      * swstep exceptions over asynchronous exceptions taken to an exception
383      * level where debug is disabled. This choice has the advantage that
384      * we do not need to maintain internal state corresponding to the
385      * ISV/EX syndrome bits between completion of the step and generation
386      * of the exception, and our syndrome information is always correct.
387      */
388     gen_ss_advance(s);
389     gen_swstep_exception(s, 1, s->is_ldex);
390     s->base.is_jmp = DISAS_NORETURN;
391 }
392 
393 void clear_eci_state(DisasContext *s)
394 {
395     /*
396      * Clear any ECI/ICI state: used when a load multiple/store
397      * multiple insn executes.
398      */
399     if (s->eci) {
400         store_cpu_field_constant(0, condexec_bits);
401         s->eci = 0;
402     }
403 }
404 
405 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
406 {
407     TCGv_i32 tmp1 = tcg_temp_new_i32();
408     TCGv_i32 tmp2 = tcg_temp_new_i32();
409     tcg_gen_ext16s_i32(tmp1, a);
410     tcg_gen_ext16s_i32(tmp2, b);
411     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
412     tcg_gen_sari_i32(a, a, 16);
413     tcg_gen_sari_i32(b, b, 16);
414     tcg_gen_mul_i32(b, b, a);
415     tcg_gen_mov_i32(a, tmp1);
416 }
417 
418 /* Byteswap each halfword.  */
419 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
420 {
421     TCGv_i32 tmp = tcg_temp_new_i32();
422     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
423     tcg_gen_shri_i32(tmp, var, 8);
424     tcg_gen_and_i32(tmp, tmp, mask);
425     tcg_gen_and_i32(var, var, mask);
426     tcg_gen_shli_i32(var, var, 8);
427     tcg_gen_or_i32(dest, var, tmp);
428 }
429 
430 /* Byteswap low halfword and sign extend.  */
431 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
432 {
433     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
434 }
435 
436 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
437     tmp = (t0 ^ t1) & 0x8000;
438     t0 &= ~0x8000;
439     t1 &= ~0x8000;
440     t0 = (t0 + t1) ^ tmp;
441  */
442 
443 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
444 {
445     TCGv_i32 tmp = tcg_temp_new_i32();
446     tcg_gen_xor_i32(tmp, t0, t1);
447     tcg_gen_andi_i32(tmp, tmp, 0x8000);
448     tcg_gen_andi_i32(t0, t0, ~0x8000);
449     tcg_gen_andi_i32(t1, t1, ~0x8000);
450     tcg_gen_add_i32(t0, t0, t1);
451     tcg_gen_xor_i32(dest, t0, tmp);
452 }
453 
454 /* Set N and Z flags from var.  */
455 static inline void gen_logic_CC(TCGv_i32 var)
456 {
457     tcg_gen_mov_i32(cpu_NF, var);
458     tcg_gen_mov_i32(cpu_ZF, var);
459 }
460 
461 /* dest = T0 + T1 + CF. */
462 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
463 {
464     tcg_gen_add_i32(dest, t0, t1);
465     tcg_gen_add_i32(dest, dest, cpu_CF);
466 }
467 
468 /* dest = T0 - T1 + CF - 1.  */
469 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
470 {
471     tcg_gen_sub_i32(dest, t0, t1);
472     tcg_gen_add_i32(dest, dest, cpu_CF);
473     tcg_gen_subi_i32(dest, dest, 1);
474 }
475 
476 /* dest = T0 + T1. Compute C, N, V and Z flags */
477 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
478 {
479     TCGv_i32 tmp = tcg_temp_new_i32();
480     tcg_gen_movi_i32(tmp, 0);
481     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
482     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
483     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
484     tcg_gen_xor_i32(tmp, t0, t1);
485     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
486     tcg_gen_mov_i32(dest, cpu_NF);
487 }
488 
489 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
490 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
491 {
492     TCGv_i32 tmp = tcg_temp_new_i32();
493     if (TCG_TARGET_HAS_add2_i32) {
494         tcg_gen_movi_i32(tmp, 0);
495         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
496         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
497     } else {
498         TCGv_i64 q0 = tcg_temp_new_i64();
499         TCGv_i64 q1 = tcg_temp_new_i64();
500         tcg_gen_extu_i32_i64(q0, t0);
501         tcg_gen_extu_i32_i64(q1, t1);
502         tcg_gen_add_i64(q0, q0, q1);
503         tcg_gen_extu_i32_i64(q1, cpu_CF);
504         tcg_gen_add_i64(q0, q0, q1);
505         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
506     }
507     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
508     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
509     tcg_gen_xor_i32(tmp, t0, t1);
510     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
511     tcg_gen_mov_i32(dest, cpu_NF);
512 }
513 
514 /* dest = T0 - T1. Compute C, N, V and Z flags */
515 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
516 {
517     TCGv_i32 tmp;
518     tcg_gen_sub_i32(cpu_NF, t0, t1);
519     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
520     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
521     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
522     tmp = tcg_temp_new_i32();
523     tcg_gen_xor_i32(tmp, t0, t1);
524     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
525     tcg_gen_mov_i32(dest, cpu_NF);
526 }
527 
528 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
529 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
530 {
531     TCGv_i32 tmp = tcg_temp_new_i32();
532     tcg_gen_not_i32(tmp, t1);
533     gen_adc_CC(dest, t0, tmp);
534 }
535 
/*
 * GEN_SHIFT(name) defines gen_<name>(dest, t0, t1): shift t0 by the
 * register amount t1 using tcg_gen_<name>_i32.
 * The shift itself uses only t1 & 0x1f; if any of the bits selected by
 * 0xe0 are set in t1 the result is forced to zero instead.
 * Instantiated below for shl and shr.
 */
536 #define GEN_SHIFT(name)                                               \
537 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
538 {                                                                     \
539     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
540     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
541     TCGv_i32 zero = tcg_constant_i32(0);                              \
542     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
543     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
544     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
545     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
546 }
547 GEN_SHIFT(shl)
548 GEN_SHIFT(shr)
549 #undef GEN_SHIFT
550 
551 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
552 {
553     TCGv_i32 tmp1 = tcg_temp_new_i32();
554 
555     tcg_gen_andi_i32(tmp1, t1, 0xff);
556     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
557     tcg_gen_sar_i32(dest, t0, tmp1);
558 }
559 
560 static void shifter_out_im(TCGv_i32 var, int shift)
561 {
562     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
563 }
564 
565 /* Shift by immediate.  Includes special handling for shift == 0.  */
566 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
567                                     int shift, int flags)
568 {
569     switch (shiftop) {
570     case 0: /* LSL */
571         if (shift != 0) {
572             if (flags)
573                 shifter_out_im(var, 32 - shift);
574             tcg_gen_shli_i32(var, var, shift);
575         }
576         break;
577     case 1: /* LSR */
578         if (shift == 0) {
579             if (flags) {
580                 tcg_gen_shri_i32(cpu_CF, var, 31);
581             }
582             tcg_gen_movi_i32(var, 0);
583         } else {
584             if (flags)
585                 shifter_out_im(var, shift - 1);
586             tcg_gen_shri_i32(var, var, shift);
587         }
588         break;
589     case 2: /* ASR */
590         if (shift == 0)
591             shift = 32;
592         if (flags)
593             shifter_out_im(var, shift - 1);
594         if (shift == 32)
595           shift = 31;
596         tcg_gen_sari_i32(var, var, shift);
597         break;
598     case 3: /* ROR/RRX */
599         if (shift != 0) {
600             if (flags)
601                 shifter_out_im(var, shift - 1);
602             tcg_gen_rotri_i32(var, var, shift); break;
603         } else {
604             TCGv_i32 tmp = tcg_temp_new_i32();
605             tcg_gen_shli_i32(tmp, cpu_CF, 31);
606             if (flags)
607                 shifter_out_im(var, 0);
608             tcg_gen_shri_i32(var, var, 1);
609             tcg_gen_or_i32(var, var, tmp);
610         }
611     }
612 };
613 
614 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
615                                      TCGv_i32 shift, int flags)
616 {
617     if (flags) {
618         switch (shiftop) {
619         case 0: gen_helper_shl_cc(var, tcg_env, var, shift); break;
620         case 1: gen_helper_shr_cc(var, tcg_env, var, shift); break;
621         case 2: gen_helper_sar_cc(var, tcg_env, var, shift); break;
622         case 3: gen_helper_ror_cc(var, tcg_env, var, shift); break;
623         }
624     } else {
625         switch (shiftop) {
626         case 0:
627             gen_shl(var, var, shift);
628             break;
629         case 1:
630             gen_shr(var, var, shift);
631             break;
632         case 2:
633             gen_sar(var, var, shift);
634             break;
635         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
636                 tcg_gen_rotr_i32(var, var, shift); break;
637         }
638     }
639 }
640 
641 /*
642  * Generate a conditional based on ARM condition code cc.
643  * This is common between ARM and Aarch64 targets.
644  */
645 void arm_test_cc(DisasCompare *cmp, int cc)
646 {
647     TCGv_i32 value;
648     TCGCond cond;
649 
650     switch (cc) {
651     case 0: /* eq: Z */
652     case 1: /* ne: !Z */
653         cond = TCG_COND_EQ;
654         value = cpu_ZF;
655         break;
656 
657     case 2: /* cs: C */
658     case 3: /* cc: !C */
659         cond = TCG_COND_NE;
660         value = cpu_CF;
661         break;
662 
663     case 4: /* mi: N */
664     case 5: /* pl: !N */
665         cond = TCG_COND_LT;
666         value = cpu_NF;
667         break;
668 
669     case 6: /* vs: V */
670     case 7: /* vc: !V */
671         cond = TCG_COND_LT;
672         value = cpu_VF;
673         break;
674 
675     case 8: /* hi: C && !Z */
676     case 9: /* ls: !C || Z -> !(C && !Z) */
677         cond = TCG_COND_NE;
678         value = tcg_temp_new_i32();
679         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
680            ZF is non-zero for !Z; so AND the two subexpressions.  */
681         tcg_gen_neg_i32(value, cpu_CF);
682         tcg_gen_and_i32(value, value, cpu_ZF);
683         break;
684 
685     case 10: /* ge: N == V -> N ^ V == 0 */
686     case 11: /* lt: N != V -> N ^ V != 0 */
687         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
688         cond = TCG_COND_GE;
689         value = tcg_temp_new_i32();
690         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
691         break;
692 
693     case 12: /* gt: !Z && N == V */
694     case 13: /* le: Z || N != V */
695         cond = TCG_COND_NE;
696         value = tcg_temp_new_i32();
697         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
698          * the sign bit then AND with ZF to yield the result.  */
699         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
700         tcg_gen_sari_i32(value, value, 31);
701         tcg_gen_andc_i32(value, cpu_ZF, value);
702         break;
703 
704     case 14: /* always */
705     case 15: /* always */
706         /* Use the ALWAYS condition, which will fold early.
707          * It doesn't matter what we use for the value.  */
708         cond = TCG_COND_ALWAYS;
709         value = cpu_ZF;
710         goto no_invert;
711 
712     default:
713         fprintf(stderr, "Bad condition code 0x%x\n", cc);
714         abort();
715     }
716 
717     if (cc & 1) {
718         cond = tcg_invert_cond(cond);
719     }
720 
721  no_invert:
722     cmp->cond = cond;
723     cmp->value = value;
724 }
725 
726 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
727 {
728     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
729 }
730 
731 void arm_gen_test_cc(int cc, TCGLabel *label)
732 {
733     DisasCompare cmp;
734     arm_test_cc(&cmp, cc);
735     arm_jump_cc(&cmp, label);
736 }
737 
738 void gen_set_condexec(DisasContext *s)
739 {
740     if (s->condexec_mask) {
741         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
742 
743         store_cpu_field_constant(val, condexec_bits);
744     }
745 }
746 
747 void gen_update_pc(DisasContext *s, target_long diff)
748 {
749     gen_pc_plus_diff(s, cpu_R[15], diff);
750     s->pc_save = s->pc_curr + diff;
751 }
752 
753 /* Set PC and Thumb state from var.  var is marked as dead.  */
754 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
755 {
756     s->base.is_jmp = DISAS_JUMP;
757     tcg_gen_andi_i32(cpu_R[15], var, ~1);
758     tcg_gen_andi_i32(var, var, 1);
759     store_cpu_field(var, thumb);
760     s->pc_save = -1;
761 }
762 
763 /*
764  * Set PC and Thumb state from var. var is marked as dead.
765  * For M-profile CPUs, include logic to detect exception-return
766  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
767  * and BX reg, and no others, and happens only for code in Handler mode.
768  * The Security Extension also requires us to check for the FNC_RETURN
769  * which signals a function return from non-secure state; this can happen
770  * in both Handler and Thread mode.
771  * To avoid having to do multiple comparisons in inline generated code,
772  * we make the check we do here loose, so it will match for EXC_RETURN
773  * in Thread mode. For system emulation do_v7m_exception_exit() checks
774  * for these spurious cases and returns without doing anything (giving
775  * the same behaviour as for a branch to a non-magic address).
776  *
777  * In linux-user mode it is unclear what the right behaviour for an
778  * attempted FNC_RETURN should be, because in real hardware this will go
779  * directly to Secure code (ie not the Linux kernel) which will then treat
780  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
781  * attempt behave the way it would on a CPU without the security extension,
782  * which is to say "like a normal branch". That means we can simply treat
783  * all branches as normal with no magic address behaviour.
784  */
785 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
786 {
787     /* Generate the same code here as for a simple bx, but flag via
788      * s->base.is_jmp that we need to do the rest of the work later.
789      */
790     gen_bx(s, var);
791 #ifndef CONFIG_USER_ONLY
792     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
793         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
794         s->base.is_jmp = DISAS_BX_EXCRET;
795     }
796 #endif
797 }
798 
799 static inline void gen_bx_excret_final_code(DisasContext *s)
800 {
801     /* Generate the code to finish possible exception return and end the TB */
802     DisasLabel excret_label = gen_disas_label(s);
803     uint32_t min_magic;
804 
805     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
806         /* Covers FNC_RETURN and EXC_RETURN magic */
807         min_magic = FNC_RETURN_MIN_MAGIC;
808     } else {
809         /* EXC_RETURN magic only */
810         min_magic = EXC_RETURN_MIN_MAGIC;
811     }
812 
813     /* Is the new PC value in the magic range indicating exception return? */
814     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
815     /* No: end the TB as we would for a DISAS_JMP */
816     if (s->ss_active) {
817         gen_singlestep_exception(s);
818     } else {
819         tcg_gen_exit_tb(NULL, 0);
820     }
821     set_disas_label(s, excret_label);
822     /* Yes: this is an exception return.
823      * At this point in runtime env->regs[15] and env->thumb will hold
824      * the exception-return magic number, which do_v7m_exception_exit()
825      * will read. Nothing else will be able to see those values because
826      * the cpu-exec main loop guarantees that we will always go straight
827      * from raising the exception to the exception-handling code.
828      *
829      * gen_ss_advance(s) does nothing on M profile currently but
830      * calling it is conceptually the right thing as we have executed
831      * this instruction (compare SWI, HVC, SMC handling).
832      */
833     gen_ss_advance(s);
834     gen_exception_internal(EXCP_EXCEPTION_EXIT);
835 }
836 
837 static inline void gen_bxns(DisasContext *s, int rm)
838 {
839     TCGv_i32 var = load_reg(s, rm);
840 
841     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
842      * we need to sync state before calling it, but:
843      *  - we don't need to do gen_update_pc() because the bxns helper will
844      *    always set the PC itself
845      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
846      *    unless it's outside an IT block or the last insn in an IT block,
847      *    so we know that condexec == 0 (already set at the top of the TB)
848      *    is correct in the non-UNPREDICTABLE cases, and we can choose
849      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
850      */
851     gen_helper_v7m_bxns(tcg_env, var);
852     s->base.is_jmp = DISAS_EXIT;
853 }
854 
855 static inline void gen_blxns(DisasContext *s, int rm)
856 {
857     TCGv_i32 var = load_reg(s, rm);
858 
859     /* We don't need to sync condexec state, for the same reason as bxns.
860      * We do however need to set the PC, because the blxns helper reads it.
861      * The blxns helper may throw an exception.
862      */
863     gen_update_pc(s, curr_insn_len(s));
864     gen_helper_v7m_blxns(tcg_env, var);
865     s->base.is_jmp = DISAS_EXIT;
866 }
867 
868 /* Variant of store_reg which uses branch&exchange logic when storing
869    to r15 in ARM architecture v7 and above. The source must be a temporary
870    and will be marked as dead. */
871 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
872 {
873     if (reg == 15 && ENABLE_ARCH_7) {
874         gen_bx(s, var);
875     } else {
876         store_reg(s, reg, var);
877     }
878 }
879 
880 /* Variant of store_reg which uses branch&exchange logic when storing
881  * to r15 in ARM architecture v5T and above. This is used for storing
882  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
883  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
884 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
885 {
886     if (reg == 15 && ENABLE_ARCH_5) {
887         gen_bx_excret(s, var);
888     } else {
889         store_reg(s, reg, var);
890     }
891 }
892 
/*
 * IS_USER_ONLY is 1 when CONFIG_USER_ONLY is defined and 0 otherwise,
 * so that the configuration can be tested in an ordinary C expression
 * instead of an #ifdef.
 */
893 #ifdef CONFIG_USER_ONLY
894 #define IS_USER_ONLY 1
895 #else
896 #define IS_USER_ONLY 0
897 #endif
898 
899 MemOp pow2_align(unsigned i)
900 {
901     static const MemOp mop_align[] = {
902         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16, MO_ALIGN_32
903     };
904     g_assert(i < ARRAY_SIZE(mop_align));
905     return mop_align[i];
906 }
907 
908 /*
909  * Abstractions of "generate code to do a guest load/store for
910  * AArch32", where a vaddr is always 32 bits (and is zero
911  * extended if we're a 64 bit core) and  data is also
912  * 32 bits unless specifically doing a 64 bit access.
913  * These functions work like tcg_gen_qemu_{ld,st}* except
914  * that the address argument is TCGv_i32 rather than TCGv.
915  */
916 
917 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
918 {
919     TCGv addr = tcg_temp_new();
920     tcg_gen_extu_i32_tl(addr, a32);
921 
922     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
923     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
924         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
925     }
926     return addr;
927 }
928 
929 /*
930  * Internal routines are used for NEON cases where the endianness
931  * and/or alignment has already been taken into account and manipulated.
932  */
933 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
934                               TCGv_i32 a32, int index, MemOp opc)
935 {
936     TCGv addr = gen_aa32_addr(s, a32, opc);
937     tcg_gen_qemu_ld_i32(val, addr, index, opc);
938 }
939 
940 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
941                               TCGv_i32 a32, int index, MemOp opc)
942 {
943     TCGv addr = gen_aa32_addr(s, a32, opc);
944     tcg_gen_qemu_st_i32(val, addr, index, opc);
945 }
946 
947 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
948                               TCGv_i32 a32, int index, MemOp opc)
949 {
950     TCGv addr = gen_aa32_addr(s, a32, opc);
951 
952     tcg_gen_qemu_ld_i64(val, addr, index, opc);
953 
954     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
955     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
956         tcg_gen_rotri_i64(val, val, 32);
957     }
958 }
959 
960 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
961                               TCGv_i32 a32, int index, MemOp opc)
962 {
963     TCGv addr = gen_aa32_addr(s, a32, opc);
964 
965     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
966     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
967         TCGv_i64 tmp = tcg_temp_new_i64();
968         tcg_gen_rotri_i64(tmp, val, 32);
969         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
970     } else {
971         tcg_gen_qemu_st_i64(val, addr, index, opc);
972     }
973 }
974 
975 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
976                      int index, MemOp opc)
977 {
978     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
979 }
980 
981 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
982                      int index, MemOp opc)
983 {
984     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
985 }
986 
987 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
988                      int index, MemOp opc)
989 {
990     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
991 }
992 
993 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
994                      int index, MemOp opc)
995 {
996     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
997 }
998 
/*
 * Define gen_aa32_ld<SFX>(): a 32-bit load wrapper around
 * gen_aa32_ld_i32() with the memory operation fixed to MEMOP.
 */
#define DO_GEN_LD(SFX, MEMOP)                                          \
    static inline void gen_aa32_ld##SFX(DisasContext *s, TCGv_i32 val, \
                                        TCGv_i32 a32, int index)       \
    {                                                                  \
        gen_aa32_ld_i32(s, val, a32, index, MEMOP);                    \
    }
1005 
/*
 * Define gen_aa32_st<SFX>(): a 32-bit store wrapper around
 * gen_aa32_st_i32() with the memory operation fixed to MEMOP.
 */
#define DO_GEN_ST(SFX, MEMOP)                                          \
    static inline void gen_aa32_st##SFX(DisasContext *s, TCGv_i32 val, \
                                        TCGv_i32 a32, int index)       \
    {                                                                  \
        gen_aa32_st_i32(s, val, a32, index, MEMOP);                    \
    }
1012 
/*
 * Emit code for HVC with immediate @imm16: a pre-execution check via the
 * pre_hvc helper, then record the immediate in s->svc_imm and end the TB
 * with DISAS_HVC.
 */
static inline void gen_hvc(DisasContext *s, int imm16)
{
    /*
     * The pre HVC helper handles cases when HVC gets trapped
     * as an undefined insn by runtime configuration (ie before
     * the insn really executes).
     */
    gen_update_pc(s, 0);
    gen_helper_pre_hvc(tcg_env);
    /*
     * Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    gen_update_pc(s, curr_insn_len(s));
    s->base.is_jmp = DISAS_HVC;
}
1030 
/*
 * Emit code for SMC: a pre-execution check via the pre_smc helper (passed
 * the syn_aa32_smc() syndrome), then end the TB with DISAS_SMC.
 */
static inline void gen_smc(DisasContext *s)
{
    /*
     * As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    gen_update_pc(s, 0);
    gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa32_smc()));
    gen_update_pc(s, curr_insn_len(s));
    s->base.is_jmp = DISAS_SMC;
}
1041 
/*
 * Raise the QEMU-internal exception @excp for the current insn: sync the
 * condexec state and the PC (offset 0) first, then mark the TB as not
 * returning.
 */
static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_set_condexec(s);
    gen_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}
1049 
1050 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1051 {
1052     gen_helper_exception_with_syndrome_el(tcg_env, tcg_constant_i32(excp),
1053                                           tcg_constant_i32(syndrome), tcg_el);
1054 }
1055 
1056 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1057 {
1058     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1059 }
1060 
1061 static void gen_exception(int excp, uint32_t syndrome)
1062 {
1063     gen_helper_exception_with_syndrome(tcg_env, tcg_constant_i32(excp),
1064                                        tcg_constant_i32(syndrome));
1065 }
1066 
1067 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1068                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1069 {
1070     if (s->aarch64) {
1071         gen_a64_update_pc(s, pc_diff);
1072     } else {
1073         gen_set_condexec(s);
1074         gen_update_pc(s, pc_diff);
1075     }
1076     gen_exception_el_v(excp, syn, tcg_el);
1077     s->base.is_jmp = DISAS_NORETURN;
1078 }
1079 
1080 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1081                            uint32_t syn, uint32_t target_el)
1082 {
1083     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1084                             tcg_constant_i32(target_el));
1085 }
1086 
1087 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1088                         int excp, uint32_t syn)
1089 {
1090     if (s->aarch64) {
1091         gen_a64_update_pc(s, pc_diff);
1092     } else {
1093         gen_set_condexec(s);
1094         gen_update_pc(s, pc_diff);
1095     }
1096     gen_exception(excp, syn);
1097     s->base.is_jmp = DISAS_NORETURN;
1098 }
1099 
/*
 * Call the exception_bkpt_insn helper with syndrome @syn, after syncing
 * the condexec state and the PC (offset 0).  Marks the TB as not
 * returning.
 */
static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
{
    gen_set_condexec(s);
    gen_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syn));
    s->base.is_jmp = DISAS_NORETURN;
}
1107 
1108 void unallocated_encoding(DisasContext *s)
1109 {
1110     /* Unallocated and reserved encodings are uncategorized */
1111     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1112 }
1113 
/*
 * Force a TB lookup after an instruction that changes the CPU state:
 * set r15 to the address following the current insn and end the TB
 * with DISAS_EXIT.
 */
void gen_lookup_tb(DisasContext *s)
{
    gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
    s->base.is_jmp = DISAS_EXIT;
}
1120 
1121 static inline void gen_hlt(DisasContext *s, int imm)
1122 {
1123     /* HLT. This has two purposes.
1124      * Architecturally, it is an external halting debug instruction.
1125      * Since QEMU doesn't implement external debug, we treat this as
1126      * it is required for halting debug disabled: it will UNDEF.
1127      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1128      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1129      * must trigger semihosting even for ARMv7 and earlier, where
1130      * HLT was an undefined encoding.
1131      * In system mode, we don't allow userspace access to
1132      * semihosting, to provide some semblance of security
1133      * (and for consistency with our 32-bit semihosting).
1134      */
1135     if (semihosting_enabled(s->current_el == 0) &&
1136         (imm == (s->thumb ? 0x3c : 0xf000))) {
1137         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1138         return;
1139     }
1140 
1141     unallocated_encoding(s);
1142 }
1143 
1144 /*
1145  * Return the offset of a "full" NEON Dreg.
1146  */
1147 long neon_full_reg_offset(unsigned reg)
1148 {
1149     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1150 }
1151 
1152 /*
1153  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1154  * where 0 is the least significant end of the register.
1155  */
1156 long neon_element_offset(int reg, int element, MemOp memop)
1157 {
1158     int element_size = 1 << (memop & MO_SIZE);
1159     int ofs = element * element_size;
1160 #if HOST_BIG_ENDIAN
1161     /*
1162      * Calculate the offset assuming fully little-endian,
1163      * then XOR to account for the order of the 8-byte units.
1164      */
1165     if (element_size < 8) {
1166         ofs ^= 8 - element_size;
1167     }
1168 #endif
1169     return neon_full_reg_offset(reg) + ofs;
1170 }
1171 
1172 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1173 long vfp_reg_offset(bool dp, unsigned reg)
1174 {
1175     if (dp) {
1176         return neon_element_offset(reg, 0, MO_64);
1177     } else {
1178         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1179     }
1180 }
1181 
1182 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1183 {
1184     long off = neon_element_offset(reg, ele, memop);
1185 
1186     switch (memop) {
1187     case MO_SB:
1188         tcg_gen_ld8s_i32(dest, tcg_env, off);
1189         break;
1190     case MO_UB:
1191         tcg_gen_ld8u_i32(dest, tcg_env, off);
1192         break;
1193     case MO_SW:
1194         tcg_gen_ld16s_i32(dest, tcg_env, off);
1195         break;
1196     case MO_UW:
1197         tcg_gen_ld16u_i32(dest, tcg_env, off);
1198         break;
1199     case MO_UL:
1200     case MO_SL:
1201         tcg_gen_ld_i32(dest, tcg_env, off);
1202         break;
1203     default:
1204         g_assert_not_reached();
1205     }
1206 }
1207 
1208 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1209 {
1210     long off = neon_element_offset(reg, ele, memop);
1211 
1212     switch (memop) {
1213     case MO_SL:
1214         tcg_gen_ld32s_i64(dest, tcg_env, off);
1215         break;
1216     case MO_UL:
1217         tcg_gen_ld32u_i64(dest, tcg_env, off);
1218         break;
1219     case MO_UQ:
1220         tcg_gen_ld_i64(dest, tcg_env, off);
1221         break;
1222     default:
1223         g_assert_not_reached();
1224     }
1225 }
1226 
1227 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1228 {
1229     long off = neon_element_offset(reg, ele, memop);
1230 
1231     switch (memop) {
1232     case MO_8:
1233         tcg_gen_st8_i32(src, tcg_env, off);
1234         break;
1235     case MO_16:
1236         tcg_gen_st16_i32(src, tcg_env, off);
1237         break;
1238     case MO_32:
1239         tcg_gen_st_i32(src, tcg_env, off);
1240         break;
1241     default:
1242         g_assert_not_reached();
1243     }
1244 }
1245 
1246 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1247 {
1248     long off = neon_element_offset(reg, ele, memop);
1249 
1250     switch (memop) {
1251     case MO_32:
1252         tcg_gen_st32_i64(src, tcg_env, off);
1253         break;
1254     case MO_64:
1255         tcg_gen_st_i64(src, tcg_env, off);
1256         break;
1257     default:
1258         g_assert_not_reached();
1259     }
1260 }
1261 
/*
 * Bit 20 of the instruction word.  The iwMMXt decoder tests it to tell
 * the load/read forms (bit set, e.g. TMRRC, WLDR*) from the store/write
 * forms (bit clear, e.g. TMCRR, WSTR*).
 */
#define ARM_CP_RW_BIT   (1 << 20)
1263 
1264 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1265 {
1266     tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1267 }
1268 
1269 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1270 {
1271     tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1272 }
1273 
1274 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1275 {
1276     TCGv_i32 var = tcg_temp_new_i32();
1277     tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1278     return var;
1279 }
1280 
1281 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1282 {
1283     tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1284 }
1285 
1286 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1287 {
1288     iwmmxt_store_reg(cpu_M0, rn);
1289 }
1290 
1291 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1292 {
1293     iwmmxt_load_reg(cpu_M0, rn);
1294 }
1295 
1296 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1297 {
1298     iwmmxt_load_reg(cpu_V1, rn);
1299     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1300 }
1301 
1302 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1303 {
1304     iwmmxt_load_reg(cpu_V1, rn);
1305     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1306 }
1307 
1308 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1309 {
1310     iwmmxt_load_reg(cpu_V1, rn);
1311     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1312 }
1313 
/*
 * Define gen_op_iwmmxt_<op>_M0_wRn(rn): load wR[rn] into cpu_V1 and set
 * M0 = helper_iwmmxt_<op>(M0, V1).  The helper does not take tcg_env.
 */
#define IWMMXT_OP(op)                                          \
static inline void gen_op_iwmmxt_##op##_M0_wRn(int rn)         \
{                                                              \
    iwmmxt_load_reg(cpu_V1, rn);                               \
    gen_helper_iwmmxt_##op(cpu_M0, cpu_M0, cpu_V1);            \
}
1320 
/*
 * As IWMMXT_OP, but the helper is also passed tcg_env:
 * M0 = helper_iwmmxt_<op>(env, M0, V1).
 */
#define IWMMXT_OP_ENV(op)                                      \
static inline void gen_op_iwmmxt_##op##_M0_wRn(int rn)         \
{                                                              \
    iwmmxt_load_reg(cpu_V1, rn);                               \
    gen_helper_iwmmxt_##op(cpu_M0, tcg_env, cpu_M0, cpu_V1);   \
}
1327 
/* Instantiate IWMMXT_OP_ENV for the <op>b, <op>w and <op>l variants. */
#define IWMMXT_OP_ENV_SIZE(op) \
    IWMMXT_OP_ENV(op##b)       \
    IWMMXT_OP_ENV(op##w)       \
    IWMMXT_OP_ENV(op##l)
1332 
/*
 * Define gen_op_iwmmxt_<op>_M0(): a single-operand op on M0 that is
 * passed tcg_env, M0 = helper_iwmmxt_<op>(env, M0).
 */
#define IWMMXT_OP_ENV1(op)                                     \
static inline void gen_op_iwmmxt_##op##_M0(void)               \
{                                                              \
    gen_helper_iwmmxt_##op(cpu_M0, tcg_env, cpu_M0);           \
}
1338 
/* Two-operand ops whose helpers do not take tcg_env. */
IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

/* Two-operand unpack ops, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

/* Single-operand unpack ops on M0. */
IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

/* Compare ops, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

/* Min/max ops, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

/* Add/subtract ops, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

/* Average ops. */
IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

/* Pack ops. */
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)
1393 
1394 static void gen_op_iwmmxt_set_mup(void)
1395 {
1396     TCGv_i32 tmp;
1397     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1398     tcg_gen_ori_i32(tmp, tmp, 2);
1399     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1400 }
1401 
1402 static void gen_op_iwmmxt_set_cup(void)
1403 {
1404     TCGv_i32 tmp;
1405     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1406     tcg_gen_ori_i32(tmp, tmp, 1);
1407     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1408 }
1409 
1410 static void gen_op_iwmmxt_setpsr_nz(void)
1411 {
1412     TCGv_i32 tmp = tcg_temp_new_i32();
1413     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1415 }
1416 
1417 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1418 {
1419     iwmmxt_load_reg(cpu_V1, rn);
1420     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1421     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1422 }
1423 
1424 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1425                                      TCGv_i32 dest)
1426 {
1427     int rd;
1428     uint32_t offset;
1429     TCGv_i32 tmp;
1430 
1431     rd = (insn >> 16) & 0xf;
1432     tmp = load_reg(s, rd);
1433 
1434     offset = (insn & 0xff) << ((insn >> 7) & 2);
1435     if (insn & (1 << 24)) {
1436         /* Pre indexed */
1437         if (insn & (1 << 23))
1438             tcg_gen_addi_i32(tmp, tmp, offset);
1439         else
1440             tcg_gen_addi_i32(tmp, tmp, -offset);
1441         tcg_gen_mov_i32(dest, tmp);
1442         if (insn & (1 << 21)) {
1443             store_reg(s, rd, tmp);
1444         }
1445     } else if (insn & (1 << 21)) {
1446         /* Post indexed */
1447         tcg_gen_mov_i32(dest, tmp);
1448         if (insn & (1 << 23))
1449             tcg_gen_addi_i32(tmp, tmp, offset);
1450         else
1451             tcg_gen_addi_i32(tmp, tmp, -offset);
1452         store_reg(s, rd, tmp);
1453     } else if (!(insn & (1 << 23)))
1454         return 1;
1455     return 0;
1456 }
1457 
1458 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1459 {
1460     int rd = (insn >> 0) & 0xf;
1461     TCGv_i32 tmp;
1462 
1463     if (insn & (1 << 8)) {
1464         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1465             return 1;
1466         } else {
1467             tmp = iwmmxt_load_creg(rd);
1468         }
1469     } else {
1470         tmp = tcg_temp_new_i32();
1471         iwmmxt_load_reg(cpu_V0, rd);
1472         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1473     }
1474     tcg_gen_andi_i32(tmp, tmp, mask);
1475     tcg_gen_mov_i32(dest, tmp);
1476     return 0;
1477 }
1478 
1479 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1480    (ie. an undefined instruction).  */
1481 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1482 {
1483     int rd, wrd;
1484     int rdhi, rdlo, rd0, rd1, i;
1485     TCGv_i32 addr;
1486     TCGv_i32 tmp, tmp2, tmp3;
1487 
1488     if ((insn & 0x0e000e00) == 0x0c000000) {
1489         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1490             wrd = insn & 0xf;
1491             rdlo = (insn >> 12) & 0xf;
1492             rdhi = (insn >> 16) & 0xf;
1493             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1494                 iwmmxt_load_reg(cpu_V0, wrd);
1495                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1496                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1497             } else {                                    /* TMCRR */
1498                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1499                 iwmmxt_store_reg(cpu_V0, wrd);
1500                 gen_op_iwmmxt_set_mup();
1501             }
1502             return 0;
1503         }
1504 
1505         wrd = (insn >> 12) & 0xf;
1506         addr = tcg_temp_new_i32();
1507         if (gen_iwmmxt_address(s, insn, addr)) {
1508             return 1;
1509         }
1510         if (insn & ARM_CP_RW_BIT) {
1511             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1512                 tmp = tcg_temp_new_i32();
1513                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1514                 iwmmxt_store_creg(wrd, tmp);
1515             } else {
1516                 i = 1;
1517                 if (insn & (1 << 8)) {
1518                     if (insn & (1 << 22)) {             /* WLDRD */
1519                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1520                         i = 0;
1521                     } else {                            /* WLDRW wRd */
1522                         tmp = tcg_temp_new_i32();
1523                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1524                     }
1525                 } else {
1526                     tmp = tcg_temp_new_i32();
1527                     if (insn & (1 << 22)) {             /* WLDRH */
1528                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1529                     } else {                            /* WLDRB */
1530                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1531                     }
1532                 }
1533                 if (i) {
1534                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1535                 }
1536                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1537             }
1538         } else {
1539             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1540                 tmp = iwmmxt_load_creg(wrd);
1541                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1542             } else {
1543                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1544                 tmp = tcg_temp_new_i32();
1545                 if (insn & (1 << 8)) {
1546                     if (insn & (1 << 22)) {             /* WSTRD */
1547                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1548                     } else {                            /* WSTRW wRd */
1549                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1550                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1551                     }
1552                 } else {
1553                     if (insn & (1 << 22)) {             /* WSTRH */
1554                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1555                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1556                     } else {                            /* WSTRB */
1557                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1558                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1559                     }
1560                 }
1561             }
1562         }
1563         return 0;
1564     }
1565 
1566     if ((insn & 0x0f000000) != 0x0e000000)
1567         return 1;
1568 
1569     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1570     case 0x000:                                                 /* WOR */
1571         wrd = (insn >> 12) & 0xf;
1572         rd0 = (insn >> 0) & 0xf;
1573         rd1 = (insn >> 16) & 0xf;
1574         gen_op_iwmmxt_movq_M0_wRn(rd0);
1575         gen_op_iwmmxt_orq_M0_wRn(rd1);
1576         gen_op_iwmmxt_setpsr_nz();
1577         gen_op_iwmmxt_movq_wRn_M0(wrd);
1578         gen_op_iwmmxt_set_mup();
1579         gen_op_iwmmxt_set_cup();
1580         break;
1581     case 0x011:                                                 /* TMCR */
1582         if (insn & 0xf)
1583             return 1;
1584         rd = (insn >> 12) & 0xf;
1585         wrd = (insn >> 16) & 0xf;
1586         switch (wrd) {
1587         case ARM_IWMMXT_wCID:
1588         case ARM_IWMMXT_wCASF:
1589             break;
1590         case ARM_IWMMXT_wCon:
1591             gen_op_iwmmxt_set_cup();
1592             /* Fall through.  */
1593         case ARM_IWMMXT_wCSSF:
1594             tmp = iwmmxt_load_creg(wrd);
1595             tmp2 = load_reg(s, rd);
1596             tcg_gen_andc_i32(tmp, tmp, tmp2);
1597             iwmmxt_store_creg(wrd, tmp);
1598             break;
1599         case ARM_IWMMXT_wCGR0:
1600         case ARM_IWMMXT_wCGR1:
1601         case ARM_IWMMXT_wCGR2:
1602         case ARM_IWMMXT_wCGR3:
1603             gen_op_iwmmxt_set_cup();
1604             tmp = load_reg(s, rd);
1605             iwmmxt_store_creg(wrd, tmp);
1606             break;
1607         default:
1608             return 1;
1609         }
1610         break;
1611     case 0x100:                                                 /* WXOR */
1612         wrd = (insn >> 12) & 0xf;
1613         rd0 = (insn >> 0) & 0xf;
1614         rd1 = (insn >> 16) & 0xf;
1615         gen_op_iwmmxt_movq_M0_wRn(rd0);
1616         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1617         gen_op_iwmmxt_setpsr_nz();
1618         gen_op_iwmmxt_movq_wRn_M0(wrd);
1619         gen_op_iwmmxt_set_mup();
1620         gen_op_iwmmxt_set_cup();
1621         break;
1622     case 0x111:                                                 /* TMRC */
1623         if (insn & 0xf)
1624             return 1;
1625         rd = (insn >> 12) & 0xf;
1626         wrd = (insn >> 16) & 0xf;
1627         tmp = iwmmxt_load_creg(wrd);
1628         store_reg(s, rd, tmp);
1629         break;
1630     case 0x300:                                                 /* WANDN */
1631         wrd = (insn >> 12) & 0xf;
1632         rd0 = (insn >> 0) & 0xf;
1633         rd1 = (insn >> 16) & 0xf;
1634         gen_op_iwmmxt_movq_M0_wRn(rd0);
1635         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1636         gen_op_iwmmxt_andq_M0_wRn(rd1);
1637         gen_op_iwmmxt_setpsr_nz();
1638         gen_op_iwmmxt_movq_wRn_M0(wrd);
1639         gen_op_iwmmxt_set_mup();
1640         gen_op_iwmmxt_set_cup();
1641         break;
1642     case 0x200:                                                 /* WAND */
1643         wrd = (insn >> 12) & 0xf;
1644         rd0 = (insn >> 0) & 0xf;
1645         rd1 = (insn >> 16) & 0xf;
1646         gen_op_iwmmxt_movq_M0_wRn(rd0);
1647         gen_op_iwmmxt_andq_M0_wRn(rd1);
1648         gen_op_iwmmxt_setpsr_nz();
1649         gen_op_iwmmxt_movq_wRn_M0(wrd);
1650         gen_op_iwmmxt_set_mup();
1651         gen_op_iwmmxt_set_cup();
1652         break;
1653     case 0x810: case 0xa10:                             /* WMADD */
1654         wrd = (insn >> 12) & 0xf;
1655         rd0 = (insn >> 0) & 0xf;
1656         rd1 = (insn >> 16) & 0xf;
1657         gen_op_iwmmxt_movq_M0_wRn(rd0);
1658         if (insn & (1 << 21))
1659             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1660         else
1661             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1662         gen_op_iwmmxt_movq_wRn_M0(wrd);
1663         gen_op_iwmmxt_set_mup();
1664         break;
1665     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1666         wrd = (insn >> 12) & 0xf;
1667         rd0 = (insn >> 16) & 0xf;
1668         rd1 = (insn >> 0) & 0xf;
1669         gen_op_iwmmxt_movq_M0_wRn(rd0);
1670         switch ((insn >> 22) & 3) {
1671         case 0:
1672             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1673             break;
1674         case 1:
1675             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1676             break;
1677         case 2:
1678             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1679             break;
1680         case 3:
1681             return 1;
1682         }
1683         gen_op_iwmmxt_movq_wRn_M0(wrd);
1684         gen_op_iwmmxt_set_mup();
1685         gen_op_iwmmxt_set_cup();
1686         break;
1687     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1688         wrd = (insn >> 12) & 0xf;
1689         rd0 = (insn >> 16) & 0xf;
1690         rd1 = (insn >> 0) & 0xf;
1691         gen_op_iwmmxt_movq_M0_wRn(rd0);
1692         switch ((insn >> 22) & 3) {
1693         case 0:
1694             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1695             break;
1696         case 1:
1697             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1698             break;
1699         case 2:
1700             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1701             break;
1702         case 3:
1703             return 1;
1704         }
1705         gen_op_iwmmxt_movq_wRn_M0(wrd);
1706         gen_op_iwmmxt_set_mup();
1707         gen_op_iwmmxt_set_cup();
1708         break;
1709     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1710         wrd = (insn >> 12) & 0xf;
1711         rd0 = (insn >> 16) & 0xf;
1712         rd1 = (insn >> 0) & 0xf;
1713         gen_op_iwmmxt_movq_M0_wRn(rd0);
1714         if (insn & (1 << 22))
1715             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1716         else
1717             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1718         if (!(insn & (1 << 20)))
1719             gen_op_iwmmxt_addl_M0_wRn(wrd);
1720         gen_op_iwmmxt_movq_wRn_M0(wrd);
1721         gen_op_iwmmxt_set_mup();
1722         break;
1723     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1724         wrd = (insn >> 12) & 0xf;
1725         rd0 = (insn >> 16) & 0xf;
1726         rd1 = (insn >> 0) & 0xf;
1727         gen_op_iwmmxt_movq_M0_wRn(rd0);
1728         if (insn & (1 << 21)) {
1729             if (insn & (1 << 20))
1730                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1731             else
1732                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1733         } else {
1734             if (insn & (1 << 20))
1735                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1736             else
1737                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1738         }
1739         gen_op_iwmmxt_movq_wRn_M0(wrd);
1740         gen_op_iwmmxt_set_mup();
1741         break;
1742     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1743         wrd = (insn >> 12) & 0xf;
1744         rd0 = (insn >> 16) & 0xf;
1745         rd1 = (insn >> 0) & 0xf;
1746         gen_op_iwmmxt_movq_M0_wRn(rd0);
1747         if (insn & (1 << 21))
1748             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1749         else
1750             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1751         if (!(insn & (1 << 20))) {
1752             iwmmxt_load_reg(cpu_V1, wrd);
1753             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1754         }
1755         gen_op_iwmmxt_movq_wRn_M0(wrd);
1756         gen_op_iwmmxt_set_mup();
1757         break;
1758     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1759         wrd = (insn >> 12) & 0xf;
1760         rd0 = (insn >> 16) & 0xf;
1761         rd1 = (insn >> 0) & 0xf;
1762         gen_op_iwmmxt_movq_M0_wRn(rd0);
1763         switch ((insn >> 22) & 3) {
1764         case 0:
1765             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1766             break;
1767         case 1:
1768             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1769             break;
1770         case 2:
1771             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1772             break;
1773         case 3:
1774             return 1;
1775         }
1776         gen_op_iwmmxt_movq_wRn_M0(wrd);
1777         gen_op_iwmmxt_set_mup();
1778         gen_op_iwmmxt_set_cup();
1779         break;
1780     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1781         wrd = (insn >> 12) & 0xf;
1782         rd0 = (insn >> 16) & 0xf;
1783         rd1 = (insn >> 0) & 0xf;
1784         gen_op_iwmmxt_movq_M0_wRn(rd0);
1785         if (insn & (1 << 22)) {
1786             if (insn & (1 << 20))
1787                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1788             else
1789                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1790         } else {
1791             if (insn & (1 << 20))
1792                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1793             else
1794                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1795         }
1796         gen_op_iwmmxt_movq_wRn_M0(wrd);
1797         gen_op_iwmmxt_set_mup();
1798         gen_op_iwmmxt_set_cup();
1799         break;
1800     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1801         wrd = (insn >> 12) & 0xf;
1802         rd0 = (insn >> 16) & 0xf;
1803         rd1 = (insn >> 0) & 0xf;
1804         gen_op_iwmmxt_movq_M0_wRn(rd0);
1805         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1806         tcg_gen_andi_i32(tmp, tmp, 7);
1807         iwmmxt_load_reg(cpu_V1, rd1);
1808         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1809         gen_op_iwmmxt_movq_wRn_M0(wrd);
1810         gen_op_iwmmxt_set_mup();
1811         break;
1812     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1813         if (((insn >> 6) & 3) == 3)
1814             return 1;
1815         rd = (insn >> 12) & 0xf;
1816         wrd = (insn >> 16) & 0xf;
1817         tmp = load_reg(s, rd);
1818         gen_op_iwmmxt_movq_M0_wRn(wrd);
1819         switch ((insn >> 6) & 3) {
1820         case 0:
1821             tmp2 = tcg_constant_i32(0xff);
1822             tmp3 = tcg_constant_i32((insn & 7) << 3);
1823             break;
1824         case 1:
1825             tmp2 = tcg_constant_i32(0xffff);
1826             tmp3 = tcg_constant_i32((insn & 3) << 4);
1827             break;
1828         case 2:
1829             tmp2 = tcg_constant_i32(0xffffffff);
1830             tmp3 = tcg_constant_i32((insn & 1) << 5);
1831             break;
1832         default:
1833             g_assert_not_reached();
1834         }
1835         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1836         gen_op_iwmmxt_movq_wRn_M0(wrd);
1837         gen_op_iwmmxt_set_mup();
1838         break;
1839     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1840         rd = (insn >> 12) & 0xf;
1841         wrd = (insn >> 16) & 0xf;
1842         if (rd == 15 || ((insn >> 22) & 3) == 3)
1843             return 1;
1844         gen_op_iwmmxt_movq_M0_wRn(wrd);
1845         tmp = tcg_temp_new_i32();
1846         switch ((insn >> 22) & 3) {
1847         case 0:
1848             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1849             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1850             if (insn & 8) {
1851                 tcg_gen_ext8s_i32(tmp, tmp);
1852             } else {
1853                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1854             }
1855             break;
1856         case 1:
1857             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1858             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1859             if (insn & 8) {
1860                 tcg_gen_ext16s_i32(tmp, tmp);
1861             } else {
1862                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1863             }
1864             break;
1865         case 2:
1866             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1867             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1868             break;
1869         }
1870         store_reg(s, rd, tmp);
1871         break;
1872     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1873         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1874             return 1;
1875         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1876         switch ((insn >> 22) & 3) {
1877         case 0:
1878             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1879             break;
1880         case 1:
1881             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1882             break;
1883         case 2:
1884             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1885             break;
1886         }
1887         tcg_gen_shli_i32(tmp, tmp, 28);
1888         gen_set_nzcv(tmp);
1889         break;
1890     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1891         if (((insn >> 6) & 3) == 3)
1892             return 1;
1893         rd = (insn >> 12) & 0xf;
1894         wrd = (insn >> 16) & 0xf;
1895         tmp = load_reg(s, rd);
1896         switch ((insn >> 6) & 3) {
1897         case 0:
1898             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1899             break;
1900         case 1:
1901             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1902             break;
1903         case 2:
1904             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1905             break;
1906         }
1907         gen_op_iwmmxt_movq_wRn_M0(wrd);
1908         gen_op_iwmmxt_set_mup();
1909         break;
1910     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1911         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1912             return 1;
1913         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1914         tmp2 = tcg_temp_new_i32();
1915         tcg_gen_mov_i32(tmp2, tmp);
1916         switch ((insn >> 22) & 3) {
1917         case 0:
1918             for (i = 0; i < 7; i ++) {
1919                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1920                 tcg_gen_and_i32(tmp, tmp, tmp2);
1921             }
1922             break;
1923         case 1:
1924             for (i = 0; i < 3; i ++) {
1925                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1926                 tcg_gen_and_i32(tmp, tmp, tmp2);
1927             }
1928             break;
1929         case 2:
1930             tcg_gen_shli_i32(tmp2, tmp2, 16);
1931             tcg_gen_and_i32(tmp, tmp, tmp2);
1932             break;
1933         }
1934         gen_set_nzcv(tmp);
1935         break;
1936     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1937         wrd = (insn >> 12) & 0xf;
1938         rd0 = (insn >> 16) & 0xf;
1939         gen_op_iwmmxt_movq_M0_wRn(rd0);
1940         switch ((insn >> 22) & 3) {
1941         case 0:
1942             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1943             break;
1944         case 1:
1945             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1946             break;
1947         case 2:
1948             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1949             break;
1950         case 3:
1951             return 1;
1952         }
1953         gen_op_iwmmxt_movq_wRn_M0(wrd);
1954         gen_op_iwmmxt_set_mup();
1955         break;
1956     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1957         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1958             return 1;
1959         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1960         tmp2 = tcg_temp_new_i32();
1961         tcg_gen_mov_i32(tmp2, tmp);
1962         switch ((insn >> 22) & 3) {
1963         case 0:
1964             for (i = 0; i < 7; i ++) {
1965                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1966                 tcg_gen_or_i32(tmp, tmp, tmp2);
1967             }
1968             break;
1969         case 1:
1970             for (i = 0; i < 3; i ++) {
1971                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1972                 tcg_gen_or_i32(tmp, tmp, tmp2);
1973             }
1974             break;
1975         case 2:
1976             tcg_gen_shli_i32(tmp2, tmp2, 16);
1977             tcg_gen_or_i32(tmp, tmp, tmp2);
1978             break;
1979         }
1980         gen_set_nzcv(tmp);
1981         break;
1982     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1983         rd = (insn >> 12) & 0xf;
1984         rd0 = (insn >> 16) & 0xf;
1985         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1986             return 1;
1987         gen_op_iwmmxt_movq_M0_wRn(rd0);
1988         tmp = tcg_temp_new_i32();
1989         switch ((insn >> 22) & 3) {
1990         case 0:
1991             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1992             break;
1993         case 1:
1994             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
1995             break;
1996         case 2:
1997             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
1998             break;
1999         }
2000         store_reg(s, rd, tmp);
2001         break;
2002     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2003     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2004         wrd = (insn >> 12) & 0xf;
2005         rd0 = (insn >> 16) & 0xf;
2006         rd1 = (insn >> 0) & 0xf;
2007         gen_op_iwmmxt_movq_M0_wRn(rd0);
2008         switch ((insn >> 22) & 3) {
2009         case 0:
2010             if (insn & (1 << 21))
2011                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2012             else
2013                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2014             break;
2015         case 1:
2016             if (insn & (1 << 21))
2017                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2018             else
2019                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2020             break;
2021         case 2:
2022             if (insn & (1 << 21))
2023                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2024             else
2025                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2026             break;
2027         case 3:
2028             return 1;
2029         }
2030         gen_op_iwmmxt_movq_wRn_M0(wrd);
2031         gen_op_iwmmxt_set_mup();
2032         gen_op_iwmmxt_set_cup();
2033         break;
2034     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2035     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2036         wrd = (insn >> 12) & 0xf;
2037         rd0 = (insn >> 16) & 0xf;
2038         gen_op_iwmmxt_movq_M0_wRn(rd0);
2039         switch ((insn >> 22) & 3) {
2040         case 0:
2041             if (insn & (1 << 21))
2042                 gen_op_iwmmxt_unpacklsb_M0();
2043             else
2044                 gen_op_iwmmxt_unpacklub_M0();
2045             break;
2046         case 1:
2047             if (insn & (1 << 21))
2048                 gen_op_iwmmxt_unpacklsw_M0();
2049             else
2050                 gen_op_iwmmxt_unpackluw_M0();
2051             break;
2052         case 2:
2053             if (insn & (1 << 21))
2054                 gen_op_iwmmxt_unpacklsl_M0();
2055             else
2056                 gen_op_iwmmxt_unpacklul_M0();
2057             break;
2058         case 3:
2059             return 1;
2060         }
2061         gen_op_iwmmxt_movq_wRn_M0(wrd);
2062         gen_op_iwmmxt_set_mup();
2063         gen_op_iwmmxt_set_cup();
2064         break;
2065     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2066     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2067         wrd = (insn >> 12) & 0xf;
2068         rd0 = (insn >> 16) & 0xf;
2069         gen_op_iwmmxt_movq_M0_wRn(rd0);
2070         switch ((insn >> 22) & 3) {
2071         case 0:
2072             if (insn & (1 << 21))
2073                 gen_op_iwmmxt_unpackhsb_M0();
2074             else
2075                 gen_op_iwmmxt_unpackhub_M0();
2076             break;
2077         case 1:
2078             if (insn & (1 << 21))
2079                 gen_op_iwmmxt_unpackhsw_M0();
2080             else
2081                 gen_op_iwmmxt_unpackhuw_M0();
2082             break;
2083         case 2:
2084             if (insn & (1 << 21))
2085                 gen_op_iwmmxt_unpackhsl_M0();
2086             else
2087                 gen_op_iwmmxt_unpackhul_M0();
2088             break;
2089         case 3:
2090             return 1;
2091         }
2092         gen_op_iwmmxt_movq_wRn_M0(wrd);
2093         gen_op_iwmmxt_set_mup();
2094         gen_op_iwmmxt_set_cup();
2095         break;
2096     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2097     case 0x214: case 0x614: case 0xa14: case 0xe14:
2098         if (((insn >> 22) & 3) == 0)
2099             return 1;
2100         wrd = (insn >> 12) & 0xf;
2101         rd0 = (insn >> 16) & 0xf;
2102         gen_op_iwmmxt_movq_M0_wRn(rd0);
2103         tmp = tcg_temp_new_i32();
2104         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2105             return 1;
2106         }
2107         switch ((insn >> 22) & 3) {
2108         case 1:
2109             gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp);
2110             break;
2111         case 2:
2112             gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp);
2113             break;
2114         case 3:
2115             gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp);
2116             break;
2117         }
2118         gen_op_iwmmxt_movq_wRn_M0(wrd);
2119         gen_op_iwmmxt_set_mup();
2120         gen_op_iwmmxt_set_cup();
2121         break;
2122     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2123     case 0x014: case 0x414: case 0x814: case 0xc14:
2124         if (((insn >> 22) & 3) == 0)
2125             return 1;
2126         wrd = (insn >> 12) & 0xf;
2127         rd0 = (insn >> 16) & 0xf;
2128         gen_op_iwmmxt_movq_M0_wRn(rd0);
2129         tmp = tcg_temp_new_i32();
2130         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2131             return 1;
2132         }
2133         switch ((insn >> 22) & 3) {
2134         case 1:
2135             gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp);
2136             break;
2137         case 2:
2138             gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp);
2139             break;
2140         case 3:
2141             gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp);
2142             break;
2143         }
2144         gen_op_iwmmxt_movq_wRn_M0(wrd);
2145         gen_op_iwmmxt_set_mup();
2146         gen_op_iwmmxt_set_cup();
2147         break;
2148     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2149     case 0x114: case 0x514: case 0x914: case 0xd14:
2150         if (((insn >> 22) & 3) == 0)
2151             return 1;
2152         wrd = (insn >> 12) & 0xf;
2153         rd0 = (insn >> 16) & 0xf;
2154         gen_op_iwmmxt_movq_M0_wRn(rd0);
2155         tmp = tcg_temp_new_i32();
2156         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2157             return 1;
2158         }
2159         switch ((insn >> 22) & 3) {
2160         case 1:
2161             gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp);
2162             break;
2163         case 2:
2164             gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp);
2165             break;
2166         case 3:
2167             gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp);
2168             break;
2169         }
2170         gen_op_iwmmxt_movq_wRn_M0(wrd);
2171         gen_op_iwmmxt_set_mup();
2172         gen_op_iwmmxt_set_cup();
2173         break;
2174     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2175     case 0x314: case 0x714: case 0xb14: case 0xf14:
2176         if (((insn >> 22) & 3) == 0)
2177             return 1;
2178         wrd = (insn >> 12) & 0xf;
2179         rd0 = (insn >> 16) & 0xf;
2180         gen_op_iwmmxt_movq_M0_wRn(rd0);
2181         tmp = tcg_temp_new_i32();
2182         switch ((insn >> 22) & 3) {
2183         case 1:
2184             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2185                 return 1;
2186             }
2187             gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp);
2188             break;
2189         case 2:
2190             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2191                 return 1;
2192             }
2193             gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp);
2194             break;
2195         case 3:
2196             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2197                 return 1;
2198             }
2199             gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp);
2200             break;
2201         }
2202         gen_op_iwmmxt_movq_wRn_M0(wrd);
2203         gen_op_iwmmxt_set_mup();
2204         gen_op_iwmmxt_set_cup();
2205         break;
2206     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2207     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2208         wrd = (insn >> 12) & 0xf;
2209         rd0 = (insn >> 16) & 0xf;
2210         rd1 = (insn >> 0) & 0xf;
2211         gen_op_iwmmxt_movq_M0_wRn(rd0);
2212         switch ((insn >> 22) & 3) {
2213         case 0:
2214             if (insn & (1 << 21))
2215                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2216             else
2217                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2218             break;
2219         case 1:
2220             if (insn & (1 << 21))
2221                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2222             else
2223                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2224             break;
2225         case 2:
2226             if (insn & (1 << 21))
2227                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2228             else
2229                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2230             break;
2231         case 3:
2232             return 1;
2233         }
2234         gen_op_iwmmxt_movq_wRn_M0(wrd);
2235         gen_op_iwmmxt_set_mup();
2236         break;
2237     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2238     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2239         wrd = (insn >> 12) & 0xf;
2240         rd0 = (insn >> 16) & 0xf;
2241         rd1 = (insn >> 0) & 0xf;
2242         gen_op_iwmmxt_movq_M0_wRn(rd0);
2243         switch ((insn >> 22) & 3) {
2244         case 0:
2245             if (insn & (1 << 21))
2246                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2247             else
2248                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2249             break;
2250         case 1:
2251             if (insn & (1 << 21))
2252                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2253             else
2254                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2255             break;
2256         case 2:
2257             if (insn & (1 << 21))
2258                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2259             else
2260                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2261             break;
2262         case 3:
2263             return 1;
2264         }
2265         gen_op_iwmmxt_movq_wRn_M0(wrd);
2266         gen_op_iwmmxt_set_mup();
2267         break;
2268     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2269     case 0x402: case 0x502: case 0x602: case 0x702:
2270         wrd = (insn >> 12) & 0xf;
2271         rd0 = (insn >> 16) & 0xf;
2272         rd1 = (insn >> 0) & 0xf;
2273         gen_op_iwmmxt_movq_M0_wRn(rd0);
2274         iwmmxt_load_reg(cpu_V1, rd1);
2275         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2276                                 tcg_constant_i32((insn >> 20) & 3));
2277         gen_op_iwmmxt_movq_wRn_M0(wrd);
2278         gen_op_iwmmxt_set_mup();
2279         break;
2280     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2281     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2282     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2283     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2284         wrd = (insn >> 12) & 0xf;
2285         rd0 = (insn >> 16) & 0xf;
2286         rd1 = (insn >> 0) & 0xf;
2287         gen_op_iwmmxt_movq_M0_wRn(rd0);
2288         switch ((insn >> 20) & 0xf) {
2289         case 0x0:
2290             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2291             break;
2292         case 0x1:
2293             gen_op_iwmmxt_subub_M0_wRn(rd1);
2294             break;
2295         case 0x3:
2296             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2297             break;
2298         case 0x4:
2299             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2300             break;
2301         case 0x5:
2302             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2303             break;
2304         case 0x7:
2305             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2306             break;
2307         case 0x8:
2308             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2309             break;
2310         case 0x9:
2311             gen_op_iwmmxt_subul_M0_wRn(rd1);
2312             break;
2313         case 0xb:
2314             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2315             break;
2316         default:
2317             return 1;
2318         }
2319         gen_op_iwmmxt_movq_wRn_M0(wrd);
2320         gen_op_iwmmxt_set_mup();
2321         gen_op_iwmmxt_set_cup();
2322         break;
2323     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2324     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2325     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2326     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2327         wrd = (insn >> 12) & 0xf;
2328         rd0 = (insn >> 16) & 0xf;
2329         gen_op_iwmmxt_movq_M0_wRn(rd0);
2330         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2331         gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp);
2332         gen_op_iwmmxt_movq_wRn_M0(wrd);
2333         gen_op_iwmmxt_set_mup();
2334         gen_op_iwmmxt_set_cup();
2335         break;
2336     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2337     case 0x418: case 0x518: case 0x618: case 0x718:
2338     case 0x818: case 0x918: case 0xa18: case 0xb18:
2339     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2340         wrd = (insn >> 12) & 0xf;
2341         rd0 = (insn >> 16) & 0xf;
2342         rd1 = (insn >> 0) & 0xf;
2343         gen_op_iwmmxt_movq_M0_wRn(rd0);
2344         switch ((insn >> 20) & 0xf) {
2345         case 0x0:
2346             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2347             break;
2348         case 0x1:
2349             gen_op_iwmmxt_addub_M0_wRn(rd1);
2350             break;
2351         case 0x3:
2352             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2353             break;
2354         case 0x4:
2355             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2356             break;
2357         case 0x5:
2358             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2359             break;
2360         case 0x7:
2361             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2362             break;
2363         case 0x8:
2364             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2365             break;
2366         case 0x9:
2367             gen_op_iwmmxt_addul_M0_wRn(rd1);
2368             break;
2369         case 0xb:
2370             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2371             break;
2372         default:
2373             return 1;
2374         }
2375         gen_op_iwmmxt_movq_wRn_M0(wrd);
2376         gen_op_iwmmxt_set_mup();
2377         gen_op_iwmmxt_set_cup();
2378         break;
2379     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2380     case 0x408: case 0x508: case 0x608: case 0x708:
2381     case 0x808: case 0x908: case 0xa08: case 0xb08:
2382     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2383         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2384             return 1;
2385         wrd = (insn >> 12) & 0xf;
2386         rd0 = (insn >> 16) & 0xf;
2387         rd1 = (insn >> 0) & 0xf;
2388         gen_op_iwmmxt_movq_M0_wRn(rd0);
2389         switch ((insn >> 22) & 3) {
2390         case 1:
2391             if (insn & (1 << 21))
2392                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2393             else
2394                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2395             break;
2396         case 2:
2397             if (insn & (1 << 21))
2398                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2399             else
2400                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2401             break;
2402         case 3:
2403             if (insn & (1 << 21))
2404                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2405             else
2406                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2407             break;
2408         }
2409         gen_op_iwmmxt_movq_wRn_M0(wrd);
2410         gen_op_iwmmxt_set_mup();
2411         gen_op_iwmmxt_set_cup();
2412         break;
2413     case 0x201: case 0x203: case 0x205: case 0x207:
2414     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2415     case 0x211: case 0x213: case 0x215: case 0x217:
2416     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2417         wrd = (insn >> 5) & 0xf;
2418         rd0 = (insn >> 12) & 0xf;
2419         rd1 = (insn >> 0) & 0xf;
2420         if (rd0 == 0xf || rd1 == 0xf)
2421             return 1;
2422         gen_op_iwmmxt_movq_M0_wRn(wrd);
2423         tmp = load_reg(s, rd0);
2424         tmp2 = load_reg(s, rd1);
2425         switch ((insn >> 16) & 0xf) {
2426         case 0x0:                                       /* TMIA */
2427             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2428             break;
2429         case 0x8:                                       /* TMIAPH */
2430             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2431             break;
2432         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2433             if (insn & (1 << 16))
2434                 tcg_gen_shri_i32(tmp, tmp, 16);
2435             if (insn & (1 << 17))
2436                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2437             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2438             break;
2439         default:
2440             return 1;
2441         }
2442         gen_op_iwmmxt_movq_wRn_M0(wrd);
2443         gen_op_iwmmxt_set_mup();
2444         break;
2445     default:
2446         return 1;
2447     }
2448 
2449     return 0;
2450 }
2451 
2452 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2453    (ie. an undefined instruction).  */
2454 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2455 {
2456     int acc, rd0, rd1, rdhi, rdlo;
2457     TCGv_i32 tmp, tmp2;
2458 
2459     if ((insn & 0x0ff00f10) == 0x0e200010) {
2460         /* Multiply with Internal Accumulate Format */
2461         rd0 = (insn >> 12) & 0xf;
2462         rd1 = insn & 0xf;
2463         acc = (insn >> 5) & 7;
2464 
2465         if (acc != 0)
2466             return 1;
2467 
2468         tmp = load_reg(s, rd0);
2469         tmp2 = load_reg(s, rd1);
2470         switch ((insn >> 16) & 0xf) {
2471         case 0x0:                                       /* MIA */
2472             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2473             break;
2474         case 0x8:                                       /* MIAPH */
2475             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2476             break;
2477         case 0xc:                                       /* MIABB */
2478         case 0xd:                                       /* MIABT */
2479         case 0xe:                                       /* MIATB */
2480         case 0xf:                                       /* MIATT */
2481             if (insn & (1 << 16))
2482                 tcg_gen_shri_i32(tmp, tmp, 16);
2483             if (insn & (1 << 17))
2484                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2485             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2486             break;
2487         default:
2488             return 1;
2489         }
2490 
2491         gen_op_iwmmxt_movq_wRn_M0(acc);
2492         return 0;
2493     }
2494 
2495     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2496         /* Internal Accumulator Access Format */
2497         rdhi = (insn >> 16) & 0xf;
2498         rdlo = (insn >> 12) & 0xf;
2499         acc = insn & 7;
2500 
2501         if (acc != 0)
2502             return 1;
2503 
2504         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2505             iwmmxt_load_reg(cpu_V0, acc);
2506             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2507             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2508             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2509         } else {                                        /* MAR */
2510             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2511             iwmmxt_store_reg(cpu_V0, acc);
2512         }
2513         return 0;
2514     }
2515 
2516     return 1;
2517 }
2518 
/* Emit the TCG "lookup and goto ptr" operation. */
static void gen_goto_ptr(void)
{
    tcg_gen_lookup_and_goto_ptr();
}
2523 
2524 /* This will end the TB but doesn't guarantee we'll return to
2525  * cpu_loop_exec. Any live exit_requests will be processed as we
2526  * enter the next TB.
2527  */
2528 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2529 {
2530     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2531         /*
2532          * For pcrel, the pc must always be up-to-date on entry to
2533          * the linked TB, so that it can use simple additions for all
2534          * further adjustments.  For !pcrel, the linked TB is compiled
2535          * to know its full virtual address, so we can delay the
2536          * update to pc to the unlinked path.  A long chain of links
2537          * can thus avoid many updates to the PC.
2538          */
2539         if (tb_cflags(s->base.tb) & CF_PCREL) {
2540             gen_update_pc(s, diff);
2541             tcg_gen_goto_tb(n);
2542         } else {
2543             tcg_gen_goto_tb(n);
2544             gen_update_pc(s, diff);
2545         }
2546         tcg_gen_exit_tb(s->base.tb, n);
2547     } else {
2548         gen_update_pc(s, diff);
2549         gen_goto_ptr();
2550     }
2551     s->base.is_jmp = DISAS_NORETURN;
2552 }
2553 
2554 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2555 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2556 {
2557     if (unlikely(s->ss_active)) {
2558         /* An indirect jump so that we still trigger the debug exception.  */
2559         gen_update_pc(s, diff);
2560         s->base.is_jmp = DISAS_JUMP;
2561         return;
2562     }
2563     switch (s->base.is_jmp) {
2564     case DISAS_NEXT:
2565     case DISAS_TOO_MANY:
2566     case DISAS_NORETURN:
2567         /*
2568          * The normal case: just go to the destination TB.
2569          * NB: NORETURN happens if we generate code like
2570          *    gen_brcondi(l);
2571          *    gen_jmp();
2572          *    gen_set_label(l);
2573          *    gen_jmp();
2574          * on the second call to gen_jmp().
2575          */
2576         gen_goto_tb(s, tbno, diff);
2577         break;
2578     case DISAS_UPDATE_NOCHAIN:
2579     case DISAS_UPDATE_EXIT:
2580         /*
2581          * We already decided we're leaving the TB for some other reason.
2582          * Avoid using goto_tb so we really do exit back to the main loop
2583          * and don't chain to another TB.
2584          */
2585         gen_update_pc(s, diff);
2586         gen_goto_ptr();
2587         s->base.is_jmp = DISAS_NORETURN;
2588         break;
2589     default:
2590         /*
2591          * We shouldn't be emitting code for a jump and also have
2592          * is_jmp set to one of the special cases like DISAS_SWI.
2593          */
2594         g_assert_not_reached();
2595     }
2596 }
2597 
2598 static inline void gen_jmp(DisasContext *s, target_long diff)
2599 {
2600     gen_jmp_tb(s, diff, 0);
2601 }
2602 
2603 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2604 {
2605     if (x)
2606         tcg_gen_sari_i32(t0, t0, 16);
2607     else
2608         gen_sxth(t0);
2609     if (y)
2610         tcg_gen_sari_i32(t1, t1, 16);
2611     else
2612         gen_sxth(t1);
2613     tcg_gen_mul_i32(t0, t0, t1);
2614 }
2615 
2616 /* Return the mask of PSR bits set by a MSR instruction.  */
2617 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2618 {
2619     uint32_t mask = 0;
2620 
2621     if (flags & (1 << 0)) {
2622         mask |= 0xff;
2623     }
2624     if (flags & (1 << 1)) {
2625         mask |= 0xff00;
2626     }
2627     if (flags & (1 << 2)) {
2628         mask |= 0xff0000;
2629     }
2630     if (flags & (1 << 3)) {
2631         mask |= 0xff000000;
2632     }
2633 
2634     /* Mask out undefined and reserved bits.  */
2635     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2636 
2637     /* Mask out execution state.  */
2638     if (!spsr) {
2639         mask &= ~CPSR_EXEC;
2640     }
2641 
2642     /* Mask out privileged bits.  */
2643     if (IS_USER(s)) {
2644         mask &= CPSR_USER;
2645     }
2646     return mask;
2647 }
2648 
2649 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2650 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2651 {
2652     TCGv_i32 tmp;
2653     if (spsr) {
2654         /* ??? This is also undefined in system mode.  */
2655         if (IS_USER(s))
2656             return 1;
2657 
2658         tmp = load_cpu_field(spsr);
2659         tcg_gen_andi_i32(tmp, tmp, ~mask);
2660         tcg_gen_andi_i32(t0, t0, mask);
2661         tcg_gen_or_i32(tmp, tmp, t0);
2662         store_cpu_field(tmp, spsr);
2663     } else {
2664         gen_set_cpsr(t0, mask);
2665     }
2666     gen_lookup_tb(s);
2667     return 0;
2668 }
2669 
2670 /* Returns nonzero if access to the PSR is not permitted.  */
2671 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2672 {
2673     TCGv_i32 tmp;
2674     tmp = tcg_temp_new_i32();
2675     tcg_gen_movi_i32(tmp, val);
2676     return gen_set_psr(s, mask, spsr, tmp);
2677 }
2678 
2679 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2680                                      int *tgtmode, int *regno)
2681 {
2682     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2683      * the target mode and register number, and identify the various
2684      * unpredictable cases.
2685      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2686      *  + executed in user mode
2687      *  + using R15 as the src/dest register
2688      *  + accessing an unimplemented register
2689      *  + accessing a register that's inaccessible at current PL/security state*
2690      *  + accessing a register that you could access with a different insn
2691      * We choose to UNDEF in all these cases.
2692      * Since we don't know which of the various AArch32 modes we are in
2693      * we have to defer some checks to runtime.
2694      * Accesses to Monitor mode registers from Secure EL1 (which implies
2695      * that EL3 is AArch64) must trap to EL3.
2696      *
2697      * If the access checks fail this function will emit code to take
2698      * an exception and return false. Otherwise it will return true,
2699      * and set *tgtmode and *regno appropriately.
2700      */
2701     /* These instructions are present only in ARMv8, or in ARMv7 with the
2702      * Virtualization Extensions.
2703      */
2704     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2705         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2706         goto undef;
2707     }
2708 
2709     if (IS_USER(s) || rn == 15) {
2710         goto undef;
2711     }
2712 
2713     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2714      * of registers into (r, sysm).
2715      */
2716     if (r) {
2717         /* SPSRs for other modes */
2718         switch (sysm) {
2719         case 0xe: /* SPSR_fiq */
2720             *tgtmode = ARM_CPU_MODE_FIQ;
2721             break;
2722         case 0x10: /* SPSR_irq */
2723             *tgtmode = ARM_CPU_MODE_IRQ;
2724             break;
2725         case 0x12: /* SPSR_svc */
2726             *tgtmode = ARM_CPU_MODE_SVC;
2727             break;
2728         case 0x14: /* SPSR_abt */
2729             *tgtmode = ARM_CPU_MODE_ABT;
2730             break;
2731         case 0x16: /* SPSR_und */
2732             *tgtmode = ARM_CPU_MODE_UND;
2733             break;
2734         case 0x1c: /* SPSR_mon */
2735             *tgtmode = ARM_CPU_MODE_MON;
2736             break;
2737         case 0x1e: /* SPSR_hyp */
2738             *tgtmode = ARM_CPU_MODE_HYP;
2739             break;
2740         default: /* unallocated */
2741             goto undef;
2742         }
2743         /* We arbitrarily assign SPSR a register number of 16. */
2744         *regno = 16;
2745     } else {
2746         /* general purpose registers for other modes */
2747         switch (sysm) {
2748         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2749             *tgtmode = ARM_CPU_MODE_USR;
2750             *regno = sysm + 8;
2751             break;
2752         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2753             *tgtmode = ARM_CPU_MODE_FIQ;
2754             *regno = sysm;
2755             break;
2756         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2757             *tgtmode = ARM_CPU_MODE_IRQ;
2758             *regno = sysm & 1 ? 13 : 14;
2759             break;
2760         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2761             *tgtmode = ARM_CPU_MODE_SVC;
2762             *regno = sysm & 1 ? 13 : 14;
2763             break;
2764         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2765             *tgtmode = ARM_CPU_MODE_ABT;
2766             *regno = sysm & 1 ? 13 : 14;
2767             break;
2768         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2769             *tgtmode = ARM_CPU_MODE_UND;
2770             *regno = sysm & 1 ? 13 : 14;
2771             break;
2772         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2773             *tgtmode = ARM_CPU_MODE_MON;
2774             *regno = sysm & 1 ? 13 : 14;
2775             break;
2776         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2777             *tgtmode = ARM_CPU_MODE_HYP;
2778             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2779             *regno = sysm & 1 ? 13 : 17;
2780             break;
2781         default: /* unallocated */
2782             goto undef;
2783         }
2784     }
2785 
2786     /* Catch the 'accessing inaccessible register' cases we can detect
2787      * at translate time.
2788      */
2789     switch (*tgtmode) {
2790     case ARM_CPU_MODE_MON:
2791         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2792             goto undef;
2793         }
2794         if (s->current_el == 1) {
2795             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2796              * then accesses to Mon registers trap to Secure EL2, if it exists,
2797              * otherwise EL3.
2798              */
2799             TCGv_i32 tcg_el;
2800 
2801             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2802                 dc_isar_feature(aa64_sel2, s)) {
2803                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2804                 tcg_el = load_cpu_field_low32(cp15.scr_el3);
2805                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2806                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2807             } else {
2808                 tcg_el = tcg_constant_i32(3);
2809             }
2810 
2811             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2812                                     syn_uncategorized(), tcg_el);
2813             return false;
2814         }
2815         break;
2816     case ARM_CPU_MODE_HYP:
2817         /*
2818          * r13_hyp can only be accessed from Monitor mode, and so we
2819          * can forbid accesses from EL2 or below.
2820          * elr_hyp can be accessed also from Hyp mode, so forbid
2821          * accesses from EL0 or EL1.
2822          * SPSR_hyp is supposed to be in the same category as r13_hyp
2823          * and UNPREDICTABLE if accessed from anything except Monitor
2824          * mode. However there is some real-world code that will do
2825          * it because at least some hardware happens to permit the
2826          * access. (Notably a standard Cortex-R52 startup code fragment
2827          * does this.) So we permit SPSR_hyp from Hyp mode also, to allow
2828          * this (incorrect) guest code to run.
2829          */
2830         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2
2831             || (s->current_el < 3 && *regno != 16 && *regno != 17)) {
2832             goto undef;
2833         }
2834         break;
2835     default:
2836         break;
2837     }
2838 
2839     return true;
2840 
2841 undef:
2842     /* If we get here then some access check did not pass */
2843     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2844     return false;
2845 }
2846 
2847 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2848 {
2849     TCGv_i32 tcg_reg;
2850     int tgtmode = 0, regno = 0;
2851 
2852     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2853         return;
2854     }
2855 
2856     /* Sync state because msr_banked() can raise exceptions */
2857     gen_set_condexec(s);
2858     gen_update_pc(s, 0);
2859     tcg_reg = load_reg(s, rn);
2860     gen_helper_msr_banked(tcg_env, tcg_reg,
2861                           tcg_constant_i32(tgtmode),
2862                           tcg_constant_i32(regno));
2863     s->base.is_jmp = DISAS_UPDATE_EXIT;
2864 }
2865 
2866 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2867 {
2868     TCGv_i32 tcg_reg;
2869     int tgtmode = 0, regno = 0;
2870 
2871     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2872         return;
2873     }
2874 
2875     /* Sync state because mrs_banked() can raise exceptions */
2876     gen_set_condexec(s);
2877     gen_update_pc(s, 0);
2878     tcg_reg = tcg_temp_new_i32();
2879     gen_helper_mrs_banked(tcg_reg, tcg_env,
2880                           tcg_constant_i32(tgtmode),
2881                           tcg_constant_i32(regno));
2882     store_reg(s, rn, tcg_reg);
2883     s->base.is_jmp = DISAS_UPDATE_EXIT;
2884 }
2885 
2886 /* Store value to PC as for an exception return (ie don't
2887  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2888  * will do the masking based on the new value of the Thumb bit.
2889  */
2890 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2891 {
2892     tcg_gen_mov_i32(cpu_R[15], pc);
2893 }
2894 
2895 /* Generate a v6 exception return.  Marks both values as dead.  */
2896 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2897 {
2898     store_pc_exc_ret(s, pc);
2899     /* The cpsr_write_eret helper will mask the low bits of PC
2900      * appropriately depending on the new Thumb bit, so it must
2901      * be called after storing the new PC.
2902      */
2903     translator_io_start(&s->base);
2904     gen_helper_cpsr_write_eret(tcg_env, cpsr);
2905     /* Must exit loop to check un-masked IRQs */
2906     s->base.is_jmp = DISAS_EXIT;
2907 }
2908 
2909 /* Generate an old-style exception return. Marks pc as dead. */
2910 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2911 {
2912     gen_rfe(s, pc, load_cpu_field(spsr));
2913 }
2914 
2915 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2916                             uint32_t opr_sz, uint32_t max_sz,
2917                             gen_helper_gvec_3_ptr *fn)
2918 {
2919     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2920 
2921     tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
2922     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2923                        opr_sz, max_sz, 0, fn);
2924 }
2925 
2926 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2927                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2928 {
2929     static gen_helper_gvec_3_ptr * const fns[2] = {
2930         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2931     };
2932     tcg_debug_assert(vece >= 1 && vece <= 2);
2933     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2934 }
2935 
2936 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
2946 #define GEN_CMP0(NAME, COND)                              \
2947     void NAME(unsigned vece, uint32_t d, uint32_t m,      \
2948               uint32_t opr_sz, uint32_t max_sz)           \
2949     { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
2950 
2951 GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
2952 GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
2953 GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
2954 GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
2955 GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
2956 
2957 #undef GEN_CMP0
2958 
2959 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2960 {
2961     tcg_gen_vec_sar8i_i64(a, a, shift);
2962     tcg_gen_vec_add8_i64(d, d, a);
2963 }
2964 
2965 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2966 {
2967     tcg_gen_vec_sar16i_i64(a, a, shift);
2968     tcg_gen_vec_add16_i64(d, d, a);
2969 }
2970 
2971 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
2972 {
2973     tcg_gen_sari_i32(a, a, shift);
2974     tcg_gen_add_i32(d, d, a);
2975 }
2976 
2977 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2978 {
2979     tcg_gen_sari_i64(a, a, shift);
2980     tcg_gen_add_i64(d, d, a);
2981 }
2982 
2983 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
2984 {
2985     tcg_gen_sari_vec(vece, a, a, sh);
2986     tcg_gen_add_vec(vece, d, d, a);
2987 }
2988 
2989 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
2990                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
2991 {
2992     static const TCGOpcode vecop_list[] = {
2993         INDEX_op_sari_vec, INDEX_op_add_vec, 0
2994     };
2995     static const GVecGen2i ops[4] = {
2996         { .fni8 = gen_ssra8_i64,
2997           .fniv = gen_ssra_vec,
2998           .fno = gen_helper_gvec_ssra_b,
2999           .load_dest = true,
3000           .opt_opc = vecop_list,
3001           .vece = MO_8 },
3002         { .fni8 = gen_ssra16_i64,
3003           .fniv = gen_ssra_vec,
3004           .fno = gen_helper_gvec_ssra_h,
3005           .load_dest = true,
3006           .opt_opc = vecop_list,
3007           .vece = MO_16 },
3008         { .fni4 = gen_ssra32_i32,
3009           .fniv = gen_ssra_vec,
3010           .fno = gen_helper_gvec_ssra_s,
3011           .load_dest = true,
3012           .opt_opc = vecop_list,
3013           .vece = MO_32 },
3014         { .fni8 = gen_ssra64_i64,
3015           .fniv = gen_ssra_vec,
3016           .fno = gen_helper_gvec_ssra_d,
3017           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3018           .opt_opc = vecop_list,
3019           .load_dest = true,
3020           .vece = MO_64 },
3021     };
3022 
3023     /* tszimm encoding produces immediates in the range [1..esize]. */
3024     tcg_debug_assert(shift > 0);
3025     tcg_debug_assert(shift <= (8 << vece));
3026 
3027     /*
3028      * Shifts larger than the element size are architecturally valid.
3029      * Signed results in all sign bits.
3030      */
3031     shift = MIN(shift, (8 << vece) - 1);
3032     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3033 }
3034 
3035 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3036 {
3037     tcg_gen_vec_shr8i_i64(a, a, shift);
3038     tcg_gen_vec_add8_i64(d, d, a);
3039 }
3040 
3041 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3042 {
3043     tcg_gen_vec_shr16i_i64(a, a, shift);
3044     tcg_gen_vec_add16_i64(d, d, a);
3045 }
3046 
3047 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3048 {
3049     tcg_gen_shri_i32(a, a, shift);
3050     tcg_gen_add_i32(d, d, a);
3051 }
3052 
3053 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3054 {
3055     tcg_gen_shri_i64(a, a, shift);
3056     tcg_gen_add_i64(d, d, a);
3057 }
3058 
3059 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3060 {
3061     tcg_gen_shri_vec(vece, a, a, sh);
3062     tcg_gen_add_vec(vece, d, d, a);
3063 }
3064 
3065 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3066                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3067 {
3068     static const TCGOpcode vecop_list[] = {
3069         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3070     };
3071     static const GVecGen2i ops[4] = {
3072         { .fni8 = gen_usra8_i64,
3073           .fniv = gen_usra_vec,
3074           .fno = gen_helper_gvec_usra_b,
3075           .load_dest = true,
3076           .opt_opc = vecop_list,
3077           .vece = MO_8, },
3078         { .fni8 = gen_usra16_i64,
3079           .fniv = gen_usra_vec,
3080           .fno = gen_helper_gvec_usra_h,
3081           .load_dest = true,
3082           .opt_opc = vecop_list,
3083           .vece = MO_16, },
3084         { .fni4 = gen_usra32_i32,
3085           .fniv = gen_usra_vec,
3086           .fno = gen_helper_gvec_usra_s,
3087           .load_dest = true,
3088           .opt_opc = vecop_list,
3089           .vece = MO_32, },
3090         { .fni8 = gen_usra64_i64,
3091           .fniv = gen_usra_vec,
3092           .fno = gen_helper_gvec_usra_d,
3093           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3094           .load_dest = true,
3095           .opt_opc = vecop_list,
3096           .vece = MO_64, },
3097     };
3098 
3099     /* tszimm encoding produces immediates in the range [1..esize]. */
3100     tcg_debug_assert(shift > 0);
3101     tcg_debug_assert(shift <= (8 << vece));
3102 
3103     /*
3104      * Shifts larger than the element size are architecturally valid.
3105      * Unsigned results in all zeros as input to accumulate: nop.
3106      */
3107     if (shift < (8 << vece)) {
3108         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3109     } else {
3110         /* Nop, but we do need to clear the tail. */
3111         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3112     }
3113 }
3114 
3115 /*
3116  * Shift one less than the requested amount, and the low bit is
3117  * the rounding bit.  For the 8 and 16-bit operations, because we
3118  * mask the low bit, we can perform a normal integer shift instead
3119  * of a vector shift.
3120  */
3121 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3122 {
3123     TCGv_i64 t = tcg_temp_new_i64();
3124 
3125     tcg_gen_shri_i64(t, a, sh - 1);
3126     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3127     tcg_gen_vec_sar8i_i64(d, a, sh);
3128     tcg_gen_vec_add8_i64(d, d, t);
3129 }
3130 
3131 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3132 {
3133     TCGv_i64 t = tcg_temp_new_i64();
3134 
3135     tcg_gen_shri_i64(t, a, sh - 1);
3136     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3137     tcg_gen_vec_sar16i_i64(d, a, sh);
3138     tcg_gen_vec_add16_i64(d, d, t);
3139 }
3140 
3141 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3142 {
3143     TCGv_i32 t;
3144 
3145     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3146     if (sh == 32) {
3147         tcg_gen_movi_i32(d, 0);
3148         return;
3149     }
3150     t = tcg_temp_new_i32();
3151     tcg_gen_extract_i32(t, a, sh - 1, 1);
3152     tcg_gen_sari_i32(d, a, sh);
3153     tcg_gen_add_i32(d, d, t);
3154 }
3155 
3156 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3157 {
3158     TCGv_i64 t = tcg_temp_new_i64();
3159 
3160     tcg_gen_extract_i64(t, a, sh - 1, 1);
3161     tcg_gen_sari_i64(d, a, sh);
3162     tcg_gen_add_i64(d, d, t);
3163 }
3164 
3165 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3166 {
3167     TCGv_vec t = tcg_temp_new_vec_matching(d);
3168     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3169 
3170     tcg_gen_shri_vec(vece, t, a, sh - 1);
3171     tcg_gen_dupi_vec(vece, ones, 1);
3172     tcg_gen_and_vec(vece, t, t, ones);
3173     tcg_gen_sari_vec(vece, d, a, sh);
3174     tcg_gen_add_vec(vece, d, d, t);
3175 }
3176 
3177 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3178                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3179 {
3180     static const TCGOpcode vecop_list[] = {
3181         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3182     };
3183     static const GVecGen2i ops[4] = {
3184         { .fni8 = gen_srshr8_i64,
3185           .fniv = gen_srshr_vec,
3186           .fno = gen_helper_gvec_srshr_b,
3187           .opt_opc = vecop_list,
3188           .vece = MO_8 },
3189         { .fni8 = gen_srshr16_i64,
3190           .fniv = gen_srshr_vec,
3191           .fno = gen_helper_gvec_srshr_h,
3192           .opt_opc = vecop_list,
3193           .vece = MO_16 },
3194         { .fni4 = gen_srshr32_i32,
3195           .fniv = gen_srshr_vec,
3196           .fno = gen_helper_gvec_srshr_s,
3197           .opt_opc = vecop_list,
3198           .vece = MO_32 },
3199         { .fni8 = gen_srshr64_i64,
3200           .fniv = gen_srshr_vec,
3201           .fno = gen_helper_gvec_srshr_d,
3202           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3203           .opt_opc = vecop_list,
3204           .vece = MO_64 },
3205     };
3206 
3207     /* tszimm encoding produces immediates in the range [1..esize] */
3208     tcg_debug_assert(shift > 0);
3209     tcg_debug_assert(shift <= (8 << vece));
3210 
3211     if (shift == (8 << vece)) {
3212         /*
3213          * Shifts larger than the element size are architecturally valid.
3214          * Signed results in all sign bits.  With rounding, this produces
3215          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3216          * I.e. always zero.
3217          */
3218         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3219     } else {
3220         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3221     }
3222 }
3223 
3224 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3225 {
3226     TCGv_i64 t = tcg_temp_new_i64();
3227 
3228     gen_srshr8_i64(t, a, sh);
3229     tcg_gen_vec_add8_i64(d, d, t);
3230 }
3231 
3232 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3233 {
3234     TCGv_i64 t = tcg_temp_new_i64();
3235 
3236     gen_srshr16_i64(t, a, sh);
3237     tcg_gen_vec_add16_i64(d, d, t);
3238 }
3239 
3240 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3241 {
3242     TCGv_i32 t = tcg_temp_new_i32();
3243 
3244     gen_srshr32_i32(t, a, sh);
3245     tcg_gen_add_i32(d, d, t);
3246 }
3247 
3248 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3249 {
3250     TCGv_i64 t = tcg_temp_new_i64();
3251 
3252     gen_srshr64_i64(t, a, sh);
3253     tcg_gen_add_i64(d, d, t);
3254 }
3255 
3256 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3257 {
3258     TCGv_vec t = tcg_temp_new_vec_matching(d);
3259 
3260     gen_srshr_vec(vece, t, a, sh);
3261     tcg_gen_add_vec(vece, d, d, t);
3262 }
3263 
3264 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3265                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3266 {
3267     static const TCGOpcode vecop_list[] = {
3268         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3269     };
3270     static const GVecGen2i ops[4] = {
3271         { .fni8 = gen_srsra8_i64,
3272           .fniv = gen_srsra_vec,
3273           .fno = gen_helper_gvec_srsra_b,
3274           .opt_opc = vecop_list,
3275           .load_dest = true,
3276           .vece = MO_8 },
3277         { .fni8 = gen_srsra16_i64,
3278           .fniv = gen_srsra_vec,
3279           .fno = gen_helper_gvec_srsra_h,
3280           .opt_opc = vecop_list,
3281           .load_dest = true,
3282           .vece = MO_16 },
3283         { .fni4 = gen_srsra32_i32,
3284           .fniv = gen_srsra_vec,
3285           .fno = gen_helper_gvec_srsra_s,
3286           .opt_opc = vecop_list,
3287           .load_dest = true,
3288           .vece = MO_32 },
3289         { .fni8 = gen_srsra64_i64,
3290           .fniv = gen_srsra_vec,
3291           .fno = gen_helper_gvec_srsra_d,
3292           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3293           .opt_opc = vecop_list,
3294           .load_dest = true,
3295           .vece = MO_64 },
3296     };
3297 
3298     /* tszimm encoding produces immediates in the range [1..esize] */
3299     tcg_debug_assert(shift > 0);
3300     tcg_debug_assert(shift <= (8 << vece));
3301 
3302     /*
3303      * Shifts larger than the element size are architecturally valid.
3304      * Signed results in all sign bits.  With rounding, this produces
3305      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3306      * I.e. always zero.  With accumulation, this leaves D unchanged.
3307      */
3308     if (shift == (8 << vece)) {
3309         /* Nop, but we do need to clear the tail. */
3310         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3311     } else {
3312         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3313     }
3314 }
3315 
3316 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3317 {
3318     TCGv_i64 t = tcg_temp_new_i64();
3319 
3320     tcg_gen_shri_i64(t, a, sh - 1);
3321     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3322     tcg_gen_vec_shr8i_i64(d, a, sh);
3323     tcg_gen_vec_add8_i64(d, d, t);
3324 }
3325 
3326 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3327 {
3328     TCGv_i64 t = tcg_temp_new_i64();
3329 
3330     tcg_gen_shri_i64(t, a, sh - 1);
3331     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3332     tcg_gen_vec_shr16i_i64(d, a, sh);
3333     tcg_gen_vec_add16_i64(d, d, t);
3334 }
3335 
3336 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3337 {
3338     TCGv_i32 t;
3339 
3340     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3341     if (sh == 32) {
3342         tcg_gen_extract_i32(d, a, sh - 1, 1);
3343         return;
3344     }
3345     t = tcg_temp_new_i32();
3346     tcg_gen_extract_i32(t, a, sh - 1, 1);
3347     tcg_gen_shri_i32(d, a, sh);
3348     tcg_gen_add_i32(d, d, t);
3349 }
3350 
3351 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3352 {
3353     TCGv_i64 t = tcg_temp_new_i64();
3354 
3355     tcg_gen_extract_i64(t, a, sh - 1, 1);
3356     tcg_gen_shri_i64(d, a, sh);
3357     tcg_gen_add_i64(d, d, t);
3358 }
3359 
3360 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3361 {
3362     TCGv_vec t = tcg_temp_new_vec_matching(d);
3363     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3364 
3365     tcg_gen_shri_vec(vece, t, a, shift - 1);
3366     tcg_gen_dupi_vec(vece, ones, 1);
3367     tcg_gen_and_vec(vece, t, t, ones);
3368     tcg_gen_shri_vec(vece, d, a, shift);
3369     tcg_gen_add_vec(vece, d, d, t);
3370 }
3371 
3372 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3373                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3374 {
3375     static const TCGOpcode vecop_list[] = {
3376         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3377     };
3378     static const GVecGen2i ops[4] = {
3379         { .fni8 = gen_urshr8_i64,
3380           .fniv = gen_urshr_vec,
3381           .fno = gen_helper_gvec_urshr_b,
3382           .opt_opc = vecop_list,
3383           .vece = MO_8 },
3384         { .fni8 = gen_urshr16_i64,
3385           .fniv = gen_urshr_vec,
3386           .fno = gen_helper_gvec_urshr_h,
3387           .opt_opc = vecop_list,
3388           .vece = MO_16 },
3389         { .fni4 = gen_urshr32_i32,
3390           .fniv = gen_urshr_vec,
3391           .fno = gen_helper_gvec_urshr_s,
3392           .opt_opc = vecop_list,
3393           .vece = MO_32 },
3394         { .fni8 = gen_urshr64_i64,
3395           .fniv = gen_urshr_vec,
3396           .fno = gen_helper_gvec_urshr_d,
3397           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3398           .opt_opc = vecop_list,
3399           .vece = MO_64 },
3400     };
3401 
3402     /* tszimm encoding produces immediates in the range [1..esize] */
3403     tcg_debug_assert(shift > 0);
3404     tcg_debug_assert(shift <= (8 << vece));
3405 
3406     if (shift == (8 << vece)) {
3407         /*
3408          * Shifts larger than the element size are architecturally valid.
3409          * Unsigned results in zero.  With rounding, this produces a
3410          * copy of the most significant bit.
3411          */
3412         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3413     } else {
3414         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3415     }
3416 }
3417 
3418 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3419 {
3420     TCGv_i64 t = tcg_temp_new_i64();
3421 
3422     if (sh == 8) {
3423         tcg_gen_vec_shr8i_i64(t, a, 7);
3424     } else {
3425         gen_urshr8_i64(t, a, sh);
3426     }
3427     tcg_gen_vec_add8_i64(d, d, t);
3428 }
3429 
3430 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3431 {
3432     TCGv_i64 t = tcg_temp_new_i64();
3433 
3434     if (sh == 16) {
3435         tcg_gen_vec_shr16i_i64(t, a, 15);
3436     } else {
3437         gen_urshr16_i64(t, a, sh);
3438     }
3439     tcg_gen_vec_add16_i64(d, d, t);
3440 }
3441 
3442 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3443 {
3444     TCGv_i32 t = tcg_temp_new_i32();
3445 
3446     if (sh == 32) {
3447         tcg_gen_shri_i32(t, a, 31);
3448     } else {
3449         gen_urshr32_i32(t, a, sh);
3450     }
3451     tcg_gen_add_i32(d, d, t);
3452 }
3453 
3454 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3455 {
3456     TCGv_i64 t = tcg_temp_new_i64();
3457 
3458     if (sh == 64) {
3459         tcg_gen_shri_i64(t, a, 63);
3460     } else {
3461         gen_urshr64_i64(t, a, sh);
3462     }
3463     tcg_gen_add_i64(d, d, t);
3464 }
3465 
3466 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3467 {
3468     TCGv_vec t = tcg_temp_new_vec_matching(d);
3469 
3470     if (sh == (8 << vece)) {
3471         tcg_gen_shri_vec(vece, t, a, sh - 1);
3472     } else {
3473         gen_urshr_vec(vece, t, a, sh);
3474     }
3475     tcg_gen_add_vec(vece, d, d, t);
3476 }
3477 
3478 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3479                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3480 {
3481     static const TCGOpcode vecop_list[] = {
3482         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3483     };
3484     static const GVecGen2i ops[4] = {
3485         { .fni8 = gen_ursra8_i64,
3486           .fniv = gen_ursra_vec,
3487           .fno = gen_helper_gvec_ursra_b,
3488           .opt_opc = vecop_list,
3489           .load_dest = true,
3490           .vece = MO_8 },
3491         { .fni8 = gen_ursra16_i64,
3492           .fniv = gen_ursra_vec,
3493           .fno = gen_helper_gvec_ursra_h,
3494           .opt_opc = vecop_list,
3495           .load_dest = true,
3496           .vece = MO_16 },
3497         { .fni4 = gen_ursra32_i32,
3498           .fniv = gen_ursra_vec,
3499           .fno = gen_helper_gvec_ursra_s,
3500           .opt_opc = vecop_list,
3501           .load_dest = true,
3502           .vece = MO_32 },
3503         { .fni8 = gen_ursra64_i64,
3504           .fniv = gen_ursra_vec,
3505           .fno = gen_helper_gvec_ursra_d,
3506           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3507           .opt_opc = vecop_list,
3508           .load_dest = true,
3509           .vece = MO_64 },
3510     };
3511 
3512     /* tszimm encoding produces immediates in the range [1..esize] */
3513     tcg_debug_assert(shift > 0);
3514     tcg_debug_assert(shift <= (8 << vece));
3515 
3516     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3517 }
3518 
3519 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3520 {
3521     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3522     TCGv_i64 t = tcg_temp_new_i64();
3523 
3524     tcg_gen_shri_i64(t, a, shift);
3525     tcg_gen_andi_i64(t, t, mask);
3526     tcg_gen_andi_i64(d, d, ~mask);
3527     tcg_gen_or_i64(d, d, t);
3528 }
3529 
3530 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3531 {
3532     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3533     TCGv_i64 t = tcg_temp_new_i64();
3534 
3535     tcg_gen_shri_i64(t, a, shift);
3536     tcg_gen_andi_i64(t, t, mask);
3537     tcg_gen_andi_i64(d, d, ~mask);
3538     tcg_gen_or_i64(d, d, t);
3539 }
3540 
3541 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3542 {
3543     tcg_gen_shri_i32(a, a, shift);
3544     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3545 }
3546 
3547 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3548 {
3549     tcg_gen_shri_i64(a, a, shift);
3550     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3551 }
3552 
3553 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3554 {
3555     TCGv_vec t = tcg_temp_new_vec_matching(d);
3556     TCGv_vec m = tcg_temp_new_vec_matching(d);
3557 
3558     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3559     tcg_gen_shri_vec(vece, t, a, sh);
3560     tcg_gen_and_vec(vece, d, d, m);
3561     tcg_gen_or_vec(vece, d, d, t);
3562 }
3563 
3564 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3565                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3566 {
3567     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3568     const GVecGen2i ops[4] = {
3569         { .fni8 = gen_shr8_ins_i64,
3570           .fniv = gen_shr_ins_vec,
3571           .fno = gen_helper_gvec_sri_b,
3572           .load_dest = true,
3573           .opt_opc = vecop_list,
3574           .vece = MO_8 },
3575         { .fni8 = gen_shr16_ins_i64,
3576           .fniv = gen_shr_ins_vec,
3577           .fno = gen_helper_gvec_sri_h,
3578           .load_dest = true,
3579           .opt_opc = vecop_list,
3580           .vece = MO_16 },
3581         { .fni4 = gen_shr32_ins_i32,
3582           .fniv = gen_shr_ins_vec,
3583           .fno = gen_helper_gvec_sri_s,
3584           .load_dest = true,
3585           .opt_opc = vecop_list,
3586           .vece = MO_32 },
3587         { .fni8 = gen_shr64_ins_i64,
3588           .fniv = gen_shr_ins_vec,
3589           .fno = gen_helper_gvec_sri_d,
3590           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3591           .load_dest = true,
3592           .opt_opc = vecop_list,
3593           .vece = MO_64 },
3594     };
3595 
3596     /* tszimm encoding produces immediates in the range [1..esize]. */
3597     tcg_debug_assert(shift > 0);
3598     tcg_debug_assert(shift <= (8 << vece));
3599 
3600     /* Shift of esize leaves destination unchanged. */
3601     if (shift < (8 << vece)) {
3602         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3603     } else {
3604         /* Nop, but we do need to clear the tail. */
3605         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3606     }
3607 }
3608 
3609 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3610 {
3611     uint64_t mask = dup_const(MO_8, 0xff << shift);
3612     TCGv_i64 t = tcg_temp_new_i64();
3613 
3614     tcg_gen_shli_i64(t, a, shift);
3615     tcg_gen_andi_i64(t, t, mask);
3616     tcg_gen_andi_i64(d, d, ~mask);
3617     tcg_gen_or_i64(d, d, t);
3618 }
3619 
3620 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3621 {
3622     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3623     TCGv_i64 t = tcg_temp_new_i64();
3624 
3625     tcg_gen_shli_i64(t, a, shift);
3626     tcg_gen_andi_i64(t, t, mask);
3627     tcg_gen_andi_i64(d, d, ~mask);
3628     tcg_gen_or_i64(d, d, t);
3629 }
3630 
3631 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3632 {
3633     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3634 }
3635 
3636 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3637 {
3638     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3639 }
3640 
3641 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3642 {
3643     TCGv_vec t = tcg_temp_new_vec_matching(d);
3644     TCGv_vec m = tcg_temp_new_vec_matching(d);
3645 
3646     tcg_gen_shli_vec(vece, t, a, sh);
3647     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3648     tcg_gen_and_vec(vece, d, d, m);
3649     tcg_gen_or_vec(vece, d, d, t);
3650 }
3651 
3652 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3653                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3654 {
3655     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3656     const GVecGen2i ops[4] = {
3657         { .fni8 = gen_shl8_ins_i64,
3658           .fniv = gen_shl_ins_vec,
3659           .fno = gen_helper_gvec_sli_b,
3660           .load_dest = true,
3661           .opt_opc = vecop_list,
3662           .vece = MO_8 },
3663         { .fni8 = gen_shl16_ins_i64,
3664           .fniv = gen_shl_ins_vec,
3665           .fno = gen_helper_gvec_sli_h,
3666           .load_dest = true,
3667           .opt_opc = vecop_list,
3668           .vece = MO_16 },
3669         { .fni4 = gen_shl32_ins_i32,
3670           .fniv = gen_shl_ins_vec,
3671           .fno = gen_helper_gvec_sli_s,
3672           .load_dest = true,
3673           .opt_opc = vecop_list,
3674           .vece = MO_32 },
3675         { .fni8 = gen_shl64_ins_i64,
3676           .fniv = gen_shl_ins_vec,
3677           .fno = gen_helper_gvec_sli_d,
3678           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3679           .load_dest = true,
3680           .opt_opc = vecop_list,
3681           .vece = MO_64 },
3682     };
3683 
3684     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3685     tcg_debug_assert(shift >= 0);
3686     tcg_debug_assert(shift < (8 << vece));
3687 
3688     if (shift == 0) {
3689         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3690     } else {
3691         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3692     }
3693 }
3694 
3695 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3696 {
3697     gen_helper_neon_mul_u8(a, a, b);
3698     gen_helper_neon_add_u8(d, d, a);
3699 }
3700 
3701 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3702 {
3703     gen_helper_neon_mul_u8(a, a, b);
3704     gen_helper_neon_sub_u8(d, d, a);
3705 }
3706 
3707 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3708 {
3709     gen_helper_neon_mul_u16(a, a, b);
3710     gen_helper_neon_add_u16(d, d, a);
3711 }
3712 
3713 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3714 {
3715     gen_helper_neon_mul_u16(a, a, b);
3716     gen_helper_neon_sub_u16(d, d, a);
3717 }
3718 
3719 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3720 {
3721     tcg_gen_mul_i32(a, a, b);
3722     tcg_gen_add_i32(d, d, a);
3723 }
3724 
3725 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3726 {
3727     tcg_gen_mul_i32(a, a, b);
3728     tcg_gen_sub_i32(d, d, a);
3729 }
3730 
3731 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3732 {
3733     tcg_gen_mul_i64(a, a, b);
3734     tcg_gen_add_i64(d, d, a);
3735 }
3736 
3737 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3738 {
3739     tcg_gen_mul_i64(a, a, b);
3740     tcg_gen_sub_i64(d, d, a);
3741 }
3742 
3743 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3744 {
3745     tcg_gen_mul_vec(vece, a, a, b);
3746     tcg_gen_add_vec(vece, d, d, a);
3747 }
3748 
3749 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3750 {
3751     tcg_gen_mul_vec(vece, a, a, b);
3752     tcg_gen_sub_vec(vece, d, d, a);
3753 }
3754 
3755 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3756  * these tables are shared with AArch64 which does support them.
3757  */
3758 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3759                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3760 {
3761     static const TCGOpcode vecop_list[] = {
3762         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3763     };
3764     static const GVecGen3 ops[4] = {
3765         { .fni4 = gen_mla8_i32,
3766           .fniv = gen_mla_vec,
3767           .load_dest = true,
3768           .opt_opc = vecop_list,
3769           .vece = MO_8 },
3770         { .fni4 = gen_mla16_i32,
3771           .fniv = gen_mla_vec,
3772           .load_dest = true,
3773           .opt_opc = vecop_list,
3774           .vece = MO_16 },
3775         { .fni4 = gen_mla32_i32,
3776           .fniv = gen_mla_vec,
3777           .load_dest = true,
3778           .opt_opc = vecop_list,
3779           .vece = MO_32 },
3780         { .fni8 = gen_mla64_i64,
3781           .fniv = gen_mla_vec,
3782           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3783           .load_dest = true,
3784           .opt_opc = vecop_list,
3785           .vece = MO_64 },
3786     };
3787     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3788 }
3789 
3790 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3791                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3792 {
3793     static const TCGOpcode vecop_list[] = {
3794         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3795     };
3796     static const GVecGen3 ops[4] = {
3797         { .fni4 = gen_mls8_i32,
3798           .fniv = gen_mls_vec,
3799           .load_dest = true,
3800           .opt_opc = vecop_list,
3801           .vece = MO_8 },
3802         { .fni4 = gen_mls16_i32,
3803           .fniv = gen_mls_vec,
3804           .load_dest = true,
3805           .opt_opc = vecop_list,
3806           .vece = MO_16 },
3807         { .fni4 = gen_mls32_i32,
3808           .fniv = gen_mls_vec,
3809           .load_dest = true,
3810           .opt_opc = vecop_list,
3811           .vece = MO_32 },
3812         { .fni8 = gen_mls64_i64,
3813           .fniv = gen_mls_vec,
3814           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3815           .load_dest = true,
3816           .opt_opc = vecop_list,
3817           .vece = MO_64 },
3818     };
3819     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3820 }
3821 
3822 /* CMTST : test is "if (X & Y != 0)". */
3823 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3824 {
3825     tcg_gen_and_i32(d, a, b);
3826     tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
3827 }
3828 
3829 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3830 {
3831     tcg_gen_and_i64(d, a, b);
3832     tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
3833 }
3834 
3835 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3836 {
3837     tcg_gen_and_vec(vece, d, a, b);
3838     tcg_gen_dupi_vec(vece, a, 0);
3839     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3840 }
3841 
3842 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3843                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3844 {
3845     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3846     static const GVecGen3 ops[4] = {
3847         { .fni4 = gen_helper_neon_tst_u8,
3848           .fniv = gen_cmtst_vec,
3849           .opt_opc = vecop_list,
3850           .vece = MO_8 },
3851         { .fni4 = gen_helper_neon_tst_u16,
3852           .fniv = gen_cmtst_vec,
3853           .opt_opc = vecop_list,
3854           .vece = MO_16 },
3855         { .fni4 = gen_cmtst_i32,
3856           .fniv = gen_cmtst_vec,
3857           .opt_opc = vecop_list,
3858           .vece = MO_32 },
3859         { .fni8 = gen_cmtst_i64,
3860           .fniv = gen_cmtst_vec,
3861           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3862           .opt_opc = vecop_list,
3863           .vece = MO_64 },
3864     };
3865     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3866 }
3867 
3868 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3869 {
3870     TCGv_i32 lval = tcg_temp_new_i32();
3871     TCGv_i32 rval = tcg_temp_new_i32();
3872     TCGv_i32 lsh = tcg_temp_new_i32();
3873     TCGv_i32 rsh = tcg_temp_new_i32();
3874     TCGv_i32 zero = tcg_constant_i32(0);
3875     TCGv_i32 max = tcg_constant_i32(32);
3876 
3877     /*
3878      * Rely on the TCG guarantee that out of range shifts produce
3879      * unspecified results, not undefined behaviour (i.e. no trap).
3880      * Discard out-of-range results after the fact.
3881      */
3882     tcg_gen_ext8s_i32(lsh, shift);
3883     tcg_gen_neg_i32(rsh, lsh);
3884     tcg_gen_shl_i32(lval, src, lsh);
3885     tcg_gen_shr_i32(rval, src, rsh);
3886     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3887     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3888 }
3889 
3890 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3891 {
3892     TCGv_i64 lval = tcg_temp_new_i64();
3893     TCGv_i64 rval = tcg_temp_new_i64();
3894     TCGv_i64 lsh = tcg_temp_new_i64();
3895     TCGv_i64 rsh = tcg_temp_new_i64();
3896     TCGv_i64 zero = tcg_constant_i64(0);
3897     TCGv_i64 max = tcg_constant_i64(64);
3898 
3899     /*
3900      * Rely on the TCG guarantee that out of range shifts produce
3901      * unspecified results, not undefined behaviour (i.e. no trap).
3902      * Discard out-of-range results after the fact.
3903      */
3904     tcg_gen_ext8s_i64(lsh, shift);
3905     tcg_gen_neg_i64(rsh, lsh);
3906     tcg_gen_shl_i64(lval, src, lsh);
3907     tcg_gen_shr_i64(rval, src, rsh);
3908     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3909     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3910 }
3911 
3912 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3913                          TCGv_vec src, TCGv_vec shift)
3914 {
3915     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3916     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3917     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3918     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3919     TCGv_vec msk, max;
3920 
3921     tcg_gen_neg_vec(vece, rsh, shift);
3922     if (vece == MO_8) {
3923         tcg_gen_mov_vec(lsh, shift);
3924     } else {
3925         msk = tcg_temp_new_vec_matching(dst);
3926         tcg_gen_dupi_vec(vece, msk, 0xff);
3927         tcg_gen_and_vec(vece, lsh, shift, msk);
3928         tcg_gen_and_vec(vece, rsh, rsh, msk);
3929     }
3930 
3931     /*
3932      * Rely on the TCG guarantee that out of range shifts produce
3933      * unspecified results, not undefined behaviour (i.e. no trap).
3934      * Discard out-of-range results after the fact.
3935      */
3936     tcg_gen_shlv_vec(vece, lval, src, lsh);
3937     tcg_gen_shrv_vec(vece, rval, src, rsh);
3938 
3939     max = tcg_temp_new_vec_matching(dst);
3940     tcg_gen_dupi_vec(vece, max, 8 << vece);
3941 
3942     /*
3943      * The choice of LT (signed) and GEU (unsigned) are biased toward
3944      * the instructions of the x86_64 host.  For MO_8, the whole byte
3945      * is significant so we must use an unsigned compare; otherwise we
3946      * have already masked to a byte and so a signed compare works.
3947      * Other tcg hosts have a full set of comparisons and do not care.
3948      */
3949     if (vece == MO_8) {
3950         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3951         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3952         tcg_gen_andc_vec(vece, lval, lval, lsh);
3953         tcg_gen_andc_vec(vece, rval, rval, rsh);
3954     } else {
3955         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3956         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3957         tcg_gen_and_vec(vece, lval, lval, lsh);
3958         tcg_gen_and_vec(vece, rval, rval, rsh);
3959     }
3960     tcg_gen_or_vec(vece, dst, lval, rval);
3961 }
3962 
3963 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3964                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3965 {
3966     static const TCGOpcode vecop_list[] = {
3967         INDEX_op_neg_vec, INDEX_op_shlv_vec,
3968         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
3969     };
3970     static const GVecGen3 ops[4] = {
3971         { .fniv = gen_ushl_vec,
3972           .fno = gen_helper_gvec_ushl_b,
3973           .opt_opc = vecop_list,
3974           .vece = MO_8 },
3975         { .fniv = gen_ushl_vec,
3976           .fno = gen_helper_gvec_ushl_h,
3977           .opt_opc = vecop_list,
3978           .vece = MO_16 },
3979         { .fni4 = gen_ushl_i32,
3980           .fniv = gen_ushl_vec,
3981           .opt_opc = vecop_list,
3982           .vece = MO_32 },
3983         { .fni8 = gen_ushl_i64,
3984           .fniv = gen_ushl_vec,
3985           .opt_opc = vecop_list,
3986           .vece = MO_64 },
3987     };
3988     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3989 }
3990 
3991 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3992 {
3993     TCGv_i32 lval = tcg_temp_new_i32();
3994     TCGv_i32 rval = tcg_temp_new_i32();
3995     TCGv_i32 lsh = tcg_temp_new_i32();
3996     TCGv_i32 rsh = tcg_temp_new_i32();
3997     TCGv_i32 zero = tcg_constant_i32(0);
3998     TCGv_i32 max = tcg_constant_i32(31);
3999 
4000     /*
4001      * Rely on the TCG guarantee that out of range shifts produce
4002      * unspecified results, not undefined behaviour (i.e. no trap).
4003      * Discard out-of-range results after the fact.
4004      */
4005     tcg_gen_ext8s_i32(lsh, shift);
4006     tcg_gen_neg_i32(rsh, lsh);
4007     tcg_gen_shl_i32(lval, src, lsh);
4008     tcg_gen_umin_i32(rsh, rsh, max);
4009     tcg_gen_sar_i32(rval, src, rsh);
4010     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4011     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4012 }
4013 
4014 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4015 {
4016     TCGv_i64 lval = tcg_temp_new_i64();
4017     TCGv_i64 rval = tcg_temp_new_i64();
4018     TCGv_i64 lsh = tcg_temp_new_i64();
4019     TCGv_i64 rsh = tcg_temp_new_i64();
4020     TCGv_i64 zero = tcg_constant_i64(0);
4021     TCGv_i64 max = tcg_constant_i64(63);
4022 
4023     /*
4024      * Rely on the TCG guarantee that out of range shifts produce
4025      * unspecified results, not undefined behaviour (i.e. no trap).
4026      * Discard out-of-range results after the fact.
4027      */
4028     tcg_gen_ext8s_i64(lsh, shift);
4029     tcg_gen_neg_i64(rsh, lsh);
4030     tcg_gen_shl_i64(lval, src, lsh);
4031     tcg_gen_umin_i64(rsh, rsh, max);
4032     tcg_gen_sar_i64(rval, src, rsh);
4033     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4034     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4035 }
4036 
4037 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4038                          TCGv_vec src, TCGv_vec shift)
4039 {
4040     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4041     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4042     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4043     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4044     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4045 
4046     /*
4047      * Rely on the TCG guarantee that out of range shifts produce
4048      * unspecified results, not undefined behaviour (i.e. no trap).
4049      * Discard out-of-range results after the fact.
4050      */
4051     tcg_gen_neg_vec(vece, rsh, shift);
4052     if (vece == MO_8) {
4053         tcg_gen_mov_vec(lsh, shift);
4054     } else {
4055         tcg_gen_dupi_vec(vece, tmp, 0xff);
4056         tcg_gen_and_vec(vece, lsh, shift, tmp);
4057         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4058     }
4059 
4060     /* Bound rsh so out of bound right shift gets -1.  */
4061     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4062     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4063     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4064 
4065     tcg_gen_shlv_vec(vece, lval, src, lsh);
4066     tcg_gen_sarv_vec(vece, rval, src, rsh);
4067 
4068     /* Select in-bound left shift.  */
4069     tcg_gen_andc_vec(vece, lval, lval, tmp);
4070 
4071     /* Select between left and right shift.  */
4072     if (vece == MO_8) {
4073         tcg_gen_dupi_vec(vece, tmp, 0);
4074         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4075     } else {
4076         tcg_gen_dupi_vec(vece, tmp, 0x80);
4077         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4078     }
4079 }
4080 
4081 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4082                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4083 {
4084     static const TCGOpcode vecop_list[] = {
4085         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4086         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4087     };
4088     static const GVecGen3 ops[4] = {
4089         { .fniv = gen_sshl_vec,
4090           .fno = gen_helper_gvec_sshl_b,
4091           .opt_opc = vecop_list,
4092           .vece = MO_8 },
4093         { .fniv = gen_sshl_vec,
4094           .fno = gen_helper_gvec_sshl_h,
4095           .opt_opc = vecop_list,
4096           .vece = MO_16 },
4097         { .fni4 = gen_sshl_i32,
4098           .fniv = gen_sshl_vec,
4099           .opt_opc = vecop_list,
4100           .vece = MO_32 },
4101         { .fni8 = gen_sshl_i64,
4102           .fniv = gen_sshl_vec,
4103           .opt_opc = vecop_list,
4104           .vece = MO_64 },
4105     };
4106     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4107 }
4108 
4109 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4110                           TCGv_vec a, TCGv_vec b)
4111 {
4112     TCGv_vec x = tcg_temp_new_vec_matching(t);
4113     tcg_gen_add_vec(vece, x, a, b);
4114     tcg_gen_usadd_vec(vece, t, a, b);
4115     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4116     tcg_gen_or_vec(vece, sat, sat, x);
4117 }
4118 
4119 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4120                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4121 {
4122     static const TCGOpcode vecop_list[] = {
4123         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4124     };
4125     static const GVecGen4 ops[4] = {
4126         { .fniv = gen_uqadd_vec,
4127           .fno = gen_helper_gvec_uqadd_b,
4128           .write_aofs = true,
4129           .opt_opc = vecop_list,
4130           .vece = MO_8 },
4131         { .fniv = gen_uqadd_vec,
4132           .fno = gen_helper_gvec_uqadd_h,
4133           .write_aofs = true,
4134           .opt_opc = vecop_list,
4135           .vece = MO_16 },
4136         { .fniv = gen_uqadd_vec,
4137           .fno = gen_helper_gvec_uqadd_s,
4138           .write_aofs = true,
4139           .opt_opc = vecop_list,
4140           .vece = MO_32 },
4141         { .fniv = gen_uqadd_vec,
4142           .fno = gen_helper_gvec_uqadd_d,
4143           .write_aofs = true,
4144           .opt_opc = vecop_list,
4145           .vece = MO_64 },
4146     };
4147     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4148                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4149 }
4150 
4151 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4152                           TCGv_vec a, TCGv_vec b)
4153 {
4154     TCGv_vec x = tcg_temp_new_vec_matching(t);
4155     tcg_gen_add_vec(vece, x, a, b);
4156     tcg_gen_ssadd_vec(vece, t, a, b);
4157     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4158     tcg_gen_or_vec(vece, sat, sat, x);
4159 }
4160 
4161 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4162                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4163 {
4164     static const TCGOpcode vecop_list[] = {
4165         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4166     };
4167     static const GVecGen4 ops[4] = {
4168         { .fniv = gen_sqadd_vec,
4169           .fno = gen_helper_gvec_sqadd_b,
4170           .opt_opc = vecop_list,
4171           .write_aofs = true,
4172           .vece = MO_8 },
4173         { .fniv = gen_sqadd_vec,
4174           .fno = gen_helper_gvec_sqadd_h,
4175           .opt_opc = vecop_list,
4176           .write_aofs = true,
4177           .vece = MO_16 },
4178         { .fniv = gen_sqadd_vec,
4179           .fno = gen_helper_gvec_sqadd_s,
4180           .opt_opc = vecop_list,
4181           .write_aofs = true,
4182           .vece = MO_32 },
4183         { .fniv = gen_sqadd_vec,
4184           .fno = gen_helper_gvec_sqadd_d,
4185           .opt_opc = vecop_list,
4186           .write_aofs = true,
4187           .vece = MO_64 },
4188     };
4189     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4190                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4191 }
4192 
4193 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4194                           TCGv_vec a, TCGv_vec b)
4195 {
4196     TCGv_vec x = tcg_temp_new_vec_matching(t);
4197     tcg_gen_sub_vec(vece, x, a, b);
4198     tcg_gen_ussub_vec(vece, t, a, b);
4199     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4200     tcg_gen_or_vec(vece, sat, sat, x);
4201 }
4202 
4203 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4204                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4205 {
4206     static const TCGOpcode vecop_list[] = {
4207         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4208     };
4209     static const GVecGen4 ops[4] = {
4210         { .fniv = gen_uqsub_vec,
4211           .fno = gen_helper_gvec_uqsub_b,
4212           .opt_opc = vecop_list,
4213           .write_aofs = true,
4214           .vece = MO_8 },
4215         { .fniv = gen_uqsub_vec,
4216           .fno = gen_helper_gvec_uqsub_h,
4217           .opt_opc = vecop_list,
4218           .write_aofs = true,
4219           .vece = MO_16 },
4220         { .fniv = gen_uqsub_vec,
4221           .fno = gen_helper_gvec_uqsub_s,
4222           .opt_opc = vecop_list,
4223           .write_aofs = true,
4224           .vece = MO_32 },
4225         { .fniv = gen_uqsub_vec,
4226           .fno = gen_helper_gvec_uqsub_d,
4227           .opt_opc = vecop_list,
4228           .write_aofs = true,
4229           .vece = MO_64 },
4230     };
4231     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4232                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4233 }
4234 
4235 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4236                           TCGv_vec a, TCGv_vec b)
4237 {
4238     TCGv_vec x = tcg_temp_new_vec_matching(t);
4239     tcg_gen_sub_vec(vece, x, a, b);
4240     tcg_gen_sssub_vec(vece, t, a, b);
4241     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4242     tcg_gen_or_vec(vece, sat, sat, x);
4243 }
4244 
4245 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4246                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4247 {
4248     static const TCGOpcode vecop_list[] = {
4249         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4250     };
4251     static const GVecGen4 ops[4] = {
4252         { .fniv = gen_sqsub_vec,
4253           .fno = gen_helper_gvec_sqsub_b,
4254           .opt_opc = vecop_list,
4255           .write_aofs = true,
4256           .vece = MO_8 },
4257         { .fniv = gen_sqsub_vec,
4258           .fno = gen_helper_gvec_sqsub_h,
4259           .opt_opc = vecop_list,
4260           .write_aofs = true,
4261           .vece = MO_16 },
4262         { .fniv = gen_sqsub_vec,
4263           .fno = gen_helper_gvec_sqsub_s,
4264           .opt_opc = vecop_list,
4265           .write_aofs = true,
4266           .vece = MO_32 },
4267         { .fniv = gen_sqsub_vec,
4268           .fno = gen_helper_gvec_sqsub_d,
4269           .opt_opc = vecop_list,
4270           .write_aofs = true,
4271           .vece = MO_64 },
4272     };
4273     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4274                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4275 }
4276 
4277 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4278 {
4279     TCGv_i32 t = tcg_temp_new_i32();
4280 
4281     tcg_gen_sub_i32(t, a, b);
4282     tcg_gen_sub_i32(d, b, a);
4283     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4284 }
4285 
4286 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4287 {
4288     TCGv_i64 t = tcg_temp_new_i64();
4289 
4290     tcg_gen_sub_i64(t, a, b);
4291     tcg_gen_sub_i64(d, b, a);
4292     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4293 }
4294 
4295 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4296 {
4297     TCGv_vec t = tcg_temp_new_vec_matching(d);
4298 
4299     tcg_gen_smin_vec(vece, t, a, b);
4300     tcg_gen_smax_vec(vece, d, a, b);
4301     tcg_gen_sub_vec(vece, d, d, t);
4302 }
4303 
4304 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4305                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4306 {
4307     static const TCGOpcode vecop_list[] = {
4308         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4309     };
4310     static const GVecGen3 ops[4] = {
4311         { .fniv = gen_sabd_vec,
4312           .fno = gen_helper_gvec_sabd_b,
4313           .opt_opc = vecop_list,
4314           .vece = MO_8 },
4315         { .fniv = gen_sabd_vec,
4316           .fno = gen_helper_gvec_sabd_h,
4317           .opt_opc = vecop_list,
4318           .vece = MO_16 },
4319         { .fni4 = gen_sabd_i32,
4320           .fniv = gen_sabd_vec,
4321           .fno = gen_helper_gvec_sabd_s,
4322           .opt_opc = vecop_list,
4323           .vece = MO_32 },
4324         { .fni8 = gen_sabd_i64,
4325           .fniv = gen_sabd_vec,
4326           .fno = gen_helper_gvec_sabd_d,
4327           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4328           .opt_opc = vecop_list,
4329           .vece = MO_64 },
4330     };
4331     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4332 }
4333 
4334 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4335 {
4336     TCGv_i32 t = tcg_temp_new_i32();
4337 
4338     tcg_gen_sub_i32(t, a, b);
4339     tcg_gen_sub_i32(d, b, a);
4340     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4341 }
4342 
4343 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4344 {
4345     TCGv_i64 t = tcg_temp_new_i64();
4346 
4347     tcg_gen_sub_i64(t, a, b);
4348     tcg_gen_sub_i64(d, b, a);
4349     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4350 }
4351 
4352 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4353 {
4354     TCGv_vec t = tcg_temp_new_vec_matching(d);
4355 
4356     tcg_gen_umin_vec(vece, t, a, b);
4357     tcg_gen_umax_vec(vece, d, a, b);
4358     tcg_gen_sub_vec(vece, d, d, t);
4359 }
4360 
4361 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4362                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4363 {
4364     static const TCGOpcode vecop_list[] = {
4365         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4366     };
4367     static const GVecGen3 ops[4] = {
4368         { .fniv = gen_uabd_vec,
4369           .fno = gen_helper_gvec_uabd_b,
4370           .opt_opc = vecop_list,
4371           .vece = MO_8 },
4372         { .fniv = gen_uabd_vec,
4373           .fno = gen_helper_gvec_uabd_h,
4374           .opt_opc = vecop_list,
4375           .vece = MO_16 },
4376         { .fni4 = gen_uabd_i32,
4377           .fniv = gen_uabd_vec,
4378           .fno = gen_helper_gvec_uabd_s,
4379           .opt_opc = vecop_list,
4380           .vece = MO_32 },
4381         { .fni8 = gen_uabd_i64,
4382           .fniv = gen_uabd_vec,
4383           .fno = gen_helper_gvec_uabd_d,
4384           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4385           .opt_opc = vecop_list,
4386           .vece = MO_64 },
4387     };
4388     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4389 }
4390 
4391 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4392 {
4393     TCGv_i32 t = tcg_temp_new_i32();
4394     gen_sabd_i32(t, a, b);
4395     tcg_gen_add_i32(d, d, t);
4396 }
4397 
4398 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4399 {
4400     TCGv_i64 t = tcg_temp_new_i64();
4401     gen_sabd_i64(t, a, b);
4402     tcg_gen_add_i64(d, d, t);
4403 }
4404 
4405 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4406 {
4407     TCGv_vec t = tcg_temp_new_vec_matching(d);
4408     gen_sabd_vec(vece, t, a, b);
4409     tcg_gen_add_vec(vece, d, d, t);
4410 }
4411 
4412 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4413                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4414 {
4415     static const TCGOpcode vecop_list[] = {
4416         INDEX_op_sub_vec, INDEX_op_add_vec,
4417         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4418     };
4419     static const GVecGen3 ops[4] = {
4420         { .fniv = gen_saba_vec,
4421           .fno = gen_helper_gvec_saba_b,
4422           .opt_opc = vecop_list,
4423           .load_dest = true,
4424           .vece = MO_8 },
4425         { .fniv = gen_saba_vec,
4426           .fno = gen_helper_gvec_saba_h,
4427           .opt_opc = vecop_list,
4428           .load_dest = true,
4429           .vece = MO_16 },
4430         { .fni4 = gen_saba_i32,
4431           .fniv = gen_saba_vec,
4432           .fno = gen_helper_gvec_saba_s,
4433           .opt_opc = vecop_list,
4434           .load_dest = true,
4435           .vece = MO_32 },
4436         { .fni8 = gen_saba_i64,
4437           .fniv = gen_saba_vec,
4438           .fno = gen_helper_gvec_saba_d,
4439           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4440           .opt_opc = vecop_list,
4441           .load_dest = true,
4442           .vece = MO_64 },
4443     };
4444     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4445 }
4446 
4447 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4448 {
4449     TCGv_i32 t = tcg_temp_new_i32();
4450     gen_uabd_i32(t, a, b);
4451     tcg_gen_add_i32(d, d, t);
4452 }
4453 
4454 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4455 {
4456     TCGv_i64 t = tcg_temp_new_i64();
4457     gen_uabd_i64(t, a, b);
4458     tcg_gen_add_i64(d, d, t);
4459 }
4460 
4461 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4462 {
4463     TCGv_vec t = tcg_temp_new_vec_matching(d);
4464     gen_uabd_vec(vece, t, a, b);
4465     tcg_gen_add_vec(vece, d, d, t);
4466 }
4467 
4468 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4469                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4470 {
4471     static const TCGOpcode vecop_list[] = {
4472         INDEX_op_sub_vec, INDEX_op_add_vec,
4473         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4474     };
4475     static const GVecGen3 ops[4] = {
4476         { .fniv = gen_uaba_vec,
4477           .fno = gen_helper_gvec_uaba_b,
4478           .opt_opc = vecop_list,
4479           .load_dest = true,
4480           .vece = MO_8 },
4481         { .fniv = gen_uaba_vec,
4482           .fno = gen_helper_gvec_uaba_h,
4483           .opt_opc = vecop_list,
4484           .load_dest = true,
4485           .vece = MO_16 },
4486         { .fni4 = gen_uaba_i32,
4487           .fniv = gen_uaba_vec,
4488           .fno = gen_helper_gvec_uaba_s,
4489           .opt_opc = vecop_list,
4490           .load_dest = true,
4491           .vece = MO_32 },
4492         { .fni8 = gen_uaba_i64,
4493           .fniv = gen_uaba_vec,
4494           .fno = gen_helper_gvec_uaba_d,
4495           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4496           .opt_opc = vecop_list,
4497           .load_dest = true,
4498           .vece = MO_64 },
4499     };
4500     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4501 }
4502 
/*
 * Return true if the (crn, crm) pair selects one of the encodings
 * listed in the table below.  Only crn 9, 10 and 11 have entries;
 * any other crn yields false.
 */
static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
{
    /* One bitmap per crn in 9..11: bit N is set when crm == cN matches. */
    static const uint16_t crm_bitmap[] = {
        0x01e7,  /* crn ==  9, crm == {c0-c2, c5-c8}   */
        0x0113,  /* crn == 10, crm == {c0, c1, c4, c8} */
        0x81ff,  /* crn == 11, crm == {c0-c8, c15}     */
    };

    if (crn < 9 || crn > 11) {
        return false;
    }
    return ((crm_bitmap[crn - 9] >> crm) & 1) != 0;
}
4516 
/*
 * Emit code for one AArch32 coprocessor register access.
 *
 * @s: translation context
 * @cpnum: coprocessor number
 * @is64: nonzero for a 64-bit (register pair) access, zero for 32-bit
 * @opc1, @crn, @crm, @opc2: register encoding fields; together with
 *     @cpnum, @is64 and s->ns they form the lookup key
 * @isread: true for a read into the core register(s), false for a write
 * @rt: core register (low half for 64-bit accesses)
 * @rt2: core register for the high half; only used when @is64
 *
 * The sequence is: build the trap syndrome, emit the EL1 HSTR and
 * TIDCP checks, UNDEF if the register is unknown or the access is not
 * permitted at this EL, emit the runtime access check if one is needed,
 * handle the special register types, then emit the read or write and
 * finally end the TB if required.
 */
static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
                           int opc1, int crn, int crm, int opc2,
                           bool isread, int rt, int rt2)
{
    uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    /* Runtime pointer to the reginfo; filled in lazily when a helper needs it */
    TCGv_ptr tcg_ri = NULL;
    bool need_exit_tb = false;
    uint32_t syndrome;

    /*
     * Note that since we are an implementation which takes an
     * exception on a trapped conditional instruction only if the
     * instruction passes its condition code check, we can take
     * advantage of the clause in the ARM ARM that allows us to set
     * the COND field in the instruction to 0xE in all cases.
     * We could fish the actual condition out of the insn (ARM)
     * or the condexec bits (Thumb) but it isn't necessary.
     */
    switch (cpnum) {
    case 14:
        if (is64) {
            syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
                                         isread, false);
        } else {
            syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
                                        rt, isread, false);
        }
        break;
    case 15:
        if (is64) {
            syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
                                         isread, false);
        } else {
            syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
                                        rt, isread, false);
        }
        break;
    default:
        /*
         * ARMv8 defines that only coprocessors 14 and 15 exist,
         * so this can only happen if this is an ARMv7 or earlier CPU,
         * in which case the syndrome information won't actually be
         * guest visible.
         */
        assert(!arm_dc_feature(s, ARM_FEATURE_V8));
        syndrome = syn_uncategorized();
        break;
    }

    if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
        /*
         * At EL1, check for a HSTR_EL2 trap, which must take precedence
         * over the UNDEF for "no such register" or the UNDEF for "access
         * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
         * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
         * access_check_cp_reg(), after the checks for whether the access
         * configurably trapped to EL1.
         */
        /* The HSTR bit index is CRm for 64-bit accesses, CRn otherwise */
        uint32_t maskbit = is64 ? crm : crn;

        if (maskbit != 4 && maskbit != 14) {
            /* T4 and T14 are RES0 so never cause traps */
            TCGv_i32 t;
            DisasLabel over = gen_disas_label(s);

            /* Skip the exception at runtime if the selected HSTR bit is 0 */
            t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
            tcg_gen_andi_i32(t, t, 1u << maskbit);
            tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);

            gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
            /*
             * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
             * but since we're conditionally branching over it, we want
             * to assume continue-to-next-instruction.
             */
            s->base.is_jmp = DISAS_NEXT;
            set_disas_label(s, over);
        }
    }

    if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) {
        /*
         * Check for TIDCP trap, which must take precedence over the UNDEF
         * for "no such register" etc.  It shares precedence with HSTR,
         * but raises the same exception, so order doesn't matter.
         */
        switch (s->current_el) {
        case 0:
            if (arm_dc_feature(s, ARM_FEATURE_AARCH64)
                && dc_isar_feature(aa64_tidcp1, s)) {
                gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
            }
            break;
        case 1:
            gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
            break;
        }
    }

    if (!ri) {
        /*
         * Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        if (is64) {
            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
                          "64 bit system register cp:%d opc1: %d crm:%d "
                          "(%s)\n",
                          isread ? "read" : "write", cpnum, opc1, crm,
                          s->ns ? "non-secure" : "secure");
        } else {
            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
                          "system register cp:%d opc1:%d crn:%d crm:%d "
                          "opc2:%d (%s)\n",
                          isread ? "read" : "write", cpnum, opc1, crn,
                          crm, opc2, s->ns ? "non-secure" : "secure");
        }
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
        (ri->fgt && s->fgt_active) ||
        (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
        /*
         * Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         * Note that on XScale all cp0..cp13 registers do an access check
         * call in order to handle c15_cpar.
         */
        gen_set_condexec(s);
        gen_update_pc(s, 0);
        /* The helper's result is kept in tcg_ri and reused below */
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_set_condexec(s);
        gen_update_pc(s, 0);
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        break;
    case ARM_CP_NOP:
        return;
    case ARM_CP_WFI:
        /* Only a write is accepted; a read of a WFI register UNDEFs */
        if (isread) {
            unallocated_encoding(s);
        } else {
            gen_update_pc(s, curr_insn_len(s));
            s->base.is_jmp = DISAS_WFI;
        }
        return;
    default:
        g_assert_not_reached();
    }

    if (ri->type & ARM_CP_IO) {
        /* I/O operations must end the TB here (whether read or write) */
        need_exit_tb = translator_io_start(&s->base);
    }

    /*
     * In each direction the value source/sink is chosen in this order:
     * constant (ARM_CP_CONST), helper call (readfn/writefn present),
     * otherwise a direct access to the CPU state field at ri->fieldoffset.
     */
    if (isread) {
        /* Read */
        if (is64) {
            TCGv_i64 tmp64;
            TCGv_i32 tmp;
            if (ri->type & ARM_CP_CONST) {
                tmp64 = tcg_constant_i64(ri->resetvalue);
            } else if (ri->readfn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                tmp64 = tcg_temp_new_i64();
                gen_helper_get_cp_reg64(tmp64, tcg_env, tcg_ri);
            } else {
                tmp64 = tcg_temp_new_i64();
                tcg_gen_ld_i64(tmp64, tcg_env, ri->fieldoffset);
            }
            /* Low 32 bits go to rt, high 32 bits to rt2 */
            tmp = tcg_temp_new_i32();
            tcg_gen_extrl_i64_i32(tmp, tmp64);
            store_reg(s, rt, tmp);
            tmp = tcg_temp_new_i32();
            tcg_gen_extrh_i64_i32(tmp, tmp64);
            store_reg(s, rt2, tmp);
        } else {
            TCGv_i32 tmp;
            if (ri->type & ARM_CP_CONST) {
                tmp = tcg_constant_i32(ri->resetvalue);
            } else if (ri->readfn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                tmp = tcg_temp_new_i32();
                gen_helper_get_cp_reg(tmp, tcg_env, tcg_ri);
            } else {
                tmp = load_cpu_offset(ri->fieldoffset);
            }
            if (rt == 15) {
                /* Destination register of r15 for 32 bit loads sets
                 * the condition codes from the high 4 bits of the value
                 */
                gen_set_nzcv(tmp);
            } else {
                store_reg(s, rt, tmp);
            }
        }
    } else {
        /* Write */
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        }

        if (is64) {
            TCGv_i32 tmplo, tmphi;
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            /* rt supplies the low 32 bits, rt2 the high 32 bits */
            tmplo = load_reg(s, rt);
            tmphi = load_reg(s, rt2);
            tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
            if (ri->writefn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                gen_helper_set_cp_reg64(tcg_env, tcg_ri, tmp64);
            } else {
                tcg_gen_st_i64(tmp64, tcg_env, ri->fieldoffset);
            }
        } else {
            TCGv_i32 tmp = load_reg(s, rt);
            if (ri->writefn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                gen_helper_set_cp_reg(tcg_env, tcg_ri, tmp);
            } else {
                store_cpu_offset(tmp, ri->fieldoffset, 4);
            }
        }
    }

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        need_exit_tb = true;
    }
    if (need_exit_tb) {
        gen_lookup_tb(s);
    }
}
4789 
4790 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4791 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4792 {
4793     int cpnum = (insn >> 8) & 0xf;
4794 
4795     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4796         unallocated_encoding(s);
4797     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4798         if (disas_iwmmxt_insn(s, insn)) {
4799             unallocated_encoding(s);
4800         }
4801     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4802         if (disas_dsp_insn(s, insn)) {
4803             unallocated_encoding(s);
4804         }
4805     }
4806 }
4807 
4808 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4809 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4810 {
4811     TCGv_i32 tmp;
4812     tmp = tcg_temp_new_i32();
4813     tcg_gen_extrl_i64_i32(tmp, val);
4814     store_reg(s, rlow, tmp);
4815     tmp = tcg_temp_new_i32();
4816     tcg_gen_extrh_i64_i32(tmp, val);
4817     store_reg(s, rhigh, tmp);
4818 }
4819 
4820 /* load and add a 64-bit value from a register pair.  */
4821 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4822 {
4823     TCGv_i64 tmp;
4824     TCGv_i32 tmpl;
4825     TCGv_i32 tmph;
4826 
4827     /* Load 64-bit value rd:rn.  */
4828     tmpl = load_reg(s, rlow);
4829     tmph = load_reg(s, rhigh);
4830     tmp = tcg_temp_new_i64();
4831     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4832     tcg_gen_add_i64(val, val, tmp);
4833 }
4834 
/*
 * Set N and Z flags from hi|lo, the two 32-bit halves of a 64-bit result.
 * cpu_NF receives a copy of @hi; cpu_ZF receives lo | hi, which is zero
 * only when both halves are zero.
 */
static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
{
    tcg_gen_mov_i32(cpu_NF, hi);
    tcg_gen_or_i32(cpu_ZF, lo, hi);
}
4841 
/* Load/Store exclusive instructions are implemented by remembering
   the value/address loaded, and seeing if these are the same
   when the store is performed.  This should be sufficient to implement
   the architecturally mandated semantics, and avoids having to monitor
   regular stores.  The compare vs the remembered value is done during
   the cmpxchg operation, but we must compare the addresses manually.  */
/*
 * gen_load_exclusive:
 * @s: DisasContext
 * @rt: destination register (word at the lowest address when size == 3)
 * @rt2: second destination register; only written when size == 3
 * @addr: 32-bit address to load from
 * @size: access size, OR'ed into the MemOp; 3 selects the 64-bit path
 *
 * Emits the load, records the loaded value in cpu_exclusive_val and the
 * zero-extended address in cpu_exclusive_addr, and sets s->is_ldex.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i32 addr, int size)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    MemOp opc = size | MO_ALIGN | s->be_data;

    s->is_ldex = true;

    if (size == 3) {
        TCGv_i32 tmp2 = tcg_temp_new_i32();
        TCGv_i64 t64 = tcg_temp_new_i64();

        /*
         * For AArch32, architecturally the 32-bit word at the lowest
         * address is always Rt and the one at addr+4 is Rt2, even if
         * the CPU is big-endian. That means we don't want to do a
         * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
         * architecturally 64-bit access, but instead do a 64-bit access
         * using MO_BE if appropriate and then split the two halves.
         */
        TCGv taddr = gen_aa32_addr(s, addr, opc);

        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
        tcg_gen_mov_i64(cpu_exclusive_val, t64);
        /* tmp is destined for Rt and tmp2 for Rt2; pick halves by endianness */
        if (s->be_data == MO_BE) {
            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
        } else {
            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
        }
        store_reg(s, rt2, tmp2);
    } else {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
    }

    store_reg(s, rt, tmp);
    /* Remember the address so the matching store-exclusive can compare it */
    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
}
4886 
/*
 * Clear the exclusive monitor by setting cpu_exclusive_addr to -1.
 * gen_store_exclusive() compares it against a zero-extended 32-bit
 * address, so -1 never matches.
 */
static void gen_clrex(DisasContext *s)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
4891 
/*
 * gen_store_exclusive:
 * @s: DisasContext
 * @rd: register that receives the status (0 on success, 1 on failure)
 * @rt: register holding the value to store (lowest-address word when
 *      size == 3)
 * @rt2: register holding the second word; only read when size == 3
 * @addr: 32-bit address to store to
 * @size: access size, OR'ed into the MemOp; 3 selects the 64-bit path
 *
 * The store succeeds only if @addr equals the remembered exclusive
 * address and the cmpxchg against the remembered value succeeds.
 * cpu_exclusive_addr is reset to -1 on both paths.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i32 addr, int size)
{
    TCGv_i32 t0, t1, t2;
    TCGv_i64 extaddr;
    TCGv taddr;
    TCGLabel *done_label;
    TCGLabel *fail_label;
    MemOp opc = size | MO_ALIGN | s->be_data;

    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
         [addr] = {Rt};
         {Rd} = 0;
       } else {
         {Rd} = 1;
       } */
    fail_label = gen_new_label();
    done_label = gen_new_label();
    /* Address check: zero-extend and compare with the remembered address */
    extaddr = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(extaddr, addr);
    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);

    taddr = gen_aa32_addr(s, addr, opc);
    t0 = tcg_temp_new_i32();
    t1 = load_reg(s, rt);
    if (size == 3) {
        TCGv_i64 o64 = tcg_temp_new_i64();
        TCGv_i64 n64 = tcg_temp_new_i64();

        t2 = load_reg(s, rt2);

        /*
         * For AArch32, architecturally the 32-bit word at the lowest
         * address is always Rt and the one at addr+4 is Rt2, even if
         * the CPU is big-endian. Since we're going to treat this as a
         * single 64-bit BE store, we need to put the two halves in the
         * opposite order for BE to LE, so that they end up in the right
         * places.  We don't want gen_aa32_st_i64, because that checks
         * SCTLR_B as if for an architectural 64-bit access.
         */
        if (s->be_data == MO_BE) {
            tcg_gen_concat_i32_i64(n64, t2, t1);
        } else {
            tcg_gen_concat_i32_i64(n64, t1, t2);
        }

        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
                                   get_mem_index(s), opc);

        /* Status is 1 if the old memory value differed from the expected one */
        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
        tcg_gen_extrl_i64_i32(t0, o64);
    } else {
        /* Expected value is the low 32 bits of the remembered value */
        t2 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
    }
    tcg_gen_mov_i32(cpu_R[rd], t0);
    tcg_gen_br(done_label);

    /* Address mismatch: report failure without touching memory */
    gen_set_label(fail_label);
    tcg_gen_movi_i32(cpu_R[rd], 1);
    gen_set_label(done_label);
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
4957 
/* gen_srs:
 * @s: DisasContext
 * @mode: mode field from insn (which stack to store to)
 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
 * @writeback: true if writeback bit set
 *
 * Generate code for the SRS (Store Return State) insn.
 * r14 and the SPSR are stored as two consecutive words relative to the
 * banked r13 of @mode; with @writeback the banked r13 is then updated.
 */
static void gen_srs(DisasContext *s,
                    uint32_t mode, uint32_t amode, bool writeback)
{
    int32_t offset;
    TCGv_i32 addr, tmp;
    bool undef = false;

    /* SRS is:
     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
     *   and specified mode is monitor mode
     * - UNDEFINED in Hyp mode
     * - UNPREDICTABLE in User or System mode
     * - UNPREDICTABLE if the specified mode is:
     * -- not implemented
     * -- not a valid mode number
     * -- a mode that's at a higher exception level
     * -- Monitor, if we are Non-secure
     * For the UNPREDICTABLE cases we choose to UNDEF.
     */
    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
        return;
    }

    if (s->current_el == 0 || s->current_el == 2) {
        undef = true;
    }

    switch (mode) {
    case ARM_CPU_MODE_USR:
    case ARM_CPU_MODE_FIQ:
    case ARM_CPU_MODE_IRQ:
    case ARM_CPU_MODE_SVC:
    case ARM_CPU_MODE_ABT:
    case ARM_CPU_MODE_UND:
    case ARM_CPU_MODE_SYS:
        break;
    case ARM_CPU_MODE_HYP:
        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
            undef = true;
        }
        break;
    case ARM_CPU_MODE_MON:
        /* No need to check specifically for "are we non-secure" because
         * we've already made EL0 UNDEF and handled the trap for S-EL1;
         * so if this isn't EL3 then we must be non-secure.
         */
        if (s->current_el != 3) {
            undef = true;
        }
        break;
    default:
        undef = true;
    }

    if (undef) {
        unallocated_encoding(s);
        return;
    }

    addr = tcg_temp_new_i32();
    /* get_r13_banked() will raise an exception if called from System mode */
    gen_set_condexec(s);
    gen_update_pc(s, 0);
    gen_helper_get_r13_banked(addr, tcg_env, tcg_constant_i32(mode));
    /* Offset from the banked r13 to the first (lower-addressed) word stored */
    switch (amode) {
    case 0: /* DA */
        offset = -4;
        break;
    case 1: /* IA */
        offset = 0;
        break;
    case 2: /* DB */
        offset = -8;
        break;
    case 3: /* IB */
        offset = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_gen_addi_i32(addr, addr, offset);
    /* First word: r14; second word, 4 bytes higher: the SPSR */
    tmp = load_reg(s, 14);
    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    tmp = load_cpu_field(spsr);
    tcg_gen_addi_i32(addr, addr, 4);
    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    if (writeback) {
        /*
         * addr currently points at the second stored word; these offsets
         * turn it into the written-back r13, i.e. the original banked
         * r13 - 8 for DA/DB and + 8 for IA/IB.
         */
        switch (amode) {
        case 0:
            offset = -8;
            break;
        case 1:
            offset = 4;
            break;
        case 2:
            offset = -4;
            break;
        case 3:
            offset = 0;
            break;
        default:
            g_assert_not_reached();
        }
        tcg_gen_addi_i32(addr, addr, offset);
        gen_helper_set_r13_banked(tcg_env, tcg_constant_i32(mode), addr);
    }
    s->base.is_jmp = DISAS_UPDATE_EXIT;
}
5076 
5077 /* Skip this instruction if the ARM condition is false */
5078 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5079 {
5080     arm_gen_condlabel(s);
5081     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5082 }
5083 
5084 
5085 /*
5086  * Constant expanders used by T16/T32 decode
5087  */
5088 
5089 /* Return only the rotation part of T32ExpandImm.  */
5090 static int t32_expandimm_rot(DisasContext *s, int x)
5091 {
5092     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5093 }
5094 
5095 /* Return the unrotated immediate from T32ExpandImm.  */
5096 static int t32_expandimm_imm(DisasContext *s, int x)
5097 {
5098     int imm = extract32(x, 0, 8);
5099 
5100     switch (extract32(x, 8, 4)) {
5101     case 0: /* XY */
5102         /* Nothing to do.  */
5103         break;
5104     case 1: /* 00XY00XY */
5105         imm *= 0x00010001;
5106         break;
5107     case 2: /* XY00XY00 */
5108         imm *= 0x01000100;
5109         break;
5110     case 3: /* XYXYXYXY */
5111         imm *= 0x01010101;
5112         break;
5113     default:
5114         /* Rotated constant.  */
5115         imm |= 0x80;
5116         break;
5117     }
5118     return imm;
5119 }
5120 
5121 static int t32_branch24(DisasContext *s, int x)
5122 {
5123     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5124     x ^= !(x < 0) * (3 << 21);
5125     /* Append the final zero.  */
5126     return x << 1;
5127 }
5128 
5129 static int t16_setflags(DisasContext *s)
5130 {
5131     return s->condexec_mask == 0;
5132 }
5133 
5134 static int t16_push_list(DisasContext *s, int x)
5135 {
5136     return (x & 0xff) | (x & 0x100) << (14 - 8);
5137 }
5138 
5139 static int t16_pop_list(DisasContext *s, int x)
5140 {
5141     return (x & 0xff) | (x & 0x100) << (15 - 8);
5142 }
5143 
5144 /*
5145  * Include the generated decoders.
5146  */
5147 
5148 #include "decode-a32.c.inc"
5149 #include "decode-a32-uncond.c.inc"
5150 #include "decode-t32.c.inc"
5151 #include "decode-t16.c.inc"
5152 
5153 static bool valid_cp(DisasContext *s, int cp)
5154 {
5155     /*
5156      * Return true if this coprocessor field indicates something
5157      * that's really a possible coprocessor.
5158      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5159      * and of those only cp14 and cp15 were used for registers.
5160      * cp10 and cp11 were used for VFP and Neon, whose decode is
5161      * dealt with elsewhere. With the advent of fp16, cp9 is also
5162      * now part of VFP.
5163      * For v8A and later, the encoding has been tightened so that
5164      * only cp14 and cp15 are valid, and other values aren't considered
5165      * to be in the coprocessor-instruction space at all. v8M still
5166      * permits coprocessors 0..7.
5167      * For XScale, we must not decode the XScale cp0, cp1 space as
5168      * a standard coprocessor insn, because we want to fall through to
5169      * the legacy disas_xscale_insn() decoder after decodetree is done.
5170      */
5171     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5172         return false;
5173     }
5174 
5175     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5176         !arm_dc_feature(s, ARM_FEATURE_M)) {
5177         return cp >= 14;
5178     }
5179     return cp < 8 || cp >= 14;
5180 }
5181 
5182 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5183 {
5184     if (!valid_cp(s, a->cp)) {
5185         return false;
5186     }
5187     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5188                    false, a->rt, 0);
5189     return true;
5190 }
5191 
5192 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5193 {
5194     if (!valid_cp(s, a->cp)) {
5195         return false;
5196     }
5197     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5198                    true, a->rt, 0);
5199     return true;
5200 }
5201 
5202 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5203 {
5204     if (!valid_cp(s, a->cp)) {
5205         return false;
5206     }
5207     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5208                    false, a->rt, a->rt2);
5209     return true;
5210 }
5211 
5212 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5213 {
5214     if (!valid_cp(s, a->cp)) {
5215         return false;
5216     }
5217     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5218                    true, a->rt, a->rt2);
5219     return true;
5220 }
5221 
5222 /* Helpers to swap operands for reverse-subtract.  */
/* Reverse subtract: dst = b - a. */
static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_sub_i32(dst, b, a);
}
5227 
/* Reverse subtract via gen_sub_CC() with the operands swapped. */
static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
{
    gen_sub_CC(dst, b, a);
}
5232 
/* Reverse subtract with carry via gen_sub_carry() with the operands swapped. */
static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
{
    gen_sub_carry(dest, b, a);
}
5237 
/* Reverse subtract with carry via gen_sbc_CC() with the operands swapped. */
static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
{
    gen_sbc_CC(dest, b, a);
}
5242 
5243 /*
5244  * Helpers for the data processing routines.
5245  *
5246  * After the computation store the results back.
5247  * This may be suppressed altogether (STREG_NONE), require a runtime
5248  * check against the stack limits (STREG_SP_CHECK), or generate an
5249  * exception return.  Oh, or store into a register.
5250  *
5251  * Always return true, indicating success for a trans_* function.
5252  */
typedef enum {
   /* Discard the result; nothing is stored. */
   STREG_NONE,
   /* Store into rd: store_reg() in Thumb state, store_reg_bx() otherwise. */
   STREG_NORMAL,
   /* Store via store_sp_checked(). */
   STREG_SP_CHECK,
   /* Hand the value to gen_exception_return(). */
   STREG_EXC_RET,
} StoreRegKind;
5259 
5260 static bool store_reg_kind(DisasContext *s, int rd,
5261                             TCGv_i32 val, StoreRegKind kind)
5262 {
5263     switch (kind) {
5264     case STREG_NONE:
5265         return true;
5266     case STREG_NORMAL:
5267         /* See ALUWritePC: Interworking only from a32 mode. */
5268         if (s->thumb) {
5269             store_reg(s, rd, val);
5270         } else {
5271             store_reg_bx(s, rd, val);
5272         }
5273         return true;
5274     case STREG_SP_CHECK:
5275         store_sp_checked(s, val);
5276         return true;
5277     case STREG_EXC_RET:
5278         gen_exception_return(s, val);
5279         return true;
5280     }
5281     g_assert_not_reached();
5282 }
5283 
5284 /*
5285  * Data Processing (register)
5286  *
5287  * Operate, with set flags, one register source,
5288  * one immediate shifted register source, and a destination.
5289  */
5290 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5291                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5292                          int logic_cc, StoreRegKind kind)
5293 {
5294     TCGv_i32 tmp1, tmp2;
5295 
5296     tmp2 = load_reg(s, a->rm);
5297     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5298     tmp1 = load_reg(s, a->rn);
5299 
5300     gen(tmp1, tmp1, tmp2);
5301 
5302     if (logic_cc) {
5303         gen_logic_CC(tmp1);
5304     }
5305     return store_reg_kind(s, a->rd, tmp1, kind);
5306 }
5307 
5308 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5309                          void (*gen)(TCGv_i32, TCGv_i32),
5310                          int logic_cc, StoreRegKind kind)
5311 {
5312     TCGv_i32 tmp;
5313 
5314     tmp = load_reg(s, a->rm);
5315     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5316 
5317     gen(tmp, tmp);
5318     if (logic_cc) {
5319         gen_logic_CC(tmp);
5320     }
5321     return store_reg_kind(s, a->rd, tmp, kind);
5322 }
5323 
5324 /*
5325  * Data-processing (register-shifted register)
5326  *
5327  * Operate, with set flags, one register source,
5328  * one register shifted register source, and a destination.
5329  */
5330 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5331                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5332                          int logic_cc, StoreRegKind kind)
5333 {
5334     TCGv_i32 tmp1, tmp2;
5335 
5336     tmp1 = load_reg(s, a->rs);
5337     tmp2 = load_reg(s, a->rm);
5338     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5339     tmp1 = load_reg(s, a->rn);
5340 
5341     gen(tmp1, tmp1, tmp2);
5342 
5343     if (logic_cc) {
5344         gen_logic_CC(tmp1);
5345     }
5346     return store_reg_kind(s, a->rd, tmp1, kind);
5347 }
5348 
5349 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5350                          void (*gen)(TCGv_i32, TCGv_i32),
5351                          int logic_cc, StoreRegKind kind)
5352 {
5353     TCGv_i32 tmp1, tmp2;
5354 
5355     tmp1 = load_reg(s, a->rs);
5356     tmp2 = load_reg(s, a->rm);
5357     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5358 
5359     gen(tmp2, tmp2);
5360     if (logic_cc) {
5361         gen_logic_CC(tmp2);
5362     }
5363     return store_reg_kind(s, a->rd, tmp2, kind);
5364 }
5365 
5366 /*
5367  * Data-processing (immediate)
5368  *
5369  * Operate, with set flags, one register source,
5370  * one rotated immediate, and a destination.
5371  *
5372  * Note that logic_cc && a->rot setting CF based on the msb of the
5373  * immediate is the reason why we must pass in the unrotated form
5374  * of the immediate.
5375  */
5376 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5377                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5378                          int logic_cc, StoreRegKind kind)
5379 {
5380     TCGv_i32 tmp1;
5381     uint32_t imm;
5382 
5383     imm = ror32(a->imm, a->rot);
5384     if (logic_cc && a->rot) {
5385         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5386     }
5387     tmp1 = load_reg(s, a->rn);
5388 
5389     gen(tmp1, tmp1, tcg_constant_i32(imm));
5390 
5391     if (logic_cc) {
5392         gen_logic_CC(tmp1);
5393     }
5394     return store_reg_kind(s, a->rd, tmp1, kind);
5395 }
5396 
5397 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5398                          void (*gen)(TCGv_i32, TCGv_i32),
5399                          int logic_cc, StoreRegKind kind)
5400 {
5401     TCGv_i32 tmp;
5402     uint32_t imm;
5403 
5404     imm = ror32(a->imm, a->rot);
5405     if (logic_cc && a->rot) {
5406         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5407     }
5408 
5409     tmp = tcg_temp_new_i32();
5410     gen(tmp, tcg_constant_i32(imm));
5411 
5412     if (logic_cc) {
5413         gen_logic_CC(tmp);
5414     }
5415     return store_reg_kind(s, a->rd, tmp, kind);
5416 }
5417 
/*
 * Define trans_NAME_rrri, trans_NAME_rrrr and trans_NAME_rri for a
 * three-operand data-processing insn.
 *
 * K is evaluated first, into a local, before OP and L are expanded;
 * K may therefore modify *a or return from the generated function.
 */
#define DO_ANY3(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rrr_shi(s, a, OP, L, k);                            \
    }                                                                   \
    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rrr_shr(s, a, OP, L, k);                            \
    }                                                                   \
    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rri_rot(s, a, OP, L, k);                            \
    }
5425 
/*
 * Define trans_NAME_rxri, trans_NAME_rxrr and trans_NAME_rxi for a
 * two-operand data-processing insn (no Rn source).
 *
 * K is evaluated first, into a local, before OP and L are expanded;
 * K may therefore modify *a or return from the generated function.
 */
#define DO_ANY2(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rxr_shi(s, a, OP, L, k);                            \
    }                                                                   \
    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rxr_shr(s, a, OP, L, k);                            \
    }                                                                   \
    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rxi_rot(s, a, OP, L, k);                            \
    }
5433 
/*
 * Define trans_NAME_xrri, trans_NAME_xrrr and trans_NAME_xri for a
 * comparison insn: the operation is generated as usual but the result
 * is never written back (STREG_NONE).
 */
#define DO_CMP2(NAME, OP, L)                                            \
    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        return op_s_rrr_shi(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        return op_s_rrr_shr(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        return op_s_rri_rot(s, a, OP, L, STREG_NONE);                   \
    }
5441 
5442 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5443 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5444 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5445 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5446 
5447 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5448 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5449 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5450 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5451 
5452 DO_CMP2(TST, tcg_gen_and_i32, true)
5453 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5454 DO_CMP2(CMN, gen_add_CC, false)
5455 DO_CMP2(CMP, gen_sub_CC, false)
5456 
5457 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5458         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5459 
5460 /*
5461  * Note for the computation of StoreRegKind we return out of the
5462  * middle of the functions that are expanded by DO_ANY3, and that
5463  * we modify a->s via that parameter before it is used by OP.
5464  */
5465 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5466         ({
5467             StoreRegKind ret = STREG_NORMAL;
5468             if (a->rd == 15 && a->s) {
5469                 /*
5470                  * See ALUExceptionReturn:
5471                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5472                  * In Hyp mode, UNDEFINED.
5473                  */
5474                 if (IS_USER(s) || s->current_el == 2) {
5475                     unallocated_encoding(s);
5476                     return true;
5477                 }
5478                 /* There is no writeback of nzcv to PSTATE.  */
5479                 a->s = 0;
5480                 ret = STREG_EXC_RET;
5481             } else if (a->rd == 13 && a->rn == 13) {
5482                 ret = STREG_SP_CHECK;
5483             }
5484             ret;
5485         }))
5486 
5487 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5488         ({
5489             StoreRegKind ret = STREG_NORMAL;
5490             if (a->rd == 15 && a->s) {
5491                 /*
5492                  * See ALUExceptionReturn:
5493                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5494                  * In Hyp mode, UNDEFINED.
5495                  */
5496                 if (IS_USER(s) || s->current_el == 2) {
5497                     unallocated_encoding(s);
5498                     return true;
5499                 }
5500                 /* There is no writeback of nzcv to PSTATE.  */
5501                 a->s = 0;
5502                 ret = STREG_EXC_RET;
5503             } else if (a->rd == 13) {
5504                 ret = STREG_SP_CHECK;
5505             }
5506             ret;
5507         }))
5508 
5509 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5510 
5511 /*
5512  * ORN is only available with T32, so there is no register-shifted-register
5513  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5514  */
5515 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5516 {
5517     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5518 }
5519 
5520 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5521 {
5522     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5523 }
5524 
/* The data-processing generator macros are not needed past this point. */
#undef DO_CMP2
#undef DO_ANY2
#undef DO_ANY3
5528 
5529 static bool trans_ADR(DisasContext *s, arg_ri *a)
5530 {
5531     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5532     return true;
5533 }
5534 
5535 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5536 {
5537     if (!ENABLE_ARCH_6T2) {
5538         return false;
5539     }
5540 
5541     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5542     return true;
5543 }
5544 
5545 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5546 {
5547     TCGv_i32 tmp;
5548 
5549     if (!ENABLE_ARCH_6T2) {
5550         return false;
5551     }
5552 
5553     tmp = load_reg(s, a->rd);
5554     tcg_gen_ext16u_i32(tmp, tmp);
5555     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5556     store_reg(s, a->rd, tmp);
5557     return true;
5558 }
5559 
5560 /*
5561  * v8.1M MVE wide-shifts
5562  */
5563 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5564                           WideShiftImmFn *fn)
5565 {
5566     TCGv_i64 rda;
5567     TCGv_i32 rdalo, rdahi;
5568 
5569     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5570         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5571         return false;
5572     }
5573     if (a->rdahi == 15) {
5574         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5575         return false;
5576     }
5577     if (!dc_isar_feature(aa32_mve, s) ||
5578         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5579         a->rdahi == 13) {
5580         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5581         unallocated_encoding(s);
5582         return true;
5583     }
5584 
5585     if (a->shim == 0) {
5586         a->shim = 32;
5587     }
5588 
5589     rda = tcg_temp_new_i64();
5590     rdalo = load_reg(s, a->rdalo);
5591     rdahi = load_reg(s, a->rdahi);
5592     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5593 
5594     fn(rda, rda, a->shim);
5595 
5596     tcg_gen_extrl_i64_i32(rdalo, rda);
5597     tcg_gen_extrh_i64_i32(rdahi, rda);
5598     store_reg(s, a->rdalo, rdalo);
5599     store_reg(s, a->rdahi, rdahi);
5600 
5601     return true;
5602 }
5603 
5604 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5605 {
5606     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5607 }
5608 
5609 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5610 {
5611     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5612 }
5613 
5614 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5615 {
5616     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5617 }
5618 
5619 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5620 {
5621     gen_helper_mve_sqshll(r, tcg_env, n, tcg_constant_i32(shift));
5622 }
5623 
5624 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5625 {
5626     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5627 }
5628 
5629 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5630 {
5631     gen_helper_mve_uqshll(r, tcg_env, n, tcg_constant_i32(shift));
5632 }
5633 
5634 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5635 {
5636     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5637 }
5638 
5639 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5640 {
5641     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5642 }
5643 
5644 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5645 {
5646     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5647 }
5648 
5649 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5650 {
5651     TCGv_i64 rda;
5652     TCGv_i32 rdalo, rdahi;
5653 
5654     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5655         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5656         return false;
5657     }
5658     if (a->rdahi == 15) {
5659         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5660         return false;
5661     }
5662     if (!dc_isar_feature(aa32_mve, s) ||
5663         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5664         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5665         a->rm == a->rdahi || a->rm == a->rdalo) {
5666         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5667         unallocated_encoding(s);
5668         return true;
5669     }
5670 
5671     rda = tcg_temp_new_i64();
5672     rdalo = load_reg(s, a->rdalo);
5673     rdahi = load_reg(s, a->rdahi);
5674     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5675 
5676     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5677     fn(rda, tcg_env, rda, cpu_R[a->rm]);
5678 
5679     tcg_gen_extrl_i64_i32(rdalo, rda);
5680     tcg_gen_extrh_i64_i32(rdahi, rda);
5681     store_reg(s, a->rdalo, rdalo);
5682     store_reg(s, a->rdahi, rdahi);
5683 
5684     return true;
5685 }
5686 
5687 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5688 {
5689     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5690 }
5691 
5692 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5693 {
5694     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5695 }
5696 
5697 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5698 {
5699     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5700 }
5701 
5702 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5703 {
5704     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5705 }
5706 
5707 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5708 {
5709     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5710 }
5711 
5712 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5713 {
5714     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5715 }
5716 
5717 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5718 {
5719     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5720         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5721         return false;
5722     }
5723     if (!dc_isar_feature(aa32_mve, s) ||
5724         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5725         a->rda == 13 || a->rda == 15) {
5726         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5727         unallocated_encoding(s);
5728         return true;
5729     }
5730 
5731     if (a->shim == 0) {
5732         a->shim = 32;
5733     }
5734     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5735 
5736     return true;
5737 }
5738 
5739 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5740 {
5741     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5742 }
5743 
5744 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5745 {
5746     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5747 }
5748 
5749 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5750 {
5751     gen_helper_mve_sqshl(r, tcg_env, n, tcg_constant_i32(shift));
5752 }
5753 
5754 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5755 {
5756     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5757 }
5758 
5759 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5760 {
5761     gen_helper_mve_uqshl(r, tcg_env, n, tcg_constant_i32(shift));
5762 }
5763 
5764 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5765 {
5766     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5767 }
5768 
5769 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5770 {
5771     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5772         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5773         return false;
5774     }
5775     if (!dc_isar_feature(aa32_mve, s) ||
5776         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5777         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5778         a->rm == a->rda) {
5779         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5780         unallocated_encoding(s);
5781         return true;
5782     }
5783 
5784     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5785     fn(cpu_R[a->rda], tcg_env, cpu_R[a->rda], cpu_R[a->rm]);
5786     return true;
5787 }
5788 
5789 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5790 {
5791     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5792 }
5793 
5794 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5795 {
5796     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5797 }
5798 
5799 /*
5800  * Multiply and multiply accumulate
5801  */
5802 
5803 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5804 {
5805     TCGv_i32 t1, t2;
5806 
5807     t1 = load_reg(s, a->rn);
5808     t2 = load_reg(s, a->rm);
5809     tcg_gen_mul_i32(t1, t1, t2);
5810     if (add) {
5811         t2 = load_reg(s, a->ra);
5812         tcg_gen_add_i32(t1, t1, t2);
5813     }
5814     if (a->s) {
5815         gen_logic_CC(t1);
5816     }
5817     store_reg(s, a->rd, t1);
5818     return true;
5819 }
5820 
5821 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5822 {
5823     return op_mla(s, a, false);
5824 }
5825 
5826 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5827 {
5828     return op_mla(s, a, true);
5829 }
5830 
5831 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5832 {
5833     TCGv_i32 t1, t2;
5834 
5835     if (!ENABLE_ARCH_6T2) {
5836         return false;
5837     }
5838     t1 = load_reg(s, a->rn);
5839     t2 = load_reg(s, a->rm);
5840     tcg_gen_mul_i32(t1, t1, t2);
5841     t2 = load_reg(s, a->ra);
5842     tcg_gen_sub_i32(t1, t2, t1);
5843     store_reg(s, a->rd, t1);
5844     return true;
5845 }
5846 
5847 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5848 {
5849     TCGv_i32 t0, t1, t2, t3;
5850 
5851     t0 = load_reg(s, a->rm);
5852     t1 = load_reg(s, a->rn);
5853     if (uns) {
5854         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5855     } else {
5856         tcg_gen_muls2_i32(t0, t1, t0, t1);
5857     }
5858     if (add) {
5859         t2 = load_reg(s, a->ra);
5860         t3 = load_reg(s, a->rd);
5861         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5862     }
5863     if (a->s) {
5864         gen_logicq_cc(t0, t1);
5865     }
5866     store_reg(s, a->ra, t0);
5867     store_reg(s, a->rd, t1);
5868     return true;
5869 }
5870 
5871 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5872 {
5873     return op_mlal(s, a, true, false);
5874 }
5875 
5876 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5877 {
5878     return op_mlal(s, a, false, false);
5879 }
5880 
5881 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5882 {
5883     return op_mlal(s, a, true, true);
5884 }
5885 
5886 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5887 {
5888     return op_mlal(s, a, false, true);
5889 }
5890 
5891 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5892 {
5893     TCGv_i32 t0, t1, t2, zero;
5894 
5895     if (s->thumb
5896         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5897         : !ENABLE_ARCH_6) {
5898         return false;
5899     }
5900 
5901     t0 = load_reg(s, a->rm);
5902     t1 = load_reg(s, a->rn);
5903     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5904     zero = tcg_constant_i32(0);
5905     t2 = load_reg(s, a->ra);
5906     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5907     t2 = load_reg(s, a->rd);
5908     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5909     store_reg(s, a->ra, t0);
5910     store_reg(s, a->rd, t1);
5911     return true;
5912 }
5913 
5914 /*
5915  * Saturating addition and subtraction
5916  */
5917 
5918 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5919 {
5920     TCGv_i32 t0, t1;
5921 
5922     if (s->thumb
5923         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5924         : !ENABLE_ARCH_5TE) {
5925         return false;
5926     }
5927 
5928     t0 = load_reg(s, a->rm);
5929     t1 = load_reg(s, a->rn);
5930     if (doub) {
5931         gen_helper_add_saturate(t1, tcg_env, t1, t1);
5932     }
5933     if (add) {
5934         gen_helper_add_saturate(t0, tcg_env, t0, t1);
5935     } else {
5936         gen_helper_sub_saturate(t0, tcg_env, t0, t1);
5937     }
5938     store_reg(s, a->rd, t0);
5939     return true;
5940 }
5941 
5942 #define DO_QADDSUB(NAME, ADD, DOUB) \
5943 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5944 {                                                        \
5945     return op_qaddsub(s, a, ADD, DOUB);                  \
5946 }
5947 
5948 DO_QADDSUB(QADD, true, false)
5949 DO_QADDSUB(QSUB, false, false)
5950 DO_QADDSUB(QDADD, true, true)
5951 DO_QADDSUB(QDSUB, false, true)
5952 
5953 #undef DO_QADDSUB
5954 
5955 /*
5956  * Halfword multiply and multiply accumulate
5957  */
5958 
5959 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5960                        int add_long, bool nt, bool mt)
5961 {
5962     TCGv_i32 t0, t1, tl, th;
5963 
5964     if (s->thumb
5965         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5966         : !ENABLE_ARCH_5TE) {
5967         return false;
5968     }
5969 
5970     t0 = load_reg(s, a->rn);
5971     t1 = load_reg(s, a->rm);
5972     gen_mulxy(t0, t1, nt, mt);
5973 
5974     switch (add_long) {
5975     case 0:
5976         store_reg(s, a->rd, t0);
5977         break;
5978     case 1:
5979         t1 = load_reg(s, a->ra);
5980         gen_helper_add_setq(t0, tcg_env, t0, t1);
5981         store_reg(s, a->rd, t0);
5982         break;
5983     case 2:
5984         tl = load_reg(s, a->ra);
5985         th = load_reg(s, a->rd);
5986         /* Sign-extend the 32-bit product to 64 bits.  */
5987         t1 = tcg_temp_new_i32();
5988         tcg_gen_sari_i32(t1, t0, 31);
5989         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5990         store_reg(s, a->ra, tl);
5991         store_reg(s, a->rd, th);
5992         break;
5993     default:
5994         g_assert_not_reached();
5995     }
5996     return true;
5997 }
5998 
5999 #define DO_SMLAX(NAME, add, nt, mt) \
6000 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6001 {                                                          \
6002     return op_smlaxxx(s, a, add, nt, mt);                  \
6003 }
6004 
6005 DO_SMLAX(SMULBB, 0, 0, 0)
6006 DO_SMLAX(SMULBT, 0, 0, 1)
6007 DO_SMLAX(SMULTB, 0, 1, 0)
6008 DO_SMLAX(SMULTT, 0, 1, 1)
6009 
6010 DO_SMLAX(SMLABB, 1, 0, 0)
6011 DO_SMLAX(SMLABT, 1, 0, 1)
6012 DO_SMLAX(SMLATB, 1, 1, 0)
6013 DO_SMLAX(SMLATT, 1, 1, 1)
6014 
6015 DO_SMLAX(SMLALBB, 2, 0, 0)
6016 DO_SMLAX(SMLALBT, 2, 0, 1)
6017 DO_SMLAX(SMLALTB, 2, 1, 0)
6018 DO_SMLAX(SMLALTT, 2, 1, 1)
6019 
6020 #undef DO_SMLAX
6021 
6022 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6023 {
6024     TCGv_i32 t0, t1;
6025 
6026     if (!ENABLE_ARCH_5TE) {
6027         return false;
6028     }
6029 
6030     t0 = load_reg(s, a->rn);
6031     t1 = load_reg(s, a->rm);
6032     /*
6033      * Since the nominal result is product<47:16>, shift the 16-bit
6034      * input up by 16 bits, so that the result is at product<63:32>.
6035      */
6036     if (mt) {
6037         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6038     } else {
6039         tcg_gen_shli_i32(t1, t1, 16);
6040     }
6041     tcg_gen_muls2_i32(t0, t1, t0, t1);
6042     if (add) {
6043         t0 = load_reg(s, a->ra);
6044         gen_helper_add_setq(t1, tcg_env, t1, t0);
6045     }
6046     store_reg(s, a->rd, t1);
6047     return true;
6048 }
6049 
6050 #define DO_SMLAWX(NAME, add, mt) \
6051 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6052 {                                                          \
6053     return op_smlawx(s, a, add, mt);                       \
6054 }
6055 
6056 DO_SMLAWX(SMULWB, 0, 0)
6057 DO_SMLAWX(SMULWT, 0, 1)
6058 DO_SMLAWX(SMLAWB, 1, 0)
6059 DO_SMLAWX(SMLAWT, 1, 1)
6060 
6061 #undef DO_SMLAWX
6062 
6063 /*
6064  * MSR (immediate) and hints
6065  */
6066 
6067 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6068 {
6069     /*
6070      * When running single-threaded TCG code, use the helper to ensure that
6071      * the next round-robin scheduled vCPU gets a crack.  When running in
6072      * MTTCG we don't generate jumps to the helper as it won't affect the
6073      * scheduling of other vCPUs.
6074      */
6075     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6076         gen_update_pc(s, curr_insn_len(s));
6077         s->base.is_jmp = DISAS_YIELD;
6078     }
6079     return true;
6080 }
6081 
6082 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6083 {
6084     /*
6085      * When running single-threaded TCG code, use the helper to ensure that
6086      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6087      * just skip this instruction.  Currently the SEV/SEVL instructions,
6088      * which are *one* of many ways to wake the CPU from WFE, are not
6089      * implemented so we can't sleep like WFI does.
6090      */
6091     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6092         gen_update_pc(s, curr_insn_len(s));
6093         s->base.is_jmp = DISAS_WFE;
6094     }
6095     return true;
6096 }
6097 
6098 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6099 {
6100     /* For WFI, halt the vCPU until an IRQ. */
6101     gen_update_pc(s, curr_insn_len(s));
6102     s->base.is_jmp = DISAS_WFI;
6103     return true;
6104 }
6105 
6106 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6107 {
6108     /*
6109      * For M-profile, minimal-RAS ESB can be a NOP.
6110      * Without RAS, we must implement this as NOP.
6111      */
6112     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6113         /*
6114          * QEMU does not have a source of physical SErrors,
6115          * so we are only concerned with virtual SErrors.
6116          * The pseudocode in the ARM for this case is
6117          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6118          *      AArch32.vESBOperation();
6119          * Most of the condition can be evaluated at translation time.
6120          * Test for EL2 present, and defer test for SEL2 to runtime.
6121          */
6122         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6123             gen_helper_vesb(tcg_env);
6124         }
6125     }
6126     return true;
6127 }
6128 
6129 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6130 {
6131     return true;
6132 }
6133 
6134 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6135 {
6136     uint32_t val = ror32(a->imm, a->rot * 2);
6137     uint32_t mask = msr_mask(s, a->mask, a->r);
6138 
6139     if (gen_set_psr_im(s, mask, a->r, val)) {
6140         unallocated_encoding(s);
6141     }
6142     return true;
6143 }
6144 
6145 /*
6146  * Cyclic Redundancy Check
6147  */
6148 
6149 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6150 {
6151     TCGv_i32 t1, t2, t3;
6152 
6153     if (!dc_isar_feature(aa32_crc32, s)) {
6154         return false;
6155     }
6156 
6157     t1 = load_reg(s, a->rn);
6158     t2 = load_reg(s, a->rm);
6159     switch (sz) {
6160     case MO_8:
6161         gen_uxtb(t2);
6162         break;
6163     case MO_16:
6164         gen_uxth(t2);
6165         break;
6166     case MO_32:
6167         break;
6168     default:
6169         g_assert_not_reached();
6170     }
6171     t3 = tcg_constant_i32(1 << sz);
6172     if (c) {
6173         gen_helper_crc32c(t1, t1, t2, t3);
6174     } else {
6175         gen_helper_crc32(t1, t1, t2, t3);
6176     }
6177     store_reg(s, a->rd, t1);
6178     return true;
6179 }
6180 
6181 #define DO_CRC32(NAME, c, sz) \
6182 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6183     { return op_crc32(s, a, c, sz); }
6184 
6185 DO_CRC32(CRC32B, false, MO_8)
6186 DO_CRC32(CRC32H, false, MO_16)
6187 DO_CRC32(CRC32W, false, MO_32)
6188 DO_CRC32(CRC32CB, true, MO_8)
6189 DO_CRC32(CRC32CH, true, MO_16)
6190 DO_CRC32(CRC32CW, true, MO_32)
6191 
6192 #undef DO_CRC32
6193 
6194 /*
6195  * Miscellaneous instructions
6196  */
6197 
6198 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6199 {
6200     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6201         return false;
6202     }
6203     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6204     return true;
6205 }
6206 
6207 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6208 {
6209     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6210         return false;
6211     }
6212     gen_msr_banked(s, a->r, a->sysm, a->rn);
6213     return true;
6214 }
6215 
6216 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6217 {
6218     TCGv_i32 tmp;
6219 
6220     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6221         return false;
6222     }
6223     if (a->r) {
6224         if (IS_USER(s)) {
6225             unallocated_encoding(s);
6226             return true;
6227         }
6228         tmp = load_cpu_field(spsr);
6229     } else {
6230         tmp = tcg_temp_new_i32();
6231         gen_helper_cpsr_read(tmp, tcg_env);
6232     }
6233     store_reg(s, a->rd, tmp);
6234     return true;
6235 }
6236 
6237 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6238 {
6239     TCGv_i32 tmp;
6240     uint32_t mask = msr_mask(s, a->mask, a->r);
6241 
6242     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6243         return false;
6244     }
6245     tmp = load_reg(s, a->rn);
6246     if (gen_set_psr(s, mask, a->r, tmp)) {
6247         unallocated_encoding(s);
6248     }
6249     return true;
6250 }
6251 
6252 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6253 {
6254     TCGv_i32 tmp;
6255 
6256     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6257         return false;
6258     }
6259     tmp = tcg_temp_new_i32();
6260     gen_helper_v7m_mrs(tmp, tcg_env, tcg_constant_i32(a->sysm));
6261     store_reg(s, a->rd, tmp);
6262     return true;
6263 }
6264 
6265 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6266 {
6267     TCGv_i32 addr, reg;
6268 
6269     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6270         return false;
6271     }
6272     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6273     reg = load_reg(s, a->rn);
6274     gen_helper_v7m_msr(tcg_env, addr, reg);
6275     /* If we wrote to CONTROL, the EL might have changed */
6276     gen_rebuild_hflags(s, true);
6277     gen_lookup_tb(s);
6278     return true;
6279 }
6280 
6281 static bool trans_BX(DisasContext *s, arg_BX *a)
6282 {
6283     if (!ENABLE_ARCH_4T) {
6284         return false;
6285     }
6286     gen_bx_excret(s, load_reg(s, a->rm));
6287     return true;
6288 }
6289 
6290 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6291 {
6292     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6293         return false;
6294     }
6295     /*
6296      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6297      * TBFLAGS bit on a basically-never-happens case, so call a helper
6298      * function to check for the trap and raise the exception if needed
6299      * (passing it the register number for the syndrome value).
6300      * v8A doesn't have this HSTR bit.
6301      */
6302     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6303         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6304         s->current_el < 2 && s->ns) {
6305         gen_helper_check_bxj_trap(tcg_env, tcg_constant_i32(a->rm));
6306     }
6307     /* Trivial implementation equivalent to bx.  */
6308     gen_bx(s, load_reg(s, a->rm));
6309     return true;
6310 }
6311 
6312 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6313 {
6314     TCGv_i32 tmp;
6315 
6316     if (!ENABLE_ARCH_5) {
6317         return false;
6318     }
6319     tmp = load_reg(s, a->rm);
6320     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6321     gen_bx(s, tmp);
6322     return true;
6323 }
6324 
6325 /*
6326  * BXNS/BLXNS: only exist for v8M with the security extensions,
6327  * and always UNDEF if NonSecure.  We don't implement these in
6328  * the user-only mode either (in theory you can use them from
6329  * Secure User mode but they are too tied in to system emulation).
6330  */
6331 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6332 {
6333     if (!s->v8m_secure || IS_USER_ONLY) {
6334         unallocated_encoding(s);
6335     } else {
6336         gen_bxns(s, a->rm);
6337     }
6338     return true;
6339 }
6340 
6341 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6342 {
6343     if (!s->v8m_secure || IS_USER_ONLY) {
6344         unallocated_encoding(s);
6345     } else {
6346         gen_blxns(s, a->rm);
6347     }
6348     return true;
6349 }
6350 
6351 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6352 {
6353     TCGv_i32 tmp;
6354 
6355     if (!ENABLE_ARCH_5) {
6356         return false;
6357     }
6358     tmp = load_reg(s, a->rm);
6359     tcg_gen_clzi_i32(tmp, tmp, 32);
6360     store_reg(s, a->rd, tmp);
6361     return true;
6362 }
6363 
6364 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6365 {
6366     TCGv_i32 tmp;
6367 
6368     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6369         return false;
6370     }
6371     if (IS_USER(s)) {
6372         unallocated_encoding(s);
6373         return true;
6374     }
6375     if (s->current_el == 2) {
6376         /* ERET from Hyp uses ELR_Hyp, not LR */
6377         tmp = load_cpu_field_low32(elr_el[2]);
6378     } else {
6379         tmp = load_reg(s, 14);
6380     }
6381     gen_exception_return(s, tmp);
6382     return true;
6383 }
6384 
6385 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6386 {
6387     gen_hlt(s, a->imm);
6388     return true;
6389 }
6390 
6391 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6392 {
6393     if (!ENABLE_ARCH_5) {
6394         return false;
6395     }
6396     /* BKPT is OK with ECI set and leaves it untouched */
6397     s->eci_handled = true;
6398     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6399         semihosting_enabled(s->current_el == 0) &&
6400         (a->imm == 0xab)) {
6401         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6402     } else {
6403         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6404     }
6405     return true;
6406 }
6407 
6408 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6409 {
6410     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6411         return false;
6412     }
6413     if (IS_USER(s)) {
6414         unallocated_encoding(s);
6415     } else {
6416         gen_hvc(s, a->imm);
6417     }
6418     return true;
6419 }
6420 
6421 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6422 {
6423     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6424         return false;
6425     }
6426     if (IS_USER(s)) {
6427         unallocated_encoding(s);
6428     } else {
6429         gen_smc(s);
6430     }
6431     return true;
6432 }
6433 
6434 static bool trans_SG(DisasContext *s, arg_SG *a)
6435 {
6436     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6437         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6438         return false;
6439     }
6440     /*
6441      * SG (v8M only)
6442      * The bulk of the behaviour for this instruction is implemented
6443      * in v7m_handle_execute_nsc(), which deals with the insn when
6444      * it is executed by a CPU in non-secure state from memory
6445      * which is Secure & NonSecure-Callable.
6446      * Here we only need to handle the remaining cases:
6447      *  * in NS memory (including the "security extension not
6448      *    implemented" case) : NOP
6449      *  * in S memory but CPU already secure (clear IT bits)
6450      * We know that the attribute for the memory this insn is
6451      * in must match the current CPU state, because otherwise
6452      * get_phys_addr_pmsav8 would have generated an exception.
6453      */
6454     if (s->v8m_secure) {
6455         /* Like the IT insn, we don't need to generate any code */
6456         s->condexec_cond = 0;
6457         s->condexec_mask = 0;
6458     }
6459     return true;
6460 }
6461 
6462 static bool trans_TT(DisasContext *s, arg_TT *a)
6463 {
6464     TCGv_i32 addr, tmp;
6465 
6466     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6467         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6468         return false;
6469     }
6470     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6471         /* We UNDEF for these UNPREDICTABLE cases */
6472         unallocated_encoding(s);
6473         return true;
6474     }
6475     if (a->A && !s->v8m_secure) {
6476         /* This case is UNDEFINED.  */
6477         unallocated_encoding(s);
6478         return true;
6479     }
6480 
6481     addr = load_reg(s, a->rn);
6482     tmp = tcg_temp_new_i32();
6483     gen_helper_v7m_tt(tmp, tcg_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6484     store_reg(s, a->rd, tmp);
6485     return true;
6486 }
6487 
6488 /*
6489  * Load/store register index
6490  */
6491 
6492 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6493 {
6494     ISSInfo ret;
6495 
6496     /* ISS not valid if writeback */
6497     if (p && !w) {
6498         ret = rd;
6499         if (curr_insn_len(s) == 2) {
6500             ret |= ISSIs16Bit;
6501         }
6502     } else {
6503         ret = ISSInvalid;
6504     }
6505     return ret;
6506 }
6507 
6508 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6509 {
6510     TCGv_i32 addr = load_reg(s, a->rn);
6511 
6512     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6513         gen_helper_v8m_stackcheck(tcg_env, addr);
6514     }
6515 
6516     if (a->p) {
6517         TCGv_i32 ofs = load_reg(s, a->rm);
6518         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6519         if (a->u) {
6520             tcg_gen_add_i32(addr, addr, ofs);
6521         } else {
6522             tcg_gen_sub_i32(addr, addr, ofs);
6523         }
6524     }
6525     return addr;
6526 }
6527 
6528 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6529                             TCGv_i32 addr, int address_offset)
6530 {
6531     if (!a->p) {
6532         TCGv_i32 ofs = load_reg(s, a->rm);
6533         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6534         if (a->u) {
6535             tcg_gen_add_i32(addr, addr, ofs);
6536         } else {
6537             tcg_gen_sub_i32(addr, addr, ofs);
6538         }
6539     } else if (!a->w) {
6540         return;
6541     }
6542     tcg_gen_addi_i32(addr, addr, address_offset);
6543     store_reg(s, a->rn, addr);
6544 }
6545 
6546 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6547                        MemOp mop, int mem_idx)
6548 {
6549     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6550     TCGv_i32 addr, tmp;
6551 
6552     addr = op_addr_rr_pre(s, a);
6553 
6554     tmp = tcg_temp_new_i32();
6555     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6556     disas_set_da_iss(s, mop, issinfo);
6557 
6558     /*
6559      * Perform base writeback before the loaded value to
6560      * ensure correct behavior with overlapping index registers.
6561      */
6562     op_addr_rr_post(s, a, addr, 0);
6563     store_reg_from_load(s, a->rt, tmp);
6564     return true;
6565 }
6566 
6567 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6568                         MemOp mop, int mem_idx)
6569 {
6570     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6571     TCGv_i32 addr, tmp;
6572 
6573     /*
6574      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6575      * is either UNPREDICTABLE or has defined behaviour
6576      */
6577     if (s->thumb && a->rn == 15) {
6578         return false;
6579     }
6580 
6581     addr = op_addr_rr_pre(s, a);
6582 
6583     tmp = load_reg(s, a->rt);
6584     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6585     disas_set_da_iss(s, mop, issinfo);
6586 
6587     op_addr_rr_post(s, a, addr, 0);
6588     return true;
6589 }
6590 
6591 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6592 {
6593     int mem_idx = get_mem_index(s);
6594     TCGv_i32 addr, tmp;
6595 
6596     if (!ENABLE_ARCH_5TE) {
6597         return false;
6598     }
6599     if (a->rt & 1) {
6600         unallocated_encoding(s);
6601         return true;
6602     }
6603     addr = op_addr_rr_pre(s, a);
6604 
6605     tmp = tcg_temp_new_i32();
6606     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6607     store_reg(s, a->rt, tmp);
6608 
6609     tcg_gen_addi_i32(addr, addr, 4);
6610 
6611     tmp = tcg_temp_new_i32();
6612     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6613     store_reg(s, a->rt + 1, tmp);
6614 
6615     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6616     op_addr_rr_post(s, a, addr, -4);
6617     return true;
6618 }
6619 
6620 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6621 {
6622     int mem_idx = get_mem_index(s);
6623     TCGv_i32 addr, tmp;
6624 
6625     if (!ENABLE_ARCH_5TE) {
6626         return false;
6627     }
6628     if (a->rt & 1) {
6629         unallocated_encoding(s);
6630         return true;
6631     }
6632     addr = op_addr_rr_pre(s, a);
6633 
6634     tmp = load_reg(s, a->rt);
6635     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6636 
6637     tcg_gen_addi_i32(addr, addr, 4);
6638 
6639     tmp = load_reg(s, a->rt + 1);
6640     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6641 
6642     op_addr_rr_post(s, a, addr, -4);
6643     return true;
6644 }
6645 
6646 /*
6647  * Load/store immediate index
6648  */
6649 
6650 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6651 {
6652     int ofs = a->imm;
6653 
6654     if (!a->u) {
6655         ofs = -ofs;
6656     }
6657 
6658     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6659         /*
6660          * Stackcheck. Here we know 'addr' is the current SP;
6661          * U is set if we're moving SP up, else down. It is
6662          * UNKNOWN whether the limit check triggers when SP starts
6663          * below the limit and ends up above it; we chose to do so.
6664          */
6665         if (!a->u) {
6666             TCGv_i32 newsp = tcg_temp_new_i32();
6667             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6668             gen_helper_v8m_stackcheck(tcg_env, newsp);
6669         } else {
6670             gen_helper_v8m_stackcheck(tcg_env, cpu_R[13]);
6671         }
6672     }
6673 
6674     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6675 }
6676 
6677 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6678                             TCGv_i32 addr, int address_offset)
6679 {
6680     if (!a->p) {
6681         if (a->u) {
6682             address_offset += a->imm;
6683         } else {
6684             address_offset -= a->imm;
6685         }
6686     } else if (!a->w) {
6687         return;
6688     }
6689     tcg_gen_addi_i32(addr, addr, address_offset);
6690     store_reg(s, a->rn, addr);
6691 }
6692 
6693 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6694                        MemOp mop, int mem_idx)
6695 {
6696     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6697     TCGv_i32 addr, tmp;
6698 
6699     addr = op_addr_ri_pre(s, a);
6700 
6701     tmp = tcg_temp_new_i32();
6702     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6703     disas_set_da_iss(s, mop, issinfo);
6704 
6705     /*
6706      * Perform base writeback before the loaded value to
6707      * ensure correct behavior with overlapping index registers.
6708      */
6709     op_addr_ri_post(s, a, addr, 0);
6710     store_reg_from_load(s, a->rt, tmp);
6711     return true;
6712 }
6713 
6714 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6715                         MemOp mop, int mem_idx)
6716 {
6717     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6718     TCGv_i32 addr, tmp;
6719 
6720     /*
6721      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6722      * is either UNPREDICTABLE or has defined behaviour
6723      */
6724     if (s->thumb && a->rn == 15) {
6725         return false;
6726     }
6727 
6728     addr = op_addr_ri_pre(s, a);
6729 
6730     tmp = load_reg(s, a->rt);
6731     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6732     disas_set_da_iss(s, mop, issinfo);
6733 
6734     op_addr_ri_post(s, a, addr, 0);
6735     return true;
6736 }
6737 
6738 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6739 {
6740     int mem_idx = get_mem_index(s);
6741     TCGv_i32 addr, tmp;
6742 
6743     addr = op_addr_ri_pre(s, a);
6744 
6745     tmp = tcg_temp_new_i32();
6746     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6747     store_reg(s, a->rt, tmp);
6748 
6749     tcg_gen_addi_i32(addr, addr, 4);
6750 
6751     tmp = tcg_temp_new_i32();
6752     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6753     store_reg(s, rt2, tmp);
6754 
6755     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6756     op_addr_ri_post(s, a, addr, -4);
6757     return true;
6758 }
6759 
6760 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6761 {
6762     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6763         return false;
6764     }
6765     return op_ldrd_ri(s, a, a->rt + 1);
6766 }
6767 
6768 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6769 {
6770     arg_ldst_ri b = {
6771         .u = a->u, .w = a->w, .p = a->p,
6772         .rn = a->rn, .rt = a->rt, .imm = a->imm
6773     };
6774     return op_ldrd_ri(s, &b, a->rt2);
6775 }
6776 
6777 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6778 {
6779     int mem_idx = get_mem_index(s);
6780     TCGv_i32 addr, tmp;
6781 
6782     addr = op_addr_ri_pre(s, a);
6783 
6784     tmp = load_reg(s, a->rt);
6785     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6786 
6787     tcg_gen_addi_i32(addr, addr, 4);
6788 
6789     tmp = load_reg(s, rt2);
6790     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6791 
6792     op_addr_ri_post(s, a, addr, -4);
6793     return true;
6794 }
6795 
6796 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6797 {
6798     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6799         return false;
6800     }
6801     return op_strd_ri(s, a, a->rt + 1);
6802 }
6803 
6804 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6805 {
6806     arg_ldst_ri b = {
6807         .u = a->u, .w = a->w, .p = a->p,
6808         .rn = a->rn, .rt = a->rt, .imm = a->imm
6809     };
6810     return op_strd_ri(s, &b, a->rt2);
6811 }
6812 
6813 #define DO_LDST(NAME, WHICH, MEMOP) \
6814 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6815 {                                                                     \
6816     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6817 }                                                                     \
6818 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6819 {                                                                     \
6820     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6821 }                                                                     \
6822 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6823 {                                                                     \
6824     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6825 }                                                                     \
6826 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6827 {                                                                     \
6828     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6829 }
6830 
6831 DO_LDST(LDR, load, MO_UL)
6832 DO_LDST(LDRB, load, MO_UB)
6833 DO_LDST(LDRH, load, MO_UW)
6834 DO_LDST(LDRSB, load, MO_SB)
6835 DO_LDST(LDRSH, load, MO_SW)
6836 
6837 DO_LDST(STR, store, MO_UL)
6838 DO_LDST(STRB, store, MO_UB)
6839 DO_LDST(STRH, store, MO_UW)
6840 
6841 #undef DO_LDST
6842 
6843 /*
6844  * Synchronization primitives
6845  */
6846 
6847 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6848 {
6849     TCGv_i32 addr, tmp;
6850     TCGv taddr;
6851 
6852     opc |= s->be_data;
6853     addr = load_reg(s, a->rn);
6854     taddr = gen_aa32_addr(s, addr, opc);
6855 
6856     tmp = load_reg(s, a->rt2);
6857     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6858 
6859     store_reg(s, a->rt, tmp);
6860     return true;
6861 }
6862 
6863 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6864 {
6865     return op_swp(s, a, MO_UL | MO_ALIGN);
6866 }
6867 
6868 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6869 {
6870     return op_swp(s, a, MO_UB);
6871 }
6872 
6873 /*
6874  * Load/Store Exclusive and Load-Acquire/Store-Release
6875  */
6876 
6877 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6878 {
6879     TCGv_i32 addr;
6880     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6881     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6882 
6883     /* We UNDEF for these UNPREDICTABLE cases.  */
6884     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6885         || a->rd == a->rn || a->rd == a->rt
6886         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6887         || (mop == MO_64
6888             && (a->rt2 == 15
6889                 || a->rd == a->rt2
6890                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6891         unallocated_encoding(s);
6892         return true;
6893     }
6894 
6895     if (rel) {
6896         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6897     }
6898 
6899     addr = tcg_temp_new_i32();
6900     load_reg_var(s, addr, a->rn);
6901     tcg_gen_addi_i32(addr, addr, a->imm);
6902 
6903     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6904     return true;
6905 }
6906 
6907 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6908 {
6909     if (!ENABLE_ARCH_6) {
6910         return false;
6911     }
6912     return op_strex(s, a, MO_32, false);
6913 }
6914 
6915 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6916 {
6917     if (!ENABLE_ARCH_6K) {
6918         return false;
6919     }
6920     /* We UNDEF for these UNPREDICTABLE cases.  */
6921     if (a->rt & 1) {
6922         unallocated_encoding(s);
6923         return true;
6924     }
6925     a->rt2 = a->rt + 1;
6926     return op_strex(s, a, MO_64, false);
6927 }
6928 
6929 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6930 {
6931     return op_strex(s, a, MO_64, false);
6932 }
6933 
6934 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6935 {
6936     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6937         return false;
6938     }
6939     return op_strex(s, a, MO_8, false);
6940 }
6941 
6942 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6943 {
6944     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6945         return false;
6946     }
6947     return op_strex(s, a, MO_16, false);
6948 }
6949 
6950 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6951 {
6952     if (!ENABLE_ARCH_8) {
6953         return false;
6954     }
6955     return op_strex(s, a, MO_32, true);
6956 }
6957 
6958 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6959 {
6960     if (!ENABLE_ARCH_8) {
6961         return false;
6962     }
6963     /* We UNDEF for these UNPREDICTABLE cases.  */
6964     if (a->rt & 1) {
6965         unallocated_encoding(s);
6966         return true;
6967     }
6968     a->rt2 = a->rt + 1;
6969     return op_strex(s, a, MO_64, true);
6970 }
6971 
6972 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6973 {
6974     if (!ENABLE_ARCH_8) {
6975         return false;
6976     }
6977     return op_strex(s, a, MO_64, true);
6978 }
6979 
6980 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6981 {
6982     if (!ENABLE_ARCH_8) {
6983         return false;
6984     }
6985     return op_strex(s, a, MO_8, true);
6986 }
6987 
6988 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6989 {
6990     if (!ENABLE_ARCH_8) {
6991         return false;
6992     }
6993     return op_strex(s, a, MO_16, true);
6994 }
6995 
6996 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
6997 {
6998     TCGv_i32 addr, tmp;
6999 
7000     if (!ENABLE_ARCH_8) {
7001         return false;
7002     }
7003     /* We UNDEF for these UNPREDICTABLE cases.  */
7004     if (a->rn == 15 || a->rt == 15) {
7005         unallocated_encoding(s);
7006         return true;
7007     }
7008 
7009     addr = load_reg(s, a->rn);
7010     tmp = load_reg(s, a->rt);
7011     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7012     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7013     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7014 
7015     return true;
7016 }
7017 
7018 static bool trans_STL(DisasContext *s, arg_STL *a)
7019 {
7020     return op_stl(s, a, MO_UL);
7021 }
7022 
7023 static bool trans_STLB(DisasContext *s, arg_STL *a)
7024 {
7025     return op_stl(s, a, MO_UB);
7026 }
7027 
7028 static bool trans_STLH(DisasContext *s, arg_STL *a)
7029 {
7030     return op_stl(s, a, MO_UW);
7031 }
7032 
7033 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7034 {
7035     TCGv_i32 addr;
7036     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7037     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7038 
7039     /* We UNDEF for these UNPREDICTABLE cases.  */
7040     if (a->rn == 15 || a->rt == 15
7041         || (!v8a && s->thumb && a->rt == 13)
7042         || (mop == MO_64
7043             && (a->rt2 == 15 || a->rt == a->rt2
7044                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7045         unallocated_encoding(s);
7046         return true;
7047     }
7048 
7049     addr = tcg_temp_new_i32();
7050     load_reg_var(s, addr, a->rn);
7051     tcg_gen_addi_i32(addr, addr, a->imm);
7052 
7053     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7054 
7055     if (acq) {
7056         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7057     }
7058     return true;
7059 }
7060 
7061 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7062 {
7063     if (!ENABLE_ARCH_6) {
7064         return false;
7065     }
7066     return op_ldrex(s, a, MO_32, false);
7067 }
7068 
7069 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7070 {
7071     if (!ENABLE_ARCH_6K) {
7072         return false;
7073     }
7074     /* We UNDEF for these UNPREDICTABLE cases.  */
7075     if (a->rt & 1) {
7076         unallocated_encoding(s);
7077         return true;
7078     }
7079     a->rt2 = a->rt + 1;
7080     return op_ldrex(s, a, MO_64, false);
7081 }
7082 
7083 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7084 {
7085     return op_ldrex(s, a, MO_64, false);
7086 }
7087 
7088 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7089 {
7090     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7091         return false;
7092     }
7093     return op_ldrex(s, a, MO_8, false);
7094 }
7095 
7096 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7097 {
7098     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7099         return false;
7100     }
7101     return op_ldrex(s, a, MO_16, false);
7102 }
7103 
7104 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7105 {
7106     if (!ENABLE_ARCH_8) {
7107         return false;
7108     }
7109     return op_ldrex(s, a, MO_32, true);
7110 }
7111 
7112 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7113 {
7114     if (!ENABLE_ARCH_8) {
7115         return false;
7116     }
7117     /* We UNDEF for these UNPREDICTABLE cases.  */
7118     if (a->rt & 1) {
7119         unallocated_encoding(s);
7120         return true;
7121     }
7122     a->rt2 = a->rt + 1;
7123     return op_ldrex(s, a, MO_64, true);
7124 }
7125 
7126 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7127 {
7128     if (!ENABLE_ARCH_8) {
7129         return false;
7130     }
7131     return op_ldrex(s, a, MO_64, true);
7132 }
7133 
7134 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7135 {
7136     if (!ENABLE_ARCH_8) {
7137         return false;
7138     }
7139     return op_ldrex(s, a, MO_8, true);
7140 }
7141 
7142 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7143 {
7144     if (!ENABLE_ARCH_8) {
7145         return false;
7146     }
7147     return op_ldrex(s, a, MO_16, true);
7148 }
7149 
7150 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7151 {
7152     TCGv_i32 addr, tmp;
7153 
7154     if (!ENABLE_ARCH_8) {
7155         return false;
7156     }
7157     /* We UNDEF for these UNPREDICTABLE cases.  */
7158     if (a->rn == 15 || a->rt == 15) {
7159         unallocated_encoding(s);
7160         return true;
7161     }
7162 
7163     addr = load_reg(s, a->rn);
7164     tmp = tcg_temp_new_i32();
7165     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7166     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7167 
7168     store_reg(s, a->rt, tmp);
7169     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7170     return true;
7171 }
7172 
7173 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7174 {
7175     return op_lda(s, a, MO_UL);
7176 }
7177 
7178 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7179 {
7180     return op_lda(s, a, MO_UB);
7181 }
7182 
7183 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7184 {
7185     return op_lda(s, a, MO_UW);
7186 }
7187 
7188 /*
7189  * Media instructions
7190  */
7191 
7192 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7193 {
7194     TCGv_i32 t1, t2;
7195 
7196     if (!ENABLE_ARCH_6) {
7197         return false;
7198     }
7199 
7200     t1 = load_reg(s, a->rn);
7201     t2 = load_reg(s, a->rm);
7202     gen_helper_usad8(t1, t1, t2);
7203     if (a->ra != 15) {
7204         t2 = load_reg(s, a->ra);
7205         tcg_gen_add_i32(t1, t1, t2);
7206     }
7207     store_reg(s, a->rd, t1);
7208     return true;
7209 }
7210 
7211 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7212 {
7213     TCGv_i32 tmp;
7214     int width = a->widthm1 + 1;
7215     int shift = a->lsb;
7216 
7217     if (!ENABLE_ARCH_6T2) {
7218         return false;
7219     }
7220     if (shift + width > 32) {
7221         /* UNPREDICTABLE; we choose to UNDEF */
7222         unallocated_encoding(s);
7223         return true;
7224     }
7225 
7226     tmp = load_reg(s, a->rn);
7227     if (u) {
7228         tcg_gen_extract_i32(tmp, tmp, shift, width);
7229     } else {
7230         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7231     }
7232     store_reg(s, a->rd, tmp);
7233     return true;
7234 }
7235 
7236 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7237 {
7238     return op_bfx(s, a, false);
7239 }
7240 
7241 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7242 {
7243     return op_bfx(s, a, true);
7244 }
7245 
7246 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7247 {
7248     int msb = a->msb, lsb = a->lsb;
7249     TCGv_i32 t_in, t_rd;
7250     int width;
7251 
7252     if (!ENABLE_ARCH_6T2) {
7253         return false;
7254     }
7255     if (msb < lsb) {
7256         /* UNPREDICTABLE; we choose to UNDEF */
7257         unallocated_encoding(s);
7258         return true;
7259     }
7260 
7261     width = msb + 1 - lsb;
7262     if (a->rn == 15) {
7263         /* BFC */
7264         t_in = tcg_constant_i32(0);
7265     } else {
7266         /* BFI */
7267         t_in = load_reg(s, a->rn);
7268     }
7269     t_rd = load_reg(s, a->rd);
7270     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7271     store_reg(s, a->rd, t_rd);
7272     return true;
7273 }
7274 
7275 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7276 {
7277     unallocated_encoding(s);
7278     return true;
7279 }
7280 
7281 /*
7282  * Parallel addition and subtraction
7283  */
7284 
7285 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7286                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7287 {
7288     TCGv_i32 t0, t1;
7289 
7290     if (s->thumb
7291         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7292         : !ENABLE_ARCH_6) {
7293         return false;
7294     }
7295 
7296     t0 = load_reg(s, a->rn);
7297     t1 = load_reg(s, a->rm);
7298 
7299     gen(t0, t0, t1);
7300 
7301     store_reg(s, a->rd, t0);
7302     return true;
7303 }
7304 
7305 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7306                              void (*gen)(TCGv_i32, TCGv_i32,
7307                                          TCGv_i32, TCGv_ptr))
7308 {
7309     TCGv_i32 t0, t1;
7310     TCGv_ptr ge;
7311 
7312     if (s->thumb
7313         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7314         : !ENABLE_ARCH_6) {
7315         return false;
7316     }
7317 
7318     t0 = load_reg(s, a->rn);
7319     t1 = load_reg(s, a->rm);
7320 
7321     ge = tcg_temp_new_ptr();
7322     tcg_gen_addi_ptr(ge, tcg_env, offsetof(CPUARMState, GE));
7323     gen(t0, t0, t1, ge);
7324 
7325     store_reg(s, a->rd, t0);
7326     return true;
7327 }
7328 
7329 #define DO_PAR_ADDSUB(NAME, helper) \
7330 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7331 {                                                       \
7332     return op_par_addsub(s, a, helper);                 \
7333 }
7334 
7335 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7336 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7337 {                                                       \
7338     return op_par_addsub_ge(s, a, helper);              \
7339 }
7340 
/*
 * Instantiate the trans_ functions for the parallel add/subtract insns.
 * The S* and U* groups directly below expand via DO_PAR_ADDSUB_GE, so
 * their trans_ functions call op_par_addsub_ge().
 */
DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)

DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)

/*
 * The remaining groups (Q*, UQ*, SH*, UH*) expand via DO_PAR_ADDSUB,
 * so their trans_ functions call op_par_addsub().
 */
DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)

DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)

DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)

DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)

/* The generator macros are not needed past this point. */
#undef DO_PAR_ADDSUB
#undef DO_PAR_ADDSUB_GE
7385 
7386 /*
7387  * Packing, unpacking, saturation, and reversal
7388  */
7389 
7390 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7391 {
7392     TCGv_i32 tn, tm;
7393     int shift = a->imm;
7394 
7395     if (s->thumb
7396         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7397         : !ENABLE_ARCH_6) {
7398         return false;
7399     }
7400 
7401     tn = load_reg(s, a->rn);
7402     tm = load_reg(s, a->rm);
7403     if (a->tb) {
7404         /* PKHTB */
7405         if (shift == 0) {
7406             shift = 31;
7407         }
7408         tcg_gen_sari_i32(tm, tm, shift);
7409         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7410     } else {
7411         /* PKHBT */
7412         tcg_gen_shli_i32(tm, tm, shift);
7413         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7414     }
7415     store_reg(s, a->rd, tn);
7416     return true;
7417 }
7418 
7419 static bool op_sat(DisasContext *s, arg_sat *a,
7420                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7421 {
7422     TCGv_i32 tmp;
7423     int shift = a->imm;
7424 
7425     if (!ENABLE_ARCH_6) {
7426         return false;
7427     }
7428 
7429     tmp = load_reg(s, a->rn);
7430     if (a->sh) {
7431         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7432     } else {
7433         tcg_gen_shli_i32(tmp, tmp, shift);
7434     }
7435 
7436     gen(tmp, tcg_env, tmp, tcg_constant_i32(a->satimm));
7437 
7438     store_reg(s, a->rd, tmp);
7439     return true;
7440 }
7441 
/*
 * SSAT: handled by op_sat() with gen_helper_ssat as the saturation
 * helper. Returns whatever op_sat() returns.
 */
static bool trans_SSAT(DisasContext *s, arg_sat *a)
{
    return op_sat(s, a, gen_helper_ssat);
}
7446 
/*
 * USAT: handled by op_sat() with gen_helper_usat as the saturation
 * helper. Returns whatever op_sat() returns.
 */
static bool trans_USAT(DisasContext *s, arg_sat *a)
{
    return op_sat(s, a, gen_helper_usat);
}
7451 
7452 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7453 {
7454     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7455         return false;
7456     }
7457     return op_sat(s, a, gen_helper_ssat16);
7458 }
7459 
7460 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7461 {
7462     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7463         return false;
7464     }
7465     return op_sat(s, a, gen_helper_usat16);
7466 }
7467 
7468 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7469                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7470                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7471 {
7472     TCGv_i32 tmp;
7473 
7474     if (!ENABLE_ARCH_6) {
7475         return false;
7476     }
7477 
7478     tmp = load_reg(s, a->rm);
7479     /*
7480      * TODO: In many cases we could do a shift instead of a rotate.
7481      * Combined with a simple extend, that becomes an extract.
7482      */
7483     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7484     gen_extract(tmp, tmp);
7485 
7486     if (a->rn != 15) {
7487         TCGv_i32 tmp2 = load_reg(s, a->rn);
7488         gen_add(tmp, tmp, tmp2);
7489     }
7490     store_reg(s, a->rd, tmp);
7491     return true;
7492 }
7493 
/*
 * SXTAB: op_xta() with tcg_gen_ext8s_i32 as the extract step and
 * tcg_gen_add_i32 as the add step.
 */
static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
{
    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
}
7498 
/*
 * SXTAH: op_xta() with tcg_gen_ext16s_i32 as the extract step and
 * tcg_gen_add_i32 as the add step.
 */
static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
{
    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
}
7503 
7504 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7505 {
7506     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7507         return false;
7508     }
7509     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7510 }
7511 
/*
 * UXTAB: op_xta() with tcg_gen_ext8u_i32 as the extract step and
 * tcg_gen_add_i32 as the add step.
 */
static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
{
    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
}
7516 
/*
 * UXTAH: op_xta() with tcg_gen_ext16u_i32 as the extract step and
 * tcg_gen_add_i32 as the add step.
 */
static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
{
    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
}
7521 
7522 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7523 {
7524     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7525         return false;
7526     }
7527     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7528 }
7529 
7530 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7531 {
7532     TCGv_i32 t1, t2, t3;
7533 
7534     if (s->thumb
7535         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7536         : !ENABLE_ARCH_6) {
7537         return false;
7538     }
7539 
7540     t1 = load_reg(s, a->rn);
7541     t2 = load_reg(s, a->rm);
7542     t3 = tcg_temp_new_i32();
7543     tcg_gen_ld_i32(t3, tcg_env, offsetof(CPUARMState, GE));
7544     gen_helper_sel_flags(t1, t3, t1, t2);
7545     store_reg(s, a->rd, t1);
7546     return true;
7547 }
7548 
7549 static bool op_rr(DisasContext *s, arg_rr *a,
7550                   void (*gen)(TCGv_i32, TCGv_i32))
7551 {
7552     TCGv_i32 tmp;
7553 
7554     tmp = load_reg(s, a->rm);
7555     gen(tmp, tmp);
7556     store_reg(s, a->rd, tmp);
7557     return true;
7558 }
7559 
7560 static bool trans_REV(DisasContext *s, arg_rr *a)
7561 {
7562     if (!ENABLE_ARCH_6) {
7563         return false;
7564     }
7565     return op_rr(s, a, tcg_gen_bswap32_i32);
7566 }
7567 
7568 static bool trans_REV16(DisasContext *s, arg_rr *a)
7569 {
7570     if (!ENABLE_ARCH_6) {
7571         return false;
7572     }
7573     return op_rr(s, a, gen_rev16);
7574 }
7575 
7576 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7577 {
7578     if (!ENABLE_ARCH_6) {
7579         return false;
7580     }
7581     return op_rr(s, a, gen_revsh);
7582 }
7583 
7584 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7585 {
7586     if (!ENABLE_ARCH_6T2) {
7587         return false;
7588     }
7589     return op_rr(s, a, gen_helper_rbit);
7590 }
7591 
7592 /*
7593  * Signed multiply, signed and unsigned divide
7594  */
7595 
7596 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7597 {
7598     TCGv_i32 t1, t2;
7599 
7600     if (!ENABLE_ARCH_6) {
7601         return false;
7602     }
7603 
7604     t1 = load_reg(s, a->rn);
7605     t2 = load_reg(s, a->rm);
7606     if (m_swap) {
7607         gen_swap_half(t2, t2);
7608     }
7609     gen_smul_dual(t1, t2);
7610 
7611     if (sub) {
7612         /*
7613          * This subtraction cannot overflow, so we can do a simple
7614          * 32-bit subtraction and then a possible 32-bit saturating
7615          * addition of Ra.
7616          */
7617         tcg_gen_sub_i32(t1, t1, t2);
7618 
7619         if (a->ra != 15) {
7620             t2 = load_reg(s, a->ra);
7621             gen_helper_add_setq(t1, tcg_env, t1, t2);
7622         }
7623     } else if (a->ra == 15) {
7624         /* Single saturation-checking addition */
7625         gen_helper_add_setq(t1, tcg_env, t1, t2);
7626     } else {
7627         /*
7628          * We need to add the products and Ra together and then
7629          * determine whether the final result overflowed. Doing
7630          * this as two separate add-and-check-overflow steps incorrectly
7631          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7632          * Do all the arithmetic at 64-bits and then check for overflow.
7633          */
7634         TCGv_i64 p64, q64;
7635         TCGv_i32 t3, qf, one;
7636 
7637         p64 = tcg_temp_new_i64();
7638         q64 = tcg_temp_new_i64();
7639         tcg_gen_ext_i32_i64(p64, t1);
7640         tcg_gen_ext_i32_i64(q64, t2);
7641         tcg_gen_add_i64(p64, p64, q64);
7642         load_reg_var(s, t2, a->ra);
7643         tcg_gen_ext_i32_i64(q64, t2);
7644         tcg_gen_add_i64(p64, p64, q64);
7645 
7646         tcg_gen_extr_i64_i32(t1, t2, p64);
7647         /*
7648          * t1 is the low half of the result which goes into Rd.
7649          * We have overflow and must set Q if the high half (t2)
7650          * is different from the sign-extension of t1.
7651          */
7652         t3 = tcg_temp_new_i32();
7653         tcg_gen_sari_i32(t3, t1, 31);
7654         qf = load_cpu_field(QF);
7655         one = tcg_constant_i32(1);
7656         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7657         store_cpu_field(qf, QF);
7658     }
7659     store_reg(s, a->rd, t1);
7660     return true;
7661 }
7662 
/* SMLAD: op_smlad() with m_swap = false, sub = false. */
static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
{
    return op_smlad(s, a, false, false);
}
7667 
/* SMLADX: op_smlad() with m_swap = true, sub = false. */
static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
{
    return op_smlad(s, a, true, false);
}
7672 
/* SMLSD: op_smlad() with m_swap = false, sub = true. */
static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
{
    return op_smlad(s, a, false, true);
}
7677 
/* SMLSDX: op_smlad() with m_swap = true, sub = true. */
static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
{
    return op_smlad(s, a, true, true);
}
7682 
7683 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7684 {
7685     TCGv_i32 t1, t2;
7686     TCGv_i64 l1, l2;
7687 
7688     if (!ENABLE_ARCH_6) {
7689         return false;
7690     }
7691 
7692     t1 = load_reg(s, a->rn);
7693     t2 = load_reg(s, a->rm);
7694     if (m_swap) {
7695         gen_swap_half(t2, t2);
7696     }
7697     gen_smul_dual(t1, t2);
7698 
7699     l1 = tcg_temp_new_i64();
7700     l2 = tcg_temp_new_i64();
7701     tcg_gen_ext_i32_i64(l1, t1);
7702     tcg_gen_ext_i32_i64(l2, t2);
7703 
7704     if (sub) {
7705         tcg_gen_sub_i64(l1, l1, l2);
7706     } else {
7707         tcg_gen_add_i64(l1, l1, l2);
7708     }
7709 
7710     gen_addq(s, l1, a->ra, a->rd);
7711     gen_storeq_reg(s, a->ra, a->rd, l1);
7712     return true;
7713 }
7714 
/* SMLALD: op_smlald() with m_swap = false, sub = false. */
static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
{
    return op_smlald(s, a, false, false);
}
7719 
/* SMLALDX: op_smlald() with m_swap = true, sub = false. */
static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
{
    return op_smlald(s, a, true, false);
}
7724 
/* SMLSLD: op_smlald() with m_swap = false, sub = true. */
static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
{
    return op_smlald(s, a, false, true);
}
7729 
/* SMLSLDX: op_smlald() with m_swap = true, sub = true. */
static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
{
    return op_smlald(s, a, true, true);
}
7734 
7735 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7736 {
7737     TCGv_i32 t1, t2;
7738 
7739     if (s->thumb
7740         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7741         : !ENABLE_ARCH_6) {
7742         return false;
7743     }
7744 
7745     t1 = load_reg(s, a->rn);
7746     t2 = load_reg(s, a->rm);
7747     tcg_gen_muls2_i32(t2, t1, t1, t2);
7748 
7749     if (a->ra != 15) {
7750         TCGv_i32 t3 = load_reg(s, a->ra);
7751         if (sub) {
7752             /*
7753              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
7754              * a non-zero multiplicand lowpart, and the correct result
7755              * lowpart for rounding.
7756              */
7757             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7758         } else {
7759             tcg_gen_add_i32(t1, t1, t3);
7760         }
7761     }
7762     if (round) {
7763         /*
7764          * Adding 0x80000000 to the 64-bit quantity means that we have
7765          * carry in to the high word when the low word has the msb set.
7766          */
7767         tcg_gen_shri_i32(t2, t2, 31);
7768         tcg_gen_add_i32(t1, t1, t2);
7769     }
7770     store_reg(s, a->rd, t1);
7771     return true;
7772 }
7773 
/* SMMLA: op_smmla() with round = false, sub = false. */
static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
{
    return op_smmla(s, a, false, false);
}
7778 
/* SMMLAR: op_smmla() with round = true, sub = false. */
static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
{
    return op_smmla(s, a, true, false);
}
7783 
/* SMMLS: op_smmla() with round = false, sub = true. */
static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
{
    return op_smmla(s, a, false, true);
}
7788 
/* SMMLSR: op_smmla() with round = true, sub = true. */
static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
{
    return op_smmla(s, a, true, true);
}
7793 
7794 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7795 {
7796     TCGv_i32 t1, t2;
7797 
7798     if (s->thumb
7799         ? !dc_isar_feature(aa32_thumb_div, s)
7800         : !dc_isar_feature(aa32_arm_div, s)) {
7801         return false;
7802     }
7803 
7804     t1 = load_reg(s, a->rn);
7805     t2 = load_reg(s, a->rm);
7806     if (u) {
7807         gen_helper_udiv(t1, tcg_env, t1, t2);
7808     } else {
7809         gen_helper_sdiv(t1, tcg_env, t1, t2);
7810     }
7811     store_reg(s, a->rd, t1);
7812     return true;
7813 }
7814 
/* SDIV: op_div() with u = false, which selects the sdiv helper. */
static bool trans_SDIV(DisasContext *s, arg_rrr *a)
{
    return op_div(s, a, false);
}
7819 
/* UDIV: op_div() with u = true, which selects the udiv helper. */
static bool trans_UDIV(DisasContext *s, arg_rrr *a)
{
    return op_div(s, a, true);
}
7824 
7825 /*
7826  * Block data transfer
7827  */
7828 
7829 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7830 {
7831     TCGv_i32 addr = load_reg(s, a->rn);
7832 
7833     if (a->b) {
7834         if (a->i) {
7835             /* pre increment */
7836             tcg_gen_addi_i32(addr, addr, 4);
7837         } else {
7838             /* pre decrement */
7839             tcg_gen_addi_i32(addr, addr, -(n * 4));
7840         }
7841     } else if (!a->i && n != 1) {
7842         /* post decrement */
7843         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7844     }
7845 
7846     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7847         /*
7848          * If the writeback is incrementing SP rather than
7849          * decrementing it, and the initial SP is below the
7850          * stack limit but the final written-back SP would
7851          * be above, then we must not perform any memory
7852          * accesses, but it is IMPDEF whether we generate
7853          * an exception. We choose to do so in this case.
7854          * At this point 'addr' is the lowest address, so
7855          * either the original SP (if incrementing) or our
7856          * final SP (if decrementing), so that's what we check.
7857          */
7858         gen_helper_v8m_stackcheck(tcg_env, addr);
7859     }
7860 
7861     return addr;
7862 }
7863 
7864 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7865                                TCGv_i32 addr, int n)
7866 {
7867     if (a->w) {
7868         /* write back */
7869         if (!a->b) {
7870             if (a->i) {
7871                 /* post increment */
7872                 tcg_gen_addi_i32(addr, addr, 4);
7873             } else {
7874                 /* post decrement */
7875                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7876             }
7877         } else if (!a->i && n != 1) {
7878             /* pre decrement */
7879             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7880         }
7881         store_reg(s, a->rn, addr);
7882     }
7883 }
7884 
7885 static bool op_stm(DisasContext *s, arg_ldst_block *a)
7886 {
7887     int i, j, n, list, mem_idx;
7888     bool user = a->u;
7889     TCGv_i32 addr, tmp;
7890 
7891     if (user) {
7892         /* STM (user) */
7893         if (IS_USER(s)) {
7894             /* Only usable in supervisor mode.  */
7895             unallocated_encoding(s);
7896             return true;
7897         }
7898     }
7899 
7900     list = a->list;
7901     n = ctpop16(list);
7902     /*
7903      * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
7904      * to UNDEF. In the T32 STM encoding n == 1 is also UNPREDICTABLE,
7905      * but hardware treats it like the A32 version and implements the
7906      * single-register-store, and some in-the-wild (buggy) software
7907      * assumes that, so we don't UNDEF on that case.
7908      */
7909     if (n < 1 || a->rn == 15) {
7910         unallocated_encoding(s);
7911         return true;
7912     }
7913 
7914     s->eci_handled = true;
7915 
7916     addr = op_addr_block_pre(s, a, n);
7917     mem_idx = get_mem_index(s);
7918 
7919     for (i = j = 0; i < 16; i++) {
7920         if (!(list & (1 << i))) {
7921             continue;
7922         }
7923 
7924         if (user && i != 15) {
7925             tmp = tcg_temp_new_i32();
7926             gen_helper_get_user_reg(tmp, tcg_env, tcg_constant_i32(i));
7927         } else {
7928             tmp = load_reg(s, i);
7929         }
7930         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7931 
7932         /* No need to add after the last transfer.  */
7933         if (++j != n) {
7934             tcg_gen_addi_i32(addr, addr, 4);
7935         }
7936     }
7937 
7938     op_addr_block_post(s, a, addr, n);
7939     clear_eci_state(s);
7940     return true;
7941 }
7942 
/* STM: no extra checks for this encoding; handled entirely by op_stm(). */
static bool trans_STM(DisasContext *s, arg_ldst_block *a)
{
    return op_stm(s, a);
}
7947 
7948 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7949 {
7950     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7951     if (a->w && (a->list & (1 << a->rn))) {
7952         unallocated_encoding(s);
7953         return true;
7954     }
7955     return op_stm(s, a);
7956 }
7957 
7958 static bool do_ldm(DisasContext *s, arg_ldst_block *a)
7959 {
7960     int i, j, n, list, mem_idx;
7961     bool loaded_base;
7962     bool user = a->u;
7963     bool exc_return = false;
7964     TCGv_i32 addr, tmp, loaded_var;
7965 
7966     if (user) {
7967         /* LDM (user), LDM (exception return) */
7968         if (IS_USER(s)) {
7969             /* Only usable in supervisor mode.  */
7970             unallocated_encoding(s);
7971             return true;
7972         }
7973         if (extract32(a->list, 15, 1)) {
7974             exc_return = true;
7975             user = false;
7976         } else {
7977             /* LDM (user) does not allow writeback.  */
7978             if (a->w) {
7979                 unallocated_encoding(s);
7980                 return true;
7981             }
7982         }
7983     }
7984 
7985     list = a->list;
7986     n = ctpop16(list);
7987     /*
7988      * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
7989      * to UNDEF. In the T32 LDM encoding n == 1 is also UNPREDICTABLE,
7990      * but hardware treats it like the A32 version and implements the
7991      * single-register-load, and some in-the-wild (buggy) software
7992      * assumes that, so we don't UNDEF on that case.
7993      */
7994     if (n < 1 || a->rn == 15) {
7995         unallocated_encoding(s);
7996         return true;
7997     }
7998 
7999     s->eci_handled = true;
8000 
8001     addr = op_addr_block_pre(s, a, n);
8002     mem_idx = get_mem_index(s);
8003     loaded_base = false;
8004     loaded_var = NULL;
8005 
8006     for (i = j = 0; i < 16; i++) {
8007         if (!(list & (1 << i))) {
8008             continue;
8009         }
8010 
8011         tmp = tcg_temp_new_i32();
8012         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8013         if (user) {
8014             gen_helper_set_user_reg(tcg_env, tcg_constant_i32(i), tmp);
8015         } else if (i == a->rn) {
8016             loaded_var = tmp;
8017             loaded_base = true;
8018         } else if (i == 15 && exc_return) {
8019             store_pc_exc_ret(s, tmp);
8020         } else {
8021             store_reg_from_load(s, i, tmp);
8022         }
8023 
8024         /* No need to add after the last transfer.  */
8025         if (++j != n) {
8026             tcg_gen_addi_i32(addr, addr, 4);
8027         }
8028     }
8029 
8030     op_addr_block_post(s, a, addr, n);
8031 
8032     if (loaded_base) {
8033         /* Note that we reject base == pc above.  */
8034         store_reg(s, a->rn, loaded_var);
8035     }
8036 
8037     if (exc_return) {
8038         /* Restore CPSR from SPSR.  */
8039         tmp = load_cpu_field(spsr);
8040         translator_io_start(&s->base);
8041         gen_helper_cpsr_write_eret(tcg_env, tmp);
8042         /* Must exit loop to check un-masked IRQs */
8043         s->base.is_jmp = DISAS_EXIT;
8044     }
8045     clear_eci_state(s);
8046     return true;
8047 }
8048 
8049 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8050 {
8051     /*
8052      * Writeback register in register list is UNPREDICTABLE
8053      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8054      * an UNKNOWN value to the base register.
8055      */
8056     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8057         unallocated_encoding(s);
8058         return true;
8059     }
8060     return do_ldm(s, a);
8061 }
8062 
8063 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8064 {
8065     /* Writeback register in register list is UNPREDICTABLE for T32. */
8066     if (a->w && (a->list & (1 << a->rn))) {
8067         unallocated_encoding(s);
8068         return true;
8069     }
8070     return do_ldm(s, a);
8071 }
8072 
8073 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8074 {
8075     /* Writeback is conditional on the base register not being loaded.  */
8076     a->w = !(a->list & (1 << a->rn));
8077     return do_ldm(s, a);
8078 }
8079 
8080 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8081 {
8082     int i;
8083     TCGv_i32 zero;
8084 
8085     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8086         return false;
8087     }
8088 
8089     if (extract32(a->list, 13, 1)) {
8090         return false;
8091     }
8092 
8093     if (!a->list) {
8094         /* UNPREDICTABLE; we choose to UNDEF */
8095         return false;
8096     }
8097 
8098     s->eci_handled = true;
8099 
8100     zero = tcg_constant_i32(0);
8101     for (i = 0; i < 15; i++) {
8102         if (extract32(a->list, i, 1)) {
8103             /* Clear R[i] */
8104             tcg_gen_mov_i32(cpu_R[i], zero);
8105         }
8106     }
8107     if (extract32(a->list, 15, 1)) {
8108         /*
8109          * Clear APSR (by calling the MSR helper with the same argument
8110          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8111          */
8112         gen_helper_v7m_msr(tcg_env, tcg_constant_i32(0xc00), zero);
8113     }
8114     clear_eci_state(s);
8115     return true;
8116 }
8117 
8118 /*
8119  * Branch, branch with link
8120  */
8121 
/*
 * B: emit a jump to the target that jmp_diff() computes from the
 * immediate. Always returns true.
 */
static bool trans_B(DisasContext *s, arg_i *a)
{
    gen_jmp(s, jmp_diff(s, a->imm));
    return true;
}
8127 
8128 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8129 {
8130     /* This has cond from encoding, required to be outside IT block.  */
8131     if (a->cond >= 0xe) {
8132         return false;
8133     }
8134     if (s->condexec_mask) {
8135         unallocated_encoding(s);
8136         return true;
8137     }
8138     arm_skip_unless(s, a->cond);
8139     gen_jmp(s, jmp_diff(s, a->imm));
8140     return true;
8141 }
8142 
/*
 * BL: write the return address to r14, then jump to the target.
 * Always returns true.
 */
static bool trans_BL(DisasContext *s, arg_i *a)
{
    /* LR = address of the next insn, with bit 0 set when in Thumb state */
    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
    gen_jmp(s, jmp_diff(s, a->imm));
    return true;
}
8149 
8150 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8151 {
8152     /*
8153      * BLX <imm> would be useless on M-profile; the encoding space
8154      * is used for other insns from v8.1M onward, and UNDEFs before that.
8155      */
8156     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8157         return false;
8158     }
8159 
8160     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8161     if (s->thumb && (a->imm & 2)) {
8162         return false;
8163     }
8164     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8165     store_cpu_field_constant(!s->thumb, thumb);
8166     /* This jump is computed from an aligned PC: subtract off the low bits. */
8167     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8168     return true;
8169 }
8170 
/*
 * First half of the two-halfword BL/BLX pair used on cores without
 * Thumb2 (asserted below): set r14 from the immediate shifted left by
 * 12, via jmp_diff() and gen_pc_plus_diff(). No branch is emitted here.
 */
static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
{
    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
    gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
    return true;
}
8177 
8178 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8179 {
8180     TCGv_i32 tmp = tcg_temp_new_i32();
8181 
8182     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8183     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8184     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8185     gen_bx(s, tmp);
8186     return true;
8187 }
8188 
8189 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8190 {
8191     TCGv_i32 tmp;
8192 
8193     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8194     if (!ENABLE_ARCH_5) {
8195         return false;
8196     }
8197     tmp = tcg_temp_new_i32();
8198     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8199     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8200     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8201     gen_bx(s, tmp);
8202     return true;
8203 }
8204 
8205 static bool trans_BF(DisasContext *s, arg_BF *a)
8206 {
8207     /*
8208      * M-profile branch future insns. The architecture permits an
8209      * implementation to implement these as NOPs (equivalent to
8210      * discarding the LO_BRANCH_INFO cache immediately), and we
8211      * take that IMPDEF option because for QEMU a "real" implementation
8212      * would be complicated and wouldn't execute any faster.
8213      */
8214     if (!dc_isar_feature(aa32_lob, s)) {
8215         return false;
8216     }
8217     if (a->boff == 0) {
8218         /* SEE "Related encodings" (loop insns) */
8219         return false;
8220     }
8221     /* Handle as NOP */
8222     return true;
8223 }
8224 
8225 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8226 {
8227     /* M-profile low-overhead loop start */
8228     TCGv_i32 tmp;
8229 
8230     if (!dc_isar_feature(aa32_lob, s)) {
8231         return false;
8232     }
8233     if (a->rn == 13 || a->rn == 15) {
8234         /*
8235          * For DLSTP rn == 15 is a related encoding (LCTP); the
8236          * other cases caught by this condition are all
8237          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8238          */
8239         return false;
8240     }
8241 
8242     if (a->size != 4) {
8243         /* DLSTP */
8244         if (!dc_isar_feature(aa32_mve, s)) {
8245             return false;
8246         }
8247         if (!vfp_access_check(s)) {
8248             return true;
8249         }
8250     }
8251 
8252     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8253     tmp = load_reg(s, a->rn);
8254     store_reg(s, 14, tmp);
8255     if (a->size != 4) {
8256         /* DLSTP: set FPSCR.LTPSIZE */
8257         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8258         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8259     }
8260     return true;
8261 }
8262 
8263 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8264 {
8265     /* M-profile low-overhead while-loop start */
8266     TCGv_i32 tmp;
8267     DisasLabel nextlabel;
8268 
8269     if (!dc_isar_feature(aa32_lob, s)) {
8270         return false;
8271     }
8272     if (a->rn == 13 || a->rn == 15) {
8273         /*
8274          * For WLSTP rn == 15 is a related encoding (LE); the
8275          * other cases caught by this condition are all
8276          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8277          */
8278         return false;
8279     }
8280     if (s->condexec_mask) {
8281         /*
8282          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8283          * we choose to UNDEF, because otherwise our use of
8284          * gen_goto_tb(1) would clash with the use of TB exit 1
8285          * in the dc->condjmp condition-failed codepath in
8286          * arm_tr_tb_stop() and we'd get an assertion.
8287          */
8288         return false;
8289     }
8290     if (a->size != 4) {
8291         /* WLSTP */
8292         if (!dc_isar_feature(aa32_mve, s)) {
8293             return false;
8294         }
8295         /*
8296          * We need to check that the FPU is enabled here, but mustn't
8297          * call vfp_access_check() to do that because we don't want to
8298          * do the lazy state preservation in the "loop count is zero" case.
8299          * Do the check-and-raise-exception by hand.
8300          */
8301         if (s->fp_excp_el) {
8302             gen_exception_insn_el(s, 0, EXCP_NOCP,
8303                                   syn_uncategorized(), s->fp_excp_el);
8304             return true;
8305         }
8306     }
8307 
8308     nextlabel = gen_disas_label(s);
8309     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8310     tmp = load_reg(s, a->rn);
8311     store_reg(s, 14, tmp);
8312     if (a->size != 4) {
8313         /*
8314          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8315          * lazy state preservation, new FP context creation, etc,
8316          * that vfp_access_check() does. We know that the actual
8317          * access check will succeed (ie it won't generate code that
8318          * throws an exception) because we did that check by hand earlier.
8319          */
8320         bool ok = vfp_access_check(s);
8321         assert(ok);
8322         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8323         /*
8324          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8325          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8326          */
8327     }
8328     gen_jmp_tb(s, curr_insn_len(s), 1);
8329 
8330     set_disas_label(s, nextlabel);
8331     gen_jmp(s, jmp_diff(s, a->imm));
8332     return true;
8333 }
8334 
8335 static bool trans_LE(DisasContext *s, arg_LE *a)
8336 {
8337     /*
8338      * M-profile low-overhead loop end. The architecture permits an
8339      * implementation to discard the LO_BRANCH_INFO cache at any time,
8340      * and we take the IMPDEF option to never set it in the first place
8341      * (equivalent to always discarding it immediately), because for QEMU
8342      * a "real" implementation would be complicated and wouldn't execute
8343      * any faster.
8344      */
8345     TCGv_i32 tmp;
8346     DisasLabel loopend;
8347     bool fpu_active;
8348 
8349     if (!dc_isar_feature(aa32_lob, s)) {
8350         return false;
8351     }
8352     if (a->f && a->tp) {
8353         return false;
8354     }
8355     if (s->condexec_mask) {
8356         /*
8357          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8358          * we choose to UNDEF, because otherwise our use of
8359          * gen_goto_tb(1) would clash with the use of TB exit 1
8360          * in the dc->condjmp condition-failed codepath in
8361          * arm_tr_tb_stop() and we'd get an assertion.
8362          */
8363         return false;
8364     }
8365     if (a->tp) {
8366         /* LETP */
8367         if (!dc_isar_feature(aa32_mve, s)) {
8368             return false;
8369         }
8370         if (!vfp_access_check(s)) {
8371             s->eci_handled = true;
8372             return true;
8373         }
8374     }
8375 
8376     /* LE/LETP is OK with ECI set and leaves it untouched */
8377     s->eci_handled = true;
8378 
8379     /*
8380      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8381      * UsageFault exception for the LE insn in that case. Note that we
8382      * are not directly checking FPSCR.LTPSIZE but instead check the
8383      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8384      * not currently active (ie ActiveFPState() returns false). We
8385      * can identify not-active purely from our TB state flags, as the
8386      * FPU is active only if:
8387      *  the FPU is enabled
8388      *  AND lazy state preservation is not active
8389      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8390      *
8391      * Usually we don't need to care about this distinction between
8392      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8393      * will either take an exception or clear the conditions that make
8394      * the FPU not active. But LE is an unusual case of a non-FP insn
8395      * that looks at LTPSIZE.
8396      */
8397     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8398 
8399     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8400         /* Need to do a runtime check for LTPSIZE != 4 */
8401         DisasLabel skipexc = gen_disas_label(s);
8402         tmp = load_cpu_field(v7m.ltpsize);
8403         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8404         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8405         set_disas_label(s, skipexc);
8406     }
8407 
8408     if (a->f) {
8409         /* Loop-forever: just jump back to the loop start */
8410         gen_jmp(s, jmp_diff(s, -a->imm));
8411         return true;
8412     }
8413 
8414     /*
8415      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8416      * For LE, we know at this point that LTPSIZE must be 4 and the
8417      * loop decrement value is 1. For LETP we need to calculate the decrement
8418      * value from LTPSIZE.
8419      */
8420     loopend = gen_disas_label(s);
8421     if (!a->tp) {
8422         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8423         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8424     } else {
8425         /*
8426          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8427          * so that decr stays live after the brcondi.
8428          */
8429         TCGv_i32 decr = tcg_temp_new_i32();
8430         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8431         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8432         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8433 
8434         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8435 
8436         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8437     }
8438     /* Jump back to the loop start */
8439     gen_jmp(s, jmp_diff(s, -a->imm));
8440 
8441     set_disas_label(s, loopend);
8442     if (a->tp) {
8443         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8444         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8445     }
8446     /* End TB, continuing to following insn */
8447     gen_jmp_tb(s, curr_insn_len(s), 1);
8448     return true;
8449 }
8450 
8451 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8452 {
8453     /*
8454      * M-profile Loop Clear with Tail Predication. Since our implementation
8455      * doesn't cache branch information, all we need to do is reset
8456      * FPSCR.LTPSIZE to 4.
8457      */
8458 
8459     if (!dc_isar_feature(aa32_lob, s) ||
8460         !dc_isar_feature(aa32_mve, s)) {
8461         return false;
8462     }
8463 
8464     if (!vfp_access_check(s)) {
8465         return true;
8466     }
8467 
8468     store_cpu_field_constant(4, v7m.ltpsize);
8469     return true;
8470 }
8471 
8472 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8473 {
8474     /*
8475      * M-profile Create Vector Tail Predicate. This insn is itself
8476      * predicated and is subject to beatwise execution.
8477      */
8478     TCGv_i32 rn_shifted, masklen;
8479 
8480     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8481         return false;
8482     }
8483 
8484     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8485         return true;
8486     }
8487 
8488     /*
8489      * We pre-calculate the mask length here to avoid having
8490      * to have multiple helpers specialized for size.
8491      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8492      */
8493     rn_shifted = tcg_temp_new_i32();
8494     masklen = load_reg(s, a->rn);
8495     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8496     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8497                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8498                         rn_shifted, tcg_constant_i32(16));
8499     gen_helper_mve_vctp(tcg_env, masklen);
8500     /* This insn updates predication bits */
8501     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8502     mve_update_eci(s);
8503     return true;
8504 }
8505 
8506 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8507 {
8508     TCGv_i32 addr, tmp;
8509 
8510     tmp = load_reg(s, a->rm);
8511     if (half) {
8512         tcg_gen_add_i32(tmp, tmp, tmp);
8513     }
8514     addr = load_reg(s, a->rn);
8515     tcg_gen_add_i32(addr, addr, tmp);
8516 
8517     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8518 
8519     tcg_gen_add_i32(tmp, tmp, tmp);
8520     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8521     tcg_gen_add_i32(tmp, tmp, addr);
8522     store_reg(s, 15, tmp);
8523     return true;
8524 }
8525 
8526 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8527 {
8528     return op_tbranch(s, a, false);
8529 }
8530 
8531 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8532 {
8533     return op_tbranch(s, a, true);
8534 }
8535 
8536 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8537 {
8538     TCGv_i32 tmp = load_reg(s, a->rn);
8539 
8540     arm_gen_condlabel(s);
8541     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8542                         tmp, 0, s->condlabel.label);
8543     gen_jmp(s, jmp_diff(s, a->imm));
8544     return true;
8545 }
8546 
8547 /*
8548  * Supervisor call - both T32 & A32 come here so we need to check
8549  * which mode we are in when checking for semihosting.
8550  */
8551 
8552 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8553 {
8554     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8555 
8556     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8557         semihosting_enabled(s->current_el == 0) &&
8558         (a->imm == semihost_imm)) {
8559         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8560     } else {
8561         if (s->fgt_svc) {
8562             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8563             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8564         } else {
8565             gen_update_pc(s, curr_insn_len(s));
8566             s->svc_imm = a->imm;
8567             s->base.is_jmp = DISAS_SWI;
8568         }
8569     }
8570     return true;
8571 }
8572 
8573 /*
8574  * Unconditional system instructions
8575  */
8576 
8577 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8578 {
8579     static const int8_t pre_offset[4] = {
8580         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8581     };
8582     static const int8_t post_offset[4] = {
8583         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8584     };
8585     TCGv_i32 addr, t1, t2;
8586 
8587     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8588         return false;
8589     }
8590     if (IS_USER(s)) {
8591         unallocated_encoding(s);
8592         return true;
8593     }
8594 
8595     addr = load_reg(s, a->rn);
8596     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8597 
8598     /* Load PC into tmp and CPSR into tmp2.  */
8599     t1 = tcg_temp_new_i32();
8600     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8601     tcg_gen_addi_i32(addr, addr, 4);
8602     t2 = tcg_temp_new_i32();
8603     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8604 
8605     if (a->w) {
8606         /* Base writeback.  */
8607         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8608         store_reg(s, a->rn, addr);
8609     }
8610     gen_rfe(s, t1, t2);
8611     return true;
8612 }
8613 
8614 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8615 {
8616     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8617         return false;
8618     }
8619     gen_srs(s, a->mode, a->pu, a->w);
8620     return true;
8621 }
8622 
8623 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8624 {
8625     uint32_t mask, val;
8626 
8627     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8628         return false;
8629     }
8630     if (IS_USER(s)) {
8631         /* Implemented as NOP in user mode.  */
8632         return true;
8633     }
8634     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8635 
8636     mask = val = 0;
8637     if (a->imod & 2) {
8638         if (a->A) {
8639             mask |= CPSR_A;
8640         }
8641         if (a->I) {
8642             mask |= CPSR_I;
8643         }
8644         if (a->F) {
8645             mask |= CPSR_F;
8646         }
8647         if (a->imod & 1) {
8648             val |= mask;
8649         }
8650     }
8651     if (a->M) {
8652         mask |= CPSR_M;
8653         val |= a->mode;
8654     }
8655     if (mask) {
8656         gen_set_psr_im(s, mask, 0, val);
8657     }
8658     return true;
8659 }
8660 
8661 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8662 {
8663     TCGv_i32 tmp, addr;
8664 
8665     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8666         return false;
8667     }
8668     if (IS_USER(s)) {
8669         /* Implemented as NOP in user mode.  */
8670         return true;
8671     }
8672 
8673     tmp = tcg_constant_i32(a->im);
8674     /* FAULTMASK */
8675     if (a->F) {
8676         addr = tcg_constant_i32(19);
8677         gen_helper_v7m_msr(tcg_env, addr, tmp);
8678     }
8679     /* PRIMASK */
8680     if (a->I) {
8681         addr = tcg_constant_i32(16);
8682         gen_helper_v7m_msr(tcg_env, addr, tmp);
8683     }
8684     gen_rebuild_hflags(s, false);
8685     gen_lookup_tb(s);
8686     return true;
8687 }
8688 
8689 /*
8690  * Clear-Exclusive, Barriers
8691  */
8692 
8693 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8694 {
8695     if (s->thumb
8696         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8697         : !ENABLE_ARCH_6K) {
8698         return false;
8699     }
8700     gen_clrex(s);
8701     return true;
8702 }
8703 
8704 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8705 {
8706     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8707         return false;
8708     }
8709     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8710     return true;
8711 }
8712 
8713 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8714 {
8715     return trans_DSB(s, NULL);
8716 }
8717 
8718 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8719 {
8720     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8721         return false;
8722     }
8723     /*
8724      * We need to break the TB after this insn to execute
8725      * self-modifying code correctly and also to take
8726      * any pending interrupts immediately.
8727      */
8728     s->base.is_jmp = DISAS_TOO_MANY;
8729     return true;
8730 }
8731 
8732 static bool trans_SB(DisasContext *s, arg_SB *a)
8733 {
8734     if (!dc_isar_feature(aa32_sb, s)) {
8735         return false;
8736     }
8737     /*
8738      * TODO: There is no speculation barrier opcode
8739      * for TCG; MB and end the TB instead.
8740      */
8741     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8742     s->base.is_jmp = DISAS_TOO_MANY;
8743     return true;
8744 }
8745 
8746 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8747 {
8748     if (!ENABLE_ARCH_6) {
8749         return false;
8750     }
8751     if (a->E != (s->be_data == MO_BE)) {
8752         gen_helper_setend(tcg_env);
8753         s->base.is_jmp = DISAS_UPDATE_EXIT;
8754     }
8755     return true;
8756 }
8757 
8758 /*
8759  * Preload instructions
8760  * All are nops, contingent on the appropriate arch level.
8761  */
8762 
8763 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8764 {
8765     return ENABLE_ARCH_5TE;
8766 }
8767 
8768 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8769 {
8770     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8771 }
8772 
8773 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8774 {
8775     return ENABLE_ARCH_7;
8776 }
8777 
8778 /*
8779  * If-then
8780  */
8781 
8782 static bool trans_IT(DisasContext *s, arg_IT *a)
8783 {
8784     int cond_mask = a->cond_mask;
8785 
8786     /*
8787      * No actual code generated for this insn, just setup state.
8788      *
8789      * Combinations of firstcond and mask which set up an 0b1111
8790      * condition are UNPREDICTABLE; we take the CONSTRAINED
8791      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8792      * i.e. both meaning "execute always".
8793      */
8794     s->condexec_cond = (cond_mask >> 4) & 0xe;
8795     s->condexec_mask = cond_mask & 0x1f;
8796     return true;
8797 }
8798 
8799 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8800 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8801 {
8802     TCGv_i32 rn, rm;
8803     DisasCompare c;
8804 
8805     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8806         return false;
8807     }
8808 
8809     if (a->rm == 13) {
8810         /* SEE "Related encodings" (MVE shifts) */
8811         return false;
8812     }
8813 
8814     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8815         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8816         return false;
8817     }
8818 
8819     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8820     rn = tcg_temp_new_i32();
8821     rm = tcg_temp_new_i32();
8822     if (a->rn == 15) {
8823         tcg_gen_movi_i32(rn, 0);
8824     } else {
8825         load_reg_var(s, rn, a->rn);
8826     }
8827     if (a->rm == 15) {
8828         tcg_gen_movi_i32(rm, 0);
8829     } else {
8830         load_reg_var(s, rm, a->rm);
8831     }
8832 
8833     switch (a->op) {
8834     case 0: /* CSEL */
8835         break;
8836     case 1: /* CSINC */
8837         tcg_gen_addi_i32(rm, rm, 1);
8838         break;
8839     case 2: /* CSINV */
8840         tcg_gen_not_i32(rm, rm);
8841         break;
8842     case 3: /* CSNEG */
8843         tcg_gen_neg_i32(rm, rm);
8844         break;
8845     default:
8846         g_assert_not_reached();
8847     }
8848 
8849     arm_test_cc(&c, a->fcond);
8850     tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8851 
8852     store_reg(s, a->rd, rn);
8853     return true;
8854 }
8855 
8856 /*
8857  * Legacy decoder.
8858  */
8859 
8860 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8861 {
8862     unsigned int cond = insn >> 28;
8863 
8864     /* M variants do not implement ARM mode; this must raise the INVSTATE
8865      * UsageFault exception.
8866      */
8867     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8868         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8869         return;
8870     }
8871 
8872     if (s->pstate_il) {
8873         /*
8874          * Illegal execution state. This has priority over BTI
8875          * exceptions, but comes after instruction abort exceptions.
8876          */
8877         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8878         return;
8879     }
8880 
8881     if (cond == 0xf) {
8882         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8883          * choose to UNDEF. In ARMv5 and above the space is used
8884          * for miscellaneous unconditional instructions.
8885          */
8886         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8887             unallocated_encoding(s);
8888             return;
8889         }
8890 
8891         /* Unconditional instructions.  */
8892         /* TODO: Perhaps merge these into one decodetree output file.  */
8893         if (disas_a32_uncond(s, insn) ||
8894             disas_vfp_uncond(s, insn) ||
8895             disas_neon_dp(s, insn) ||
8896             disas_neon_ls(s, insn) ||
8897             disas_neon_shared(s, insn)) {
8898             return;
8899         }
8900         /* fall back to legacy decoder */
8901 
8902         if ((insn & 0x0e000f00) == 0x0c000100) {
8903             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8904                 /* iWMMXt register transfer.  */
8905                 if (extract32(s->c15_cpar, 1, 1)) {
8906                     if (!disas_iwmmxt_insn(s, insn)) {
8907                         return;
8908                     }
8909                 }
8910             }
8911         }
8912         goto illegal_op;
8913     }
8914     if (cond != 0xe) {
8915         /* if not always execute, we generate a conditional jump to
8916            next instruction */
8917         arm_skip_unless(s, cond);
8918     }
8919 
8920     /* TODO: Perhaps merge these into one decodetree output file.  */
8921     if (disas_a32(s, insn) ||
8922         disas_vfp(s, insn)) {
8923         return;
8924     }
8925     /* fall back to legacy decoder */
8926     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8927     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8928         if (((insn & 0x0c000e00) == 0x0c000000)
8929             && ((insn & 0x03000000) != 0x03000000)) {
8930             /* Coprocessor insn, coprocessor 0 or 1 */
8931             disas_xscale_insn(s, insn);
8932             return;
8933         }
8934     }
8935 
8936 illegal_op:
8937     unallocated_encoding(s);
8938 }
8939 
8940 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8941 {
8942     /*
8943      * Return true if this is a 16 bit instruction. We must be precise
8944      * about this (matching the decode).
8945      */
8946     if ((insn >> 11) < 0x1d) {
8947         /* Definitely a 16-bit instruction */
8948         return true;
8949     }
8950 
8951     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8952      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8953      * end up actually treating this as two 16-bit insns, though,
8954      * if it's half of a bl/blx pair that might span a page boundary.
8955      */
8956     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8957         arm_dc_feature(s, ARM_FEATURE_M)) {
8958         /* Thumb2 cores (including all M profile ones) always treat
8959          * 32-bit insns as 32-bit.
8960          */
8961         return false;
8962     }
8963 
8964     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8965         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8966          * is not on the next page; we merge this into a 32-bit
8967          * insn.
8968          */
8969         return false;
8970     }
8971     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8972      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8973      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8974      *  -- handle as single 16 bit insn
8975      */
8976     return true;
8977 }
8978 
8979 /* Translate a 32-bit thumb instruction. */
8980 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8981 {
8982     /*
8983      * ARMv6-M supports a limited subset of Thumb2 instructions.
8984      * Other Thumb1 architectures allow only 32-bit
8985      * combined BL/BLX prefix and suffix.
8986      */
8987     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8988         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8989         int i;
8990         bool found = false;
8991         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8992                                                0xf3b08040 /* dsb */,
8993                                                0xf3b08050 /* dmb */,
8994                                                0xf3b08060 /* isb */,
8995                                                0xf3e08000 /* mrs */,
8996                                                0xf000d000 /* bl */};
8997         static const uint32_t armv6m_mask[] = {0xffe0d000,
8998                                                0xfff0d0f0,
8999                                                0xfff0d0f0,
9000                                                0xfff0d0f0,
9001                                                0xffe0d000,
9002                                                0xf800d000};
9003 
9004         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9005             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9006                 found = true;
9007                 break;
9008             }
9009         }
9010         if (!found) {
9011             goto illegal_op;
9012         }
9013     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9014         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9015             unallocated_encoding(s);
9016             return;
9017         }
9018     }
9019 
9020     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9021         /*
9022          * NOCP takes precedence over any UNDEF for (almost) the
9023          * entire wide range of coprocessor-space encodings, so check
9024          * for it first before proceeding to actually decode eg VFP
9025          * insns. This decode also handles the few insns which are
9026          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9027          */
9028         if (disas_m_nocp(s, insn)) {
9029             return;
9030         }
9031     }
9032 
9033     if ((insn & 0xef000000) == 0xef000000) {
9034         /*
9035          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9036          * transform into
9037          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9038          */
9039         uint32_t a32_insn = (insn & 0xe2ffffff) |
9040             ((insn & (1 << 28)) >> 4) | (1 << 28);
9041 
9042         if (disas_neon_dp(s, a32_insn)) {
9043             return;
9044         }
9045     }
9046 
9047     if ((insn & 0xff100000) == 0xf9000000) {
9048         /*
9049          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9050          * transform into
9051          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9052          */
9053         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9054 
9055         if (disas_neon_ls(s, a32_insn)) {
9056             return;
9057         }
9058     }
9059 
9060     /*
9061      * TODO: Perhaps merge these into one decodetree output file.
9062      * Note disas_vfp is written for a32 with cond field in the
9063      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9064      */
9065     if (disas_t32(s, insn) ||
9066         disas_vfp_uncond(s, insn) ||
9067         disas_neon_shared(s, insn) ||
9068         disas_mve(s, insn) ||
9069         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9070         return;
9071     }
9072 
9073 illegal_op:
9074     unallocated_encoding(s);
9075 }
9076 
9077 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9078 {
9079     if (!disas_t16(s, insn)) {
9080         unallocated_encoding(s);
9081     }
9082 }
9083 
9084 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9085 {
9086     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9087      * (False positives are OK, false negatives are not.)
9088      * We know this is a Thumb insn, and our caller ensures we are
9089      * only called if dc->base.pc_next is less than 4 bytes from the page
9090      * boundary, so we cross the page if the first 16 bits indicate
9091      * that this is a 32 bit insn.
9092      */
9093     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9094 
9095     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9096 }
9097 
9098 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9099 {
9100     DisasContext *dc = container_of(dcbase, DisasContext, base);
9101     CPUARMState *env = cpu_env(cs);
9102     ARMCPU *cpu = env_archcpu(env);
9103     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9104     uint32_t condexec, core_mmu_idx;
9105 
9106     dc->isar = &cpu->isar;
9107     dc->condjmp = 0;
9108     dc->pc_save = dc->base.pc_first;
9109     dc->aarch64 = false;
9110     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9111     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9112     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9113     /*
9114      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9115      * is always the IT bits. On M-profile, some of the reserved encodings
9116      * of IT are used instead to indicate either ICI or ECI, which
9117      * indicate partial progress of a restartable insn that was interrupted
9118      * partway through by an exception:
9119      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9120      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9121      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9122      * insn, behave normally".
9123      */
9124     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9125     dc->eci_handled = false;
9126     if (condexec & 0xf) {
9127         dc->condexec_mask = (condexec & 0xf) << 1;
9128         dc->condexec_cond = condexec >> 4;
9129     } else {
9130         if (arm_feature(env, ARM_FEATURE_M)) {
9131             dc->eci = condexec >> 4;
9132         }
9133     }
9134 
9135     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9136     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9137     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9138 #if !defined(CONFIG_USER_ONLY)
9139     dc->user = (dc->current_el == 0);
9140 #endif
9141     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9142     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9143     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9144     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9145     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9146 
9147     if (arm_feature(env, ARM_FEATURE_M)) {
9148         dc->vfp_enabled = 1;
9149         dc->be_data = MO_TE;
9150         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9151         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9152         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9153         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9154         dc->v7m_new_fp_ctxt_needed =
9155             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9156         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9157         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9158     } else {
9159         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9160         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9161         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9162         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9163         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9164             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9165         } else {
9166             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9167             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9168         }
9169         dc->sme_trap_nonstreaming =
9170             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9171     }
9172     dc->lse2 = false; /* applies only to aarch64 */
9173     dc->cp_regs = cpu->cp_regs;
9174     dc->features = env->features;
9175 
9176     /* Single step state. The code-generation logic here is:
9177      *  SS_ACTIVE == 0:
9178      *   generate code with no special handling for single-stepping (except
9179      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9180      *   this happens anyway because those changes are all system register or
9181      *   PSTATE writes).
9182      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9183      *   emit code for one insn
9184      *   emit code to clear PSTATE.SS
9185      *   emit code to generate software step exception for completed step
9186      *   end TB (as usual for having generated an exception)
9187      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9188      *   emit code to generate a software step exception
9189      *   end the TB
9190      */
9191     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9192     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9193     dc->is_ldex = false;
9194 
9195     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9196 
9197     /* If architectural single step active, limit to 1.  */
9198     if (dc->ss_active) {
9199         dc->base.max_insns = 1;
9200     }
9201 
9202     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9203        to those left on the page.  */
9204     if (!dc->thumb) {
9205         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9206         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9207     }
9208 
9209     cpu_V0 = tcg_temp_new_i64();
9210     cpu_V1 = tcg_temp_new_i64();
9211     cpu_M0 = tcg_temp_new_i64();
9212 }
9213 
9214 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9215 {
9216     DisasContext *dc = container_of(dcbase, DisasContext, base);
9217 
9218     /* A note on handling of the condexec (IT) bits:
9219      *
9220      * We want to avoid the overhead of having to write the updated condexec
9221      * bits back to the CPUARMState for every instruction in an IT block. So:
9222      * (1) if the condexec bits are not already zero then we write
9223      * zero back into the CPUARMState now. This avoids complications trying
9224      * to do it at the end of the block. (For example if we don't do this
9225      * it's hard to identify whether we can safely skip writing condexec
9226      * at the end of the TB, which we definitely want to do for the case
9227      * where a TB doesn't do anything with the IT state at all.)
9228      * (2) if we are going to leave the TB then we call gen_set_condexec()
9229      * which will write the correct value into CPUARMState if zero is wrong.
9230      * This is done both for leaving the TB at the end, and for leaving
9231      * it because of an exception we know will happen, which is done in
9232      * gen_exception_insn(). The latter is necessary because we need to
9233      * leave the TB with the PC/IT state just prior to execution of the
9234      * instruction which caused the exception.
9235      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9236      * then the CPUARMState will be wrong and we need to reset it.
9237      * This is handled in the same way as restoration of the
9238      * PC in these situations; we save the value of the condexec bits
9239      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9240      * then uses this to restore them after an exception.
9241      *
9242      * Note that there are no instructions which can read the condexec
9243      * bits, and none which can write non-static values to them, so
9244      * we don't need to care about whether CPUARMState is correct in the
9245      * middle of a TB.
9246      */
9247 
9248     /* Reset the conditional execution bits immediately. This avoids
9249        complications trying to do it at the end of the block.  */
9250     if (dc->condexec_mask || dc->condexec_cond) {
9251         store_cpu_field_constant(0, condexec_bits);
9252     }
9253 }
9254 
9255 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9256 {
9257     DisasContext *dc = container_of(dcbase, DisasContext, base);
9258     /*
9259      * The ECI/ICI bits share PSR bits with the IT bits, so we
9260      * need to reconstitute the bits from the split-out DisasContext
9261      * fields here.
9262      */
9263     uint32_t condexec_bits;
9264     target_ulong pc_arg = dc->base.pc_next;
9265 
9266     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9267         pc_arg &= ~TARGET_PAGE_MASK;
9268     }
9269     if (dc->eci) {
9270         condexec_bits = dc->eci << 4;
9271     } else {
9272         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9273     }
9274     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9275     dc->insn_start_updated = false;
9276 }
9277 
9278 static bool arm_check_kernelpage(DisasContext *dc)
9279 {
9280 #ifdef CONFIG_USER_ONLY
9281     /* Intercept jump to the magic kernel page.  */
9282     if (dc->base.pc_next >= 0xffff0000) {
9283         /* We always get here via a jump, so know we are not in a
9284            conditional execution block.  */
9285         gen_exception_internal(EXCP_KERNEL_TRAP);
9286         dc->base.is_jmp = DISAS_NORETURN;
9287         return true;
9288     }
9289 #endif
9290     return false;
9291 }
9292 
9293 static bool arm_check_ss_active(DisasContext *dc)
9294 {
9295     if (dc->ss_active && !dc->pstate_ss) {
9296         /* Singlestep state is Active-pending.
9297          * If we're in this state at the start of a TB then either
9298          *  a) we just took an exception to an EL which is being debugged
9299          *     and this is the first insn in the exception handler
9300          *  b) debug exceptions were masked and we just unmasked them
9301          *     without changing EL (eg by clearing PSTATE.D)
9302          * In either case we're going to take a swstep exception in the
9303          * "did not step an insn" case, and so the syndrome ISV and EX
9304          * bits should be zero.
9305          */
9306         assert(dc->base.num_insns == 1);
9307         gen_swstep_exception(dc, 0, 0);
9308         dc->base.is_jmp = DISAS_NORETURN;
9309         return true;
9310     }
9311 
9312     return false;
9313 }
9314 
9315 static void arm_post_translate_insn(DisasContext *dc)
9316 {
9317     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9318         if (dc->pc_save != dc->condlabel.pc_save) {
9319             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9320         }
9321         gen_set_label(dc->condlabel.label);
9322         dc->condjmp = 0;
9323     }
9324 }
9325 
9326 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9327 {
9328     DisasContext *dc = container_of(dcbase, DisasContext, base);
9329     CPUARMState *env = cpu_env(cpu);
9330     uint32_t pc = dc->base.pc_next;
9331     unsigned int insn;
9332 
9333     /* Singlestep exceptions have the highest priority. */
9334     if (arm_check_ss_active(dc)) {
9335         dc->base.pc_next = pc + 4;
9336         return;
9337     }
9338 
9339     if (pc & 3) {
9340         /*
9341          * PC alignment fault.  This has priority over the instruction abort
9342          * that we would receive from a translation fault via arm_ldl_code
9343          * (or the execution of the kernelpage entrypoint). This should only
9344          * be possible after an indirect branch, at the start of the TB.
9345          */
9346         assert(dc->base.num_insns == 1);
9347         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
9348         dc->base.is_jmp = DISAS_NORETURN;
9349         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9350         return;
9351     }
9352 
9353     if (arm_check_kernelpage(dc)) {
9354         dc->base.pc_next = pc + 4;
9355         return;
9356     }
9357 
9358     dc->pc_curr = pc;
9359     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9360     dc->insn = insn;
9361     dc->base.pc_next = pc + 4;
9362     disas_arm_insn(dc, insn);
9363 
9364     arm_post_translate_insn(dc);
9365 
9366     /* ARM is a fixed-length ISA.  We performed the cross-page check
9367        in init_disas_context by adjusting max_insns.  */
9368 }
9369 
9370 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9371 {
9372     /* Return true if this Thumb insn is always unconditional,
9373      * even inside an IT block. This is true of only a very few
9374      * instructions: BKPT, HLT, and SG.
9375      *
9376      * A larger class of instructions are UNPREDICTABLE if used
9377      * inside an IT block; we do not need to detect those here, because
9378      * what we do by default (perform the cc check and update the IT
9379      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9380      * choice for those situations.
9381      *
9382      * insn is either a 16-bit or a 32-bit instruction; the two are
9383      * distinguishable because for the 16-bit case the top 16 bits
9384      * are zeroes, and that isn't a valid 32-bit encoding.
9385      */
9386     if ((insn & 0xffffff00) == 0xbe00) {
9387         /* BKPT */
9388         return true;
9389     }
9390 
9391     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9392         !arm_dc_feature(s, ARM_FEATURE_M)) {
9393         /* HLT: v8A only. This is unconditional even when it is going to
9394          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9395          * For v7 cores this was a plain old undefined encoding and so
9396          * honours its cc check. (We might be using the encoding as
9397          * a semihosting trap, but we don't change the cc check behaviour
9398          * on that account, because a debugger connected to a real v7A
9399          * core and emulating semihosting traps by catching the UNDEF
9400          * exception would also only see cases where the cc check passed.
9401          * No guest code should be trying to do a HLT semihosting trap
9402          * in an IT block anyway.
9403          */
9404         return true;
9405     }
9406 
9407     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9408         arm_dc_feature(s, ARM_FEATURE_M)) {
9409         /* SG: v8M only */
9410         return true;
9411     }
9412 
9413     return false;
9414 }
9415 
9416 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9417 {
9418     DisasContext *dc = container_of(dcbase, DisasContext, base);
9419     CPUARMState *env = cpu_env(cpu);
9420     uint32_t pc = dc->base.pc_next;
9421     uint32_t insn;
9422     bool is_16bit;
9423     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9424     TCGOp *insn_eci_rewind = NULL;
9425     target_ulong insn_eci_pc_save = -1;
9426 
9427     /* Misaligned thumb PC is architecturally impossible. */
9428     assert((dc->base.pc_next & 1) == 0);
9429 
9430     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9431         dc->base.pc_next = pc + 2;
9432         return;
9433     }
9434 
9435     dc->pc_curr = pc;
9436     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9437     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9438     pc += 2;
9439     if (!is_16bit) {
9440         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9441         insn = insn << 16 | insn2;
9442         pc += 2;
9443     }
9444     dc->base.pc_next = pc;
9445     dc->insn = insn;
9446 
9447     if (dc->pstate_il) {
9448         /*
9449          * Illegal execution state. This has priority over BTI
9450          * exceptions, but comes after instruction abort exceptions.
9451          */
9452         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9453         return;
9454     }
9455 
9456     if (dc->eci) {
9457         /*
9458          * For M-profile continuable instructions, ECI/ICI handling
9459          * falls into these cases:
9460          *  - interrupt-continuable instructions
9461          *     These are the various load/store multiple insns (both
9462          *     integer and fp). The ICI bits indicate the register
9463          *     where the load/store can resume. We make the IMPDEF
9464          *     choice to always do "instruction restart", ie ignore
9465          *     the ICI value and always execute the ldm/stm from the
9466          *     start. So all we need to do is zero PSR.ICI if the
9467          *     insn executes.
9468          *  - MVE instructions subject to beat-wise execution
9469          *     Here the ECI bits indicate which beats have already been
9470          *     executed, and we must honour this. Each insn of this
9471          *     type will handle it correctly. We will update PSR.ECI
9472          *     in the helper function for the insn (some ECI values
9473          *     mean that the following insn also has been partially
9474          *     executed).
9475          *  - Special cases which don't advance ECI
9476          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9477          *     bits untouched.
9478          *  - all other insns (the common case)
9479          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9480          *     We place a rewind-marker here. Insns in the previous
9481          *     three categories will set a flag in the DisasContext.
9482          *     If the flag isn't set after we call disas_thumb_insn()
9483          *     or disas_thumb2_insn() then we know we have a "some other
9484          *     insn" case. We will rewind to the marker (ie throwing away
9485          *     all the generated code) and instead emit "take exception".
9486          */
9487         insn_eci_rewind = tcg_last_op();
9488         insn_eci_pc_save = dc->pc_save;
9489     }
9490 
9491     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9492         uint32_t cond = dc->condexec_cond;
9493 
9494         /*
9495          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9496          * "always"; 0xf is not "never".
9497          */
9498         if (cond < 0x0e) {
9499             arm_skip_unless(dc, cond);
9500         }
9501     }
9502 
9503     if (is_16bit) {
9504         disas_thumb_insn(dc, insn);
9505     } else {
9506         disas_thumb2_insn(dc, insn);
9507     }
9508 
9509     /* Advance the Thumb condexec condition.  */
9510     if (dc->condexec_mask) {
9511         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9512                              ((dc->condexec_mask >> 4) & 1));
9513         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9514         if (dc->condexec_mask == 0) {
9515             dc->condexec_cond = 0;
9516         }
9517     }
9518 
9519     if (dc->eci && !dc->eci_handled) {
9520         /*
9521          * Insn wasn't valid for ECI/ICI at all: undo what we
9522          * just generated and instead emit an exception
9523          */
9524         tcg_remove_ops_after(insn_eci_rewind);
9525         dc->pc_save = insn_eci_pc_save;
9526         dc->condjmp = 0;
9527         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9528     }
9529 
9530     arm_post_translate_insn(dc);
9531 
9532     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9533      * will touch a new page.  This ensures that prefetch aborts occur at
9534      * the right place.
9535      *
9536      * We want to stop the TB if the next insn starts in a new page,
9537      * or if it spans between this page and the next. This means that
9538      * if we're looking at the last halfword in the page we need to
9539      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9540      * or a 32-bit Thumb insn (which won't).
9541      * This is to avoid generating a silly TB with a single 16-bit insn
9542      * in it at the end of this page (which would execute correctly
9543      * but isn't very efficient).
9544      */
9545     if (dc->base.is_jmp == DISAS_NEXT
9546         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9547             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9548                 && insn_crosses_page(env, dc)))) {
9549         dc->base.is_jmp = DISAS_TOO_MANY;
9550     }
9551 }
9552 
9553 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9554 {
9555     DisasContext *dc = container_of(dcbase, DisasContext, base);
9556 
9557     /* At this stage dc->condjmp will only be set when the skipped
9558        instruction was a conditional branch or trap, and the PC has
9559        already been written.  */
9560     gen_set_condexec(dc);
9561     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9562         /* Exception return branches need some special case code at the
9563          * end of the TB, which is complex enough that it has to
9564          * handle the single-step vs not and the condition-failed
9565          * insn codepath itself.
9566          */
9567         gen_bx_excret_final_code(dc);
9568     } else if (unlikely(dc->ss_active)) {
9569         /* Unconditional and "condition passed" instruction codepath. */
9570         switch (dc->base.is_jmp) {
9571         case DISAS_SWI:
9572             gen_ss_advance(dc);
9573             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9574             break;
9575         case DISAS_HVC:
9576             gen_ss_advance(dc);
9577             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9578             break;
9579         case DISAS_SMC:
9580             gen_ss_advance(dc);
9581             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9582             break;
9583         case DISAS_NEXT:
9584         case DISAS_TOO_MANY:
9585         case DISAS_UPDATE_EXIT:
9586         case DISAS_UPDATE_NOCHAIN:
9587             gen_update_pc(dc, curr_insn_len(dc));
9588             /* fall through */
9589         default:
9590             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9591             gen_singlestep_exception(dc);
9592             break;
9593         case DISAS_NORETURN:
9594             break;
9595         }
9596     } else {
9597         /* While branches must always occur at the end of an IT block,
9598            there are a few other things that can cause us to terminate
9599            the TB in the middle of an IT block:
9600             - Exception generating instructions (bkpt, swi, undefined).
9601             - Page boundaries.
9602             - Hardware watchpoints.
9603            Hardware breakpoints have already been handled and skip this code.
9604          */
9605         switch (dc->base.is_jmp) {
9606         case DISAS_NEXT:
9607         case DISAS_TOO_MANY:
9608             gen_goto_tb(dc, 1, curr_insn_len(dc));
9609             break;
9610         case DISAS_UPDATE_NOCHAIN:
9611             gen_update_pc(dc, curr_insn_len(dc));
9612             /* fall through */
9613         case DISAS_JUMP:
9614             gen_goto_ptr();
9615             break;
9616         case DISAS_UPDATE_EXIT:
9617             gen_update_pc(dc, curr_insn_len(dc));
9618             /* fall through */
9619         default:
9620             /* indicate that the hash table must be used to find the next TB */
9621             tcg_gen_exit_tb(NULL, 0);
9622             break;
9623         case DISAS_NORETURN:
9624             /* nothing more to generate */
9625             break;
9626         case DISAS_WFI:
9627             gen_helper_wfi(tcg_env, tcg_constant_i32(curr_insn_len(dc)));
9628             /*
9629              * The helper doesn't necessarily throw an exception, but we
9630              * must go back to the main loop to check for interrupts anyway.
9631              */
9632             tcg_gen_exit_tb(NULL, 0);
9633             break;
9634         case DISAS_WFE:
9635             gen_helper_wfe(tcg_env);
9636             break;
9637         case DISAS_YIELD:
9638             gen_helper_yield(tcg_env);
9639             break;
9640         case DISAS_SWI:
9641             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9642             break;
9643         case DISAS_HVC:
9644             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9645             break;
9646         case DISAS_SMC:
9647             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9648             break;
9649         }
9650     }
9651 
9652     if (dc->condjmp) {
9653         /* "Condition failed" instruction codepath for the branch/trap insn */
9654         set_disas_label(dc, dc->condlabel);
9655         gen_set_condexec(dc);
9656         if (unlikely(dc->ss_active)) {
9657             gen_update_pc(dc, curr_insn_len(dc));
9658             gen_singlestep_exception(dc);
9659         } else {
9660             gen_goto_tb(dc, 1, curr_insn_len(dc));
9661         }
9662     }
9663 }
9664 
9665 static const TranslatorOps arm_translator_ops = {
9666     .init_disas_context = arm_tr_init_disas_context,
9667     .tb_start           = arm_tr_tb_start,
9668     .insn_start         = arm_tr_insn_start,
9669     .translate_insn     = arm_tr_translate_insn,
9670     .tb_stop            = arm_tr_tb_stop,
9671 };
9672 
9673 static const TranslatorOps thumb_translator_ops = {
9674     .init_disas_context = arm_tr_init_disas_context,
9675     .tb_start           = arm_tr_tb_start,
9676     .insn_start         = arm_tr_insn_start,
9677     .translate_insn     = thumb_tr_translate_insn,
9678     .tb_stop            = arm_tr_tb_stop,
9679 };
9680 
9681 /* generate intermediate code for basic block 'tb'.  */
9682 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9683                            vaddr pc, void *host_pc)
9684 {
9685     DisasContext dc = { };
9686     const TranslatorOps *ops = &arm_translator_ops;
9687     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9688 
9689     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9690         ops = &thumb_translator_ops;
9691     }
9692 #ifdef TARGET_AARCH64
9693     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9694         ops = &aarch64_translator_ops;
9695     }
9696 #endif
9697 
9698     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9699 }
9700