xref: /openbmc/qemu/target/arm/tcg/translate.c (revision 2df1eb2756658dc2c0e9d739cec6929e74e6c3b0)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "translate.h"
24 #include "translate-a32.h"
25 #include "qemu/log.h"
26 #include "disas/disas.h"
27 #include "arm_ldst.h"
28 #include "semihosting/semihost.h"
29 #include "cpregs.h"
30 #include "exec/helper-proto.h"
31 
32 #define HELPER_H "helper.h"
33 #include "exec/helper-info.c.inc"
34 #undef  HELPER_H
35 
/*
 * Architecture-level predicates for the decoder.  Every macro expands to a
 * feature test on a DisasContext pointer called 's', so they can only be
 * used inside functions that have such a variable in scope.
 */
#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
46 
/* These are TCG temporaries used only by the legacy iwMMXt decoder */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
/* These are TCG globals which alias CPUARMState fields */
/* cpu_R[n] aliases CPUARMState.regs[n]; it is private to this file. */
static TCGv_i32 cpu_R[16];
/* The flag words and exclusive-monitor globals below have external linkage. */
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;

/* Names given to the cpu_R[] globals when they are created, indexed by n. */
static const char * const regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
58 
59 
60 /* initialize TCG globals.  */
61 void arm_translate_init(void)
62 {
63     int i;
64 
65     for (i = 0; i < 16; i++) {
66         cpu_R[i] = tcg_global_mem_new_i32(tcg_env,
67                                           offsetof(CPUARMState, regs[i]),
68                                           regnames[i]);
69     }
70     cpu_CF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, CF), "CF");
71     cpu_NF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, NF), "NF");
72     cpu_VF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, VF), "VF");
73     cpu_ZF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, ZF), "ZF");
74 
75     cpu_exclusive_addr = tcg_global_mem_new_i64(tcg_env,
76         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
77     cpu_exclusive_val = tcg_global_mem_new_i64(tcg_env,
78         offsetof(CPUARMState, exclusive_val), "exclusive_val");
79 
80     a64_translate_init();
81 }
82 
83 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
84 {
85     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
86     switch (cmode) {
87     case 0: case 1:
88         /* no-op */
89         break;
90     case 2: case 3:
91         imm <<= 8;
92         break;
93     case 4: case 5:
94         imm <<= 16;
95         break;
96     case 6: case 7:
97         imm <<= 24;
98         break;
99     case 8: case 9:
100         imm |= imm << 16;
101         break;
102     case 10: case 11:
103         imm = (imm << 8) | (imm << 24);
104         break;
105     case 12:
106         imm = (imm << 8) | 0xff;
107         break;
108     case 13:
109         imm = (imm << 16) | 0xffff;
110         break;
111     case 14:
112         if (op) {
113             /*
114              * This and cmode == 15 op == 1 are the only cases where
115              * the top and bottom 32 bits of the encoded constant differ.
116              */
117             uint64_t imm64 = 0;
118             int n;
119 
120             for (n = 0; n < 8; n++) {
121                 if (imm & (1 << n)) {
122                     imm64 |= (0xffULL << (n * 8));
123                 }
124             }
125             return imm64;
126         }
127         imm |= (imm << 8) | (imm << 16) | (imm << 24);
128         break;
129     case 15:
130         if (op) {
131             /* Reserved encoding for AArch32; valid for AArch64 */
132             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
133             if (imm & 0x80) {
134                 imm64 |= 0x8000000000000000ULL;
135             }
136             if (imm & 0x40) {
137                 imm64 |= 0x3fc0000000000000ULL;
138             } else {
139                 imm64 |= 0x4000000000000000ULL;
140             }
141             return imm64;
142         }
143         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
144             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
145         break;
146     }
147     if (op) {
148         imm = ~imm;
149     }
150     return dup_const(MO_32, imm);
151 }
152 
153 /* Generate a label used for skipping this instruction */
154 void arm_gen_condlabel(DisasContext *s)
155 {
156     if (!s->condjmp) {
157         s->condlabel = gen_disas_label(s);
158         s->condjmp = 1;
159     }
160 }
161 
/* Flags for the disas_set_da_iss info argument:
 * lower bits hold the Rt register number, higher bits are flags.
 */
typedef enum ISSInfo {
    ISSNone = 0,
    ISSRegMask = 0x1f,          /* mask extracting the Rt register number */
    ISSInvalid = (1 << 5),      /* caller has no ISS info: record nothing */
    ISSIsAcqRel = (1 << 6),     /* passed on as the acquire/release flag */
    ISSIsWrite = (1 << 7),      /* passed on as the write flag */
    ISSIs16Bit = (1 << 8),      /* passed on as the 16-bit insn flag */
} ISSInfo;
173 
174 /*
175  * Store var into env + offset to a member with size bytes.
176  * Free var after use.
177  */
178 void store_cpu_offset(TCGv_i32 var, int offset, int size)
179 {
180     switch (size) {
181     case 1:
182         tcg_gen_st8_i32(var, tcg_env, offset);
183         break;
184     case 4:
185         tcg_gen_st_i32(var, tcg_env, offset);
186         break;
187     default:
188         g_assert_not_reached();
189     }
190 }
191 
192 /* Save the syndrome information for a Data Abort */
193 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
194 {
195     uint32_t syn;
196     int sas = memop & MO_SIZE;
197     bool sse = memop & MO_SIGN;
198     bool is_acqrel = issinfo & ISSIsAcqRel;
199     bool is_write = issinfo & ISSIsWrite;
200     bool is_16bit = issinfo & ISSIs16Bit;
201     int srt = issinfo & ISSRegMask;
202 
203     if (issinfo & ISSInvalid) {
204         /* Some callsites want to conditionally provide ISS info,
205          * eg "only if this was not a writeback"
206          */
207         return;
208     }
209 
210     if (srt == 15) {
211         /* For AArch32, insns where the src/dest is R15 never generate
212          * ISS information. Catching that here saves checking at all
213          * the call sites.
214          */
215         return;
216     }
217 
218     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
219                                   0, 0, 0, is_write, 0, is_16bit);
220     disas_set_insn_syndrome(s, syn);
221 }
222 
223 static inline int get_a32_user_mem_index(DisasContext *s)
224 {
225     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
226      * insns:
227      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
228      *  otherwise, access as if at PL0.
229      */
230     switch (s->mmu_idx) {
231     case ARMMMUIdx_E3:
232     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
233     case ARMMMUIdx_E10_0:
234     case ARMMMUIdx_E10_1:
235     case ARMMMUIdx_E10_1_PAN:
236         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
237     case ARMMMUIdx_MUser:
238     case ARMMMUIdx_MPriv:
239         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
240     case ARMMMUIdx_MUserNegPri:
241     case ARMMMUIdx_MPrivNegPri:
242         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
243     case ARMMMUIdx_MSUser:
244     case ARMMMUIdx_MSPriv:
245         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
246     case ARMMMUIdx_MSUserNegPri:
247     case ARMMMUIdx_MSPrivNegPri:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
249     default:
250         g_assert_not_reached();
251     }
252 }
253 
254 /* The pc_curr difference for an architectural jump. */
255 static target_long jmp_diff(DisasContext *s, target_long diff)
256 {
257     return diff + (s->thumb ? 4 : 8);
258 }
259 
260 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
261 {
262     assert(s->pc_save != -1);
263     if (tb_cflags(s->base.tb) & CF_PCREL) {
264         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
265     } else {
266         tcg_gen_movi_i32(var, s->pc_curr + diff);
267     }
268 }
269 
270 /* Set a variable to the value of a CPU register.  */
271 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
272 {
273     if (reg == 15) {
274         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
275     } else {
276         tcg_gen_mov_i32(var, cpu_R[reg]);
277     }
278 }
279 
280 /*
281  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
282  * This is used for load/store for which use of PC implies (literal),
283  * or ADD that implies ADR.
284  */
285 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
286 {
287     TCGv_i32 tmp = tcg_temp_new_i32();
288 
289     if (reg == 15) {
290         /*
291          * This address is computed from an aligned PC:
292          * subtract off the low bits.
293          */
294         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
295     } else {
296         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
297     }
298     return tmp;
299 }
300 
301 /* Set a CPU register.  The source must be a temporary and will be
302    marked as dead.  */
303 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
304 {
305     if (reg == 15) {
306         /* In Thumb mode, we must ignore bit 0.
307          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
308          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
309          * We choose to ignore [1:0] in ARM mode for all architecture versions.
310          */
311         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
312         s->base.is_jmp = DISAS_JUMP;
313         s->pc_save = -1;
314     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
315         /* For M-profile SP bits [1:0] are always zero */
316         tcg_gen_andi_i32(var, var, ~3);
317     }
318     tcg_gen_mov_i32(cpu_R[reg], var);
319 }
320 
/*
 * Variant of store_reg which applies v8M stack-limit checks before updating
 * SP. If the check fails this will result in an exception being taken.
 * We disable the stack checks for CONFIG_USER_ONLY because we have
 * no idea what the stack limits should be in that case.
 * If stack checking is not being done this just acts like store_reg().
 *
 * var is the new SP value; it is handed to store_reg(), which may
 * modify it in place.
 */
static void store_sp_checked(DisasContext *s, TCGv_i32 var)
{
#ifndef CONFIG_USER_ONLY
    /* The helper is only emitted when this TB has stack checking enabled. */
    if (s->v8m_stackcheck) {
        gen_helper_v8m_stackcheck(tcg_env, var);
    }
#endif
    store_reg(s, 13, var);
}
337 
/* Value extensions.  */
/* Each of these operates in place: var is both source and destination. */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

/* The dual-halfword forms are implemented by helper calls. */
#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
346 
347 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
348 {
349     gen_helper_cpsr_write(tcg_env, var, tcg_constant_i32(mask));
350 }
351 
352 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
353 {
354     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
355 
356     if (new_el) {
357         if (m_profile) {
358             gen_helper_rebuild_hflags_m32_newel(tcg_env);
359         } else {
360             gen_helper_rebuild_hflags_a32_newel(tcg_env);
361         }
362     } else {
363         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
364         if (m_profile) {
365             gen_helper_rebuild_hflags_m32(tcg_env, tcg_el);
366         } else {
367             gen_helper_rebuild_hflags_a32(tcg_env, tcg_el);
368         }
369     }
370 }
371 
/*
 * Emit a call to the exception_internal helper for excp.
 * excp must satisfy excp_is_internal(); this is asserted at
 * translation time.
 */
static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}
377 
/*
 * Emit the code that ends a single-stepped instruction: advance the
 * step state and raise the software-step exception.  Translation of the
 * TB stops here (DISAS_NORETURN).
 */
static void gen_singlestep_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}
393 
394 void clear_eci_state(DisasContext *s)
395 {
396     /*
397      * Clear any ECI/ICI state: used when a load multiple/store
398      * multiple insn executes.
399      */
400     if (s->eci) {
401         store_cpu_field_constant(0, condexec_bits);
402         s->eci = 0;
403     }
404 }
405 
406 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
407 {
408     TCGv_i32 tmp1 = tcg_temp_new_i32();
409     TCGv_i32 tmp2 = tcg_temp_new_i32();
410     tcg_gen_ext16s_i32(tmp1, a);
411     tcg_gen_ext16s_i32(tmp2, b);
412     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
413     tcg_gen_sari_i32(a, a, 16);
414     tcg_gen_sari_i32(b, b, 16);
415     tcg_gen_mul_i32(b, b, a);
416     tcg_gen_mov_i32(a, tmp1);
417 }
418 
419 /* Byteswap each halfword.  */
420 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
421 {
422     TCGv_i32 tmp = tcg_temp_new_i32();
423     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
424     tcg_gen_shri_i32(tmp, var, 8);
425     tcg_gen_and_i32(tmp, tmp, mask);
426     tcg_gen_and_i32(var, var, mask);
427     tcg_gen_shli_i32(var, var, 8);
428     tcg_gen_or_i32(dest, var, tmp);
429 }
430 
431 /* Byteswap low halfword and sign extend.  */
432 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
433 {
434     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
435 }
436 
437 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
438     tmp = (t0 ^ t1) & 0x8000;
439     t0 &= ~0x8000;
440     t1 &= ~0x8000;
441     t0 = (t0 + t1) ^ tmp;
442  */
443 
444 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
445 {
446     TCGv_i32 tmp = tcg_temp_new_i32();
447     tcg_gen_xor_i32(tmp, t0, t1);
448     tcg_gen_andi_i32(tmp, tmp, 0x8000);
449     tcg_gen_andi_i32(t0, t0, ~0x8000);
450     tcg_gen_andi_i32(t1, t1, ~0x8000);
451     tcg_gen_add_i32(t0, t0, t1);
452     tcg_gen_xor_i32(dest, t0, tmp);
453 }
454 
/*
 * Set N and Z flags from var.
 * Both flag words receive a copy of var: N is tested as the sign bit of
 * cpu_NF and Z as cpu_ZF == 0 (see arm_test_cc()).  C and V are left
 * unchanged.
 */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}
461 
/* dest = T0 + T1 + CF.  The flags are read (CF) but not updated. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}
468 
/*
 * dest = T0 - T1 + CF - 1.
 * That is, one extra is subtracted when CF is 0.  The flags are read
 * (CF) but not updated.
 */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}
476 
/*
 * dest = T0 + T1. Compute C, N, V and Z flags.
 * The sum is built in cpu_NF and only copied to dest at the very end,
 * after the last read of t0 and t1.
 */
static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, 0);
    /* Double-word add of (0:t0) + (0:t1): low word -> NF, high word -> CF */
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* VF = (result ^ t0) & ~(t0 ^ t1); tmp is reused for t0 ^ t1 */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}
489 
/*
 * dest = T0 + T1 + CF.  Compute C, N, V and Z flags.
 * Two code shapes are generated depending on whether the TCG backend
 * provides add2_i32; both leave the 32-bit sum in cpu_NF and the
 * carry-out in cpu_CF.
 */
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (TCG_TARGET_HAS_add2_i32) {
        tcg_gen_movi_i32(tmp, 0);
        /* First (CF:NF) = t0 + CF, then (CF:NF) += t1 */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        /* Do the whole sum in 64 bits, then split it into NF (low), CF (high) */
        TCGv_i64 q0 = tcg_temp_new_i64();
        TCGv_i64 q1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(q0, t0);
        tcg_gen_extu_i32_i64(q1, t1);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extu_i32_i64(q1, cpu_CF);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
    }
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* VF = (result ^ t0) & ~(t0 ^ t1) */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}
514 
/*
 * dest = T0 - T1. Compute C, N, V and Z flags.
 * The difference is built in cpu_NF and only copied to dest at the very
 * end, after the last read of t0 and t1.
 */
static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp;
    tcg_gen_sub_i32(cpu_NF, t0, t1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* CF is 1 when t0 >= t1 as unsigned values */
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    /* VF = (result ^ t0) & (t0 ^ t1) */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}
528 
529 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
530 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
531 {
532     TCGv_i32 tmp = tcg_temp_new_i32();
533     tcg_gen_not_i32(tmp, t1);
534     gen_adc_CC(dest, t0, tmp);
535 }
536 
/*
 * Define gen_<name>(dest, t0, t1): shift t0 by the register amount t1
 * without touching the flags.
 * The shift itself uses t1 & 0x1f.  If any of bits [7:5] of t1 is set
 * the result is forced to zero instead.  Bits above bit 7 of t1 are
 * ignored.
 * Instantiated for shl and shr.
 */
#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmpd = tcg_temp_new_i32();                               \
    TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
    TCGv_i32 zero = tcg_constant_i32(0);                              \
    tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
    tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
    tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
    tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT
551 
552 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
553 {
554     TCGv_i32 tmp1 = tcg_temp_new_i32();
555 
556     tcg_gen_andi_i32(tmp1, t1, 0xff);
557     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
558     tcg_gen_sar_i32(dest, t0, tmp1);
559 }
560 
/* Set the carry flag to bit number 'shift' of var (the shifter carry-out). */
static void shifter_out_im(TCGv_i32 var, int shift)
{
    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
}
565 
566 /* Shift by immediate.  Includes special handling for shift == 0.  */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568                                     int shift, int flags)
569 {
570     switch (shiftop) {
571     case 0: /* LSL */
572         if (shift != 0) {
573             if (flags)
574                 shifter_out_im(var, 32 - shift);
575             tcg_gen_shli_i32(var, var, shift);
576         }
577         break;
578     case 1: /* LSR */
579         if (shift == 0) {
580             if (flags) {
581                 tcg_gen_shri_i32(cpu_CF, var, 31);
582             }
583             tcg_gen_movi_i32(var, 0);
584         } else {
585             if (flags)
586                 shifter_out_im(var, shift - 1);
587             tcg_gen_shri_i32(var, var, shift);
588         }
589         break;
590     case 2: /* ASR */
591         if (shift == 0)
592             shift = 32;
593         if (flags)
594             shifter_out_im(var, shift - 1);
595         if (shift == 32)
596           shift = 31;
597         tcg_gen_sari_i32(var, var, shift);
598         break;
599     case 3: /* ROR/RRX */
600         if (shift != 0) {
601             if (flags)
602                 shifter_out_im(var, shift - 1);
603             tcg_gen_rotri_i32(var, var, shift); break;
604         } else {
605             TCGv_i32 tmp = tcg_temp_new_i32();
606             tcg_gen_shli_i32(tmp, cpu_CF, 31);
607             if (flags)
608                 shifter_out_im(var, 0);
609             tcg_gen_shri_i32(var, var, 1);
610             tcg_gen_or_i32(var, var, tmp);
611         }
612     }
613 };
614 
615 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616                                      TCGv_i32 shift, int flags)
617 {
618     if (flags) {
619         switch (shiftop) {
620         case 0: gen_helper_shl_cc(var, tcg_env, var, shift); break;
621         case 1: gen_helper_shr_cc(var, tcg_env, var, shift); break;
622         case 2: gen_helper_sar_cc(var, tcg_env, var, shift); break;
623         case 3: gen_helper_ror_cc(var, tcg_env, var, shift); break;
624         }
625     } else {
626         switch (shiftop) {
627         case 0:
628             gen_shl(var, var, shift);
629             break;
630         case 1:
631             gen_shr(var, var, shift);
632             break;
633         case 2:
634             gen_sar(var, var, shift);
635             break;
636         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
637                 tcg_gen_rotr_i32(var, var, shift); break;
638         }
639     }
640 }
641 
642 /*
643  * Generate a conditional based on ARM condition code cc.
644  * This is common between ARM and Aarch64 targets.
645  */
646 void arm_test_cc(DisasCompare *cmp, int cc)
647 {
648     TCGv_i32 value;
649     TCGCond cond;
650 
651     switch (cc) {
652     case 0: /* eq: Z */
653     case 1: /* ne: !Z */
654         cond = TCG_COND_EQ;
655         value = cpu_ZF;
656         break;
657 
658     case 2: /* cs: C */
659     case 3: /* cc: !C */
660         cond = TCG_COND_NE;
661         value = cpu_CF;
662         break;
663 
664     case 4: /* mi: N */
665     case 5: /* pl: !N */
666         cond = TCG_COND_LT;
667         value = cpu_NF;
668         break;
669 
670     case 6: /* vs: V */
671     case 7: /* vc: !V */
672         cond = TCG_COND_LT;
673         value = cpu_VF;
674         break;
675 
676     case 8: /* hi: C && !Z */
677     case 9: /* ls: !C || Z -> !(C && !Z) */
678         cond = TCG_COND_NE;
679         value = tcg_temp_new_i32();
680         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
681            ZF is non-zero for !Z; so AND the two subexpressions.  */
682         tcg_gen_neg_i32(value, cpu_CF);
683         tcg_gen_and_i32(value, value, cpu_ZF);
684         break;
685 
686     case 10: /* ge: N == V -> N ^ V == 0 */
687     case 11: /* lt: N != V -> N ^ V != 0 */
688         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
689         cond = TCG_COND_GE;
690         value = tcg_temp_new_i32();
691         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
692         break;
693 
694     case 12: /* gt: !Z && N == V */
695     case 13: /* le: Z || N != V */
696         cond = TCG_COND_NE;
697         value = tcg_temp_new_i32();
698         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
699          * the sign bit then AND with ZF to yield the result.  */
700         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
701         tcg_gen_sari_i32(value, value, 31);
702         tcg_gen_andc_i32(value, cpu_ZF, value);
703         break;
704 
705     case 14: /* always */
706     case 15: /* always */
707         /* Use the ALWAYS condition, which will fold early.
708          * It doesn't matter what we use for the value.  */
709         cond = TCG_COND_ALWAYS;
710         value = cpu_ZF;
711         goto no_invert;
712 
713     default:
714         fprintf(stderr, "Bad condition code 0x%x\n", cc);
715         abort();
716     }
717 
718     if (cc & 1) {
719         cond = tcg_invert_cond(cond);
720     }
721 
722  no_invert:
723     cmp->cond = cond;
724     cmp->value = value;
725 }
726 
/*
 * Emit a branch to label that is taken when the comparison described by
 * cmp (cmp->value tested against zero with cmp->cond) holds.
 */
void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
{
    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
}
731 
/*
 * Emit a branch to label that is taken when ARM condition code cc
 * passes: builds the comparison with arm_test_cc() and branches on it.
 */
void arm_gen_test_cc(int cc, TCGLabel *label)
{
    DisasCompare cmp;
    arm_test_cc(&cmp, cc);
    arm_jump_cc(&cmp, label);
}
738 
739 void gen_set_condexec(DisasContext *s)
740 {
741     if (s->condexec_mask) {
742         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
743 
744         store_cpu_field_constant(val, condexec_bits);
745     }
746 }
747 
/*
 * Emit code setting R15 to the address of the current insn plus diff,
 * and record that address as the saved PC for later PC computations.
 */
void gen_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_R[15], diff);
    s->pc_save = s->pc_curr + diff;
}
753 
/*
 * Set PC and Thumb state from var: bit 0 of var selects the Thumb state
 * and the remaining bits give the new PC.
 * var is overwritten (it is reduced to its bit 0), so the caller must
 * pass a temporary it no longer needs.
 */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->base.is_jmp = DISAS_JUMP;
    /* Write the PC first: the next op destroys the upper bits of var. */
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
    /* The runtime PC is no longer known at translation time. */
    s->pc_save = -1;
}
763 
/*
 * Set PC and Thumb state from var. var is overwritten by gen_bx().
 * For M-profile CPUs, include logic to detect exception-return
 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 * and BX reg, and no others, and happens only for code in Handler mode.
 * The Security Extension also requires us to check for the FNC_RETURN
 * which signals a function return from non-secure state; this can happen
 * in both Handler and Thread mode.
 * To avoid having to do multiple comparisons in inline generated code,
 * we make the check we do here loose, so it will match for EXC_RETURN
 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 * for these spurious cases and returns without doing anything (giving
 * the same behaviour as for a branch to a non-magic address).
 *
 * In linux-user mode it is unclear what the right behaviour for an
 * attempted FNC_RETURN should be, because in real hardware this will go
 * directly to Secure code (ie not the Linux kernel) which will then treat
 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 * attempt behave the way it would on a CPU without the security extension,
 * which is to say "like a normal branch". That means we can simply treat
 * all branches as normal with no magic address behaviour.
 */
static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
{
    /* Generate the same code here as for a simple bx, but flag via
     * s->base.is_jmp that we need to do the rest of the work later.
     */
    gen_bx(s, var);
#ifndef CONFIG_USER_ONLY
    /*
     * The magic-address check is requested for any CPU with the M
     * Security Extension, and for other M-profile CPUs only when
     * translating Handler mode code.
     */
    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
        s->base.is_jmp = DISAS_BX_EXCRET;
    }
#endif
}
799 
/*
 * Emit the TB-ending code for a branch that might be an M-profile
 * exception return (or FNC_RETURN): compare the new PC against the
 * magic range at runtime and either leave the TB normally or raise the
 * internal EXCP_EXCEPTION_EXIT exception.
 */
static inline void gen_bx_excret_final_code(DisasContext *s)
{
    /* Generate the code to finish possible exception return and end the TB */
    DisasLabel excret_label = gen_disas_label(s);
    uint32_t min_magic;

    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
        /* Covers FNC_RETURN and EXC_RETURN magic */
        min_magic = FNC_RETURN_MIN_MAGIC;
    } else {
        /* EXC_RETURN magic only */
        min_magic = EXC_RETURN_MIN_MAGIC;
    }

    /* Is the new PC value in the magic range indicating exception return? */
    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
    /* No: end the TB as we would for a DISAS_JMP */
    if (s->ss_active) {
        gen_singlestep_exception(s);
    } else {
        tcg_gen_exit_tb(NULL, 0);
    }
    set_disas_label(s, excret_label);
    /* Yes: this is an exception return.
     * At this point in runtime env->regs[15] and env->thumb will hold
     * the exception-return magic number, which do_v7m_exception_exit()
     * will read. Nothing else will be able to see those values because
     * the cpu-exec main loop guarantees that we will always go straight
     * from raising the exception to the exception-handling code.
     *
     * gen_ss_advance(s) does nothing on M profile currently but
     * calling it is conceptually the right thing as we have executed
     * this instruction (compare SWI, HVC, SMC handling).
     */
    gen_ss_advance(s);
    gen_exception_internal(EXCP_EXCEPTION_EXIT);
}
837 
/*
 * Emit code for BXNS Rm: the value of register rm is passed to the
 * v7m_bxns helper and the TB ends with DISAS_EXIT.
 */
static inline void gen_bxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
     * we need to sync state before calling it, but:
     *  - we don't need to do gen_update_pc() because the bxns helper will
     *    always set the PC itself
     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
     *    unless it's outside an IT block or the last insn in an IT block,
     *    so we know that condexec == 0 (already set at the top of the TB)
     *    is correct in the non-UNPREDICTABLE cases, and we can choose
     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
     */
    gen_helper_v7m_bxns(tcg_env, var);
    s->base.is_jmp = DISAS_EXIT;
}
855 
/*
 * Emit code for BLXNS Rm: the PC is first set to the address following
 * this insn, then the value of register rm is passed to the v7m_blxns
 * helper and the TB ends with DISAS_EXIT.
 */
static inline void gen_blxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* We don't need to sync condexec state, for the same reason as bxns.
     * We do however need to set the PC, because the blxns helper reads it.
     * The blxns helper may throw an exception.
     */
    gen_update_pc(s, curr_insn_len(s));
    gen_helper_v7m_blxns(tcg_env, var);
    s->base.is_jmp = DISAS_EXIT;
}
868 
869 /* Variant of store_reg which uses branch&exchange logic when storing
870    to r15 in ARM architecture v7 and above. The source must be a temporary
871    and will be marked as dead. */
872 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
873 {
874     if (reg == 15 && ENABLE_ARCH_7) {
875         gen_bx(s, var);
876     } else {
877         store_reg(s, reg, var);
878     }
879 }
880 
881 /* Variant of store_reg which uses branch&exchange logic when storing
882  * to r15 in ARM architecture v5T and above. This is used for storing
883  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
884  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
885 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
886 {
887     if (reg == 15 && ENABLE_ARCH_5) {
888         gen_bx_excret(s, var);
889     } else {
890         store_reg(s, reg, var);
891     }
892 }
893 
/*
 * Compile-time constant form of CONFIG_USER_ONLY, so that it can be
 * tested in an ordinary C expression instead of with #ifdef.
 */
#ifdef CONFIG_USER_ONLY
#define IS_USER_ONLY 1
#else
#define IS_USER_ONLY 0
#endif
899 
900 MemOp pow2_align(unsigned i)
901 {
902     static const MemOp mop_align[] = {
903         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
904         /*
905          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
906          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
907          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
908          */
909         MO_ALIGN_16
910     };
911     g_assert(i < ARRAY_SIZE(mop_align));
912     return mop_align[i];
913 }
914 
/*
 * Abstractions of "generate code to do a guest load/store for
 * AArch32", where a vaddr is always 32 bits (and is zero-extended
 * if we're a 64-bit core) and data is also 32 bits unless we are
 * specifically doing a 64-bit access.
 * These functions work like tcg_gen_qemu_{ld,st}* except
 * that the address argument is TCGv_i32 rather than TCGv.
 */
922  */
923 
924 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926     TCGv addr = tcg_temp_new();
927     tcg_gen_extu_i32_tl(addr, a32);
928 
929     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
930     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932     }
933     return addr;
934 }
935 
936 /*
937  * Internal routines are used for NEON cases where the endianness
938  * and/or alignment has already been taken into account and manipulated.
939  */
940 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
941                               TCGv_i32 a32, int index, MemOp opc)
942 {
943     TCGv addr = gen_aa32_addr(s, a32, opc);
944     tcg_gen_qemu_ld_i32(val, addr, index, opc);
945 }
946 
947 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
948                               TCGv_i32 a32, int index, MemOp opc)
949 {
950     TCGv addr = gen_aa32_addr(s, a32, opc);
951     tcg_gen_qemu_st_i32(val, addr, index, opc);
952 }
953 
954 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
955                               TCGv_i32 a32, int index, MemOp opc)
956 {
957     TCGv addr = gen_aa32_addr(s, a32, opc);
958 
959     tcg_gen_qemu_ld_i64(val, addr, index, opc);
960 
961     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
962     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
963         tcg_gen_rotri_i64(val, val, 32);
964     }
965 }
966 
967 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
968                               TCGv_i32 a32, int index, MemOp opc)
969 {
970     TCGv addr = gen_aa32_addr(s, a32, opc);
971 
972     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
973     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
974         TCGv_i64 tmp = tcg_temp_new_i64();
975         tcg_gen_rotri_i64(tmp, val, 32);
976         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
977     } else {
978         tcg_gen_qemu_st_i64(val, addr, index, opc);
979     }
980 }
981 
982 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
983                      int index, MemOp opc)
984 {
985     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
986 }
987 
988 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
989                      int index, MemOp opc)
990 {
991     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
992 }
993 
994 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
995                      int index, MemOp opc)
996 {
997     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
998 }
999 
1000 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1001                      int index, MemOp opc)
1002 {
1003     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1004 }
1005 
/*
 * DO_GEN_LD(SUFFIX, MEMOP) defines gen_aa32_ld<SUFFIX>(), a wrapper
 * around gen_aa32_ld_i32() with the MemOp fixed to MEMOP.
 */
#define DO_GEN_LD(SUFFIX, MEMOP)                                          \
    static inline void gen_aa32_ld##SUFFIX(DisasContext *s, TCGv_i32 val, \
                                           TCGv_i32 a32, int index)       \
    {                                                                     \
        gen_aa32_ld_i32(s, val, a32, index, MEMOP);                       \
    }
1012 
/*
 * DO_GEN_ST(SUFFIX, MEMOP) defines gen_aa32_st<SUFFIX>(), a wrapper
 * around gen_aa32_st_i32() with the MemOp fixed to MEMOP.
 */
#define DO_GEN_ST(SUFFIX, MEMOP)                                          \
    static inline void gen_aa32_st##SUFFIX(DisasContext *s, TCGv_i32 val, \
                                           TCGv_i32 a32, int index)       \
    {                                                                     \
        gen_aa32_st_i32(s, val, a32, index, MEMOP);                       \
    }
1019 
1020 static inline void gen_hvc(DisasContext *s, int imm16)
1021 {
1022     /* The pre HVC helper handles cases when HVC gets trapped
1023      * as an undefined insn by runtime configuration (ie before
1024      * the insn really executes).
1025      */
1026     gen_update_pc(s, 0);
1027     gen_helper_pre_hvc(tcg_env);
1028     /* Otherwise we will treat this as a real exception which
1029      * happens after execution of the insn. (The distinction matters
1030      * for the PC value reported to the exception handler and also
1031      * for single stepping.)
1032      */
1033     s->svc_imm = imm16;
1034     gen_update_pc(s, curr_insn_len(s));
1035     s->base.is_jmp = DISAS_HVC;
1036 }
1037 
1038 static inline void gen_smc(DisasContext *s)
1039 {
1040     /* As with HVC, we may take an exception either before or after
1041      * the insn executes.
1042      */
1043     gen_update_pc(s, 0);
1044     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa32_smc()));
1045     gen_update_pc(s, curr_insn_len(s));
1046     s->base.is_jmp = DISAS_SMC;
1047 }
1048 
1049 static void gen_exception_internal_insn(DisasContext *s, int excp)
1050 {
1051     gen_set_condexec(s);
1052     gen_update_pc(s, 0);
1053     gen_exception_internal(excp);
1054     s->base.is_jmp = DISAS_NORETURN;
1055 }
1056 
1057 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1058 {
1059     gen_helper_exception_with_syndrome_el(tcg_env, tcg_constant_i32(excp),
1060                                           tcg_constant_i32(syndrome), tcg_el);
1061 }
1062 
1063 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1064 {
1065     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1066 }
1067 
1068 static void gen_exception(int excp, uint32_t syndrome)
1069 {
1070     gen_helper_exception_with_syndrome(tcg_env, tcg_constant_i32(excp),
1071                                        tcg_constant_i32(syndrome));
1072 }
1073 
1074 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1075                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1076 {
1077     if (s->aarch64) {
1078         gen_a64_update_pc(s, pc_diff);
1079     } else {
1080         gen_set_condexec(s);
1081         gen_update_pc(s, pc_diff);
1082     }
1083     gen_exception_el_v(excp, syn, tcg_el);
1084     s->base.is_jmp = DISAS_NORETURN;
1085 }
1086 
1087 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1088                            uint32_t syn, uint32_t target_el)
1089 {
1090     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1091                             tcg_constant_i32(target_el));
1092 }
1093 
1094 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1095                         int excp, uint32_t syn)
1096 {
1097     if (s->aarch64) {
1098         gen_a64_update_pc(s, pc_diff);
1099     } else {
1100         gen_set_condexec(s);
1101         gen_update_pc(s, pc_diff);
1102     }
1103     gen_exception(excp, syn);
1104     s->base.is_jmp = DISAS_NORETURN;
1105 }
1106 
1107 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1108 {
1109     gen_set_condexec(s);
1110     gen_update_pc(s, 0);
1111     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syn));
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 void unallocated_encoding(DisasContext *s)
1116 {
1117     /* Unallocated and reserved encodings are uncategorized */
1118     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1119 }
1120 
1121 /* Force a TB lookup after an instruction that changes the CPU state.  */
1122 void gen_lookup_tb(DisasContext *s)
1123 {
1124     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1125     s->base.is_jmp = DISAS_EXIT;
1126 }
1127 
1128 static inline void gen_hlt(DisasContext *s, int imm)
1129 {
1130     /* HLT. This has two purposes.
1131      * Architecturally, it is an external halting debug instruction.
1132      * Since QEMU doesn't implement external debug, we treat this as
1133      * it is required for halting debug disabled: it will UNDEF.
1134      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1135      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1136      * must trigger semihosting even for ARMv7 and earlier, where
1137      * HLT was an undefined encoding.
1138      * In system mode, we don't allow userspace access to
1139      * semihosting, to provide some semblance of security
1140      * (and for consistency with our 32-bit semihosting).
1141      */
1142     if (semihosting_enabled(s->current_el == 0) &&
1143         (imm == (s->thumb ? 0x3c : 0xf000))) {
1144         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1145         return;
1146     }
1147 
1148     unallocated_encoding(s);
1149 }
1150 
1151 /*
1152  * Return the offset of a "full" NEON Dreg.
1153  */
1154 long neon_full_reg_offset(unsigned reg)
1155 {
1156     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1157 }
1158 
1159 /*
1160  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1161  * where 0 is the least significant end of the register.
1162  */
1163 long neon_element_offset(int reg, int element, MemOp memop)
1164 {
1165     int element_size = 1 << (memop & MO_SIZE);
1166     int ofs = element * element_size;
1167 #if HOST_BIG_ENDIAN
1168     /*
1169      * Calculate the offset assuming fully little-endian,
1170      * then XOR to account for the order of the 8-byte units.
1171      */
1172     if (element_size < 8) {
1173         ofs ^= 8 - element_size;
1174     }
1175 #endif
1176     return neon_full_reg_offset(reg) + ofs;
1177 }
1178 
1179 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1180 long vfp_reg_offset(bool dp, unsigned reg)
1181 {
1182     if (dp) {
1183         return neon_element_offset(reg, 0, MO_64);
1184     } else {
1185         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1186     }
1187 }
1188 
1189 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1190 {
1191     long off = neon_element_offset(reg, ele, memop);
1192 
1193     switch (memop) {
1194     case MO_SB:
1195         tcg_gen_ld8s_i32(dest, tcg_env, off);
1196         break;
1197     case MO_UB:
1198         tcg_gen_ld8u_i32(dest, tcg_env, off);
1199         break;
1200     case MO_SW:
1201         tcg_gen_ld16s_i32(dest, tcg_env, off);
1202         break;
1203     case MO_UW:
1204         tcg_gen_ld16u_i32(dest, tcg_env, off);
1205         break;
1206     case MO_UL:
1207     case MO_SL:
1208         tcg_gen_ld_i32(dest, tcg_env, off);
1209         break;
1210     default:
1211         g_assert_not_reached();
1212     }
1213 }
1214 
1215 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1216 {
1217     long off = neon_element_offset(reg, ele, memop);
1218 
1219     switch (memop) {
1220     case MO_SL:
1221         tcg_gen_ld32s_i64(dest, tcg_env, off);
1222         break;
1223     case MO_UL:
1224         tcg_gen_ld32u_i64(dest, tcg_env, off);
1225         break;
1226     case MO_UQ:
1227         tcg_gen_ld_i64(dest, tcg_env, off);
1228         break;
1229     default:
1230         g_assert_not_reached();
1231     }
1232 }
1233 
1234 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1235 {
1236     long off = neon_element_offset(reg, ele, memop);
1237 
1238     switch (memop) {
1239     case MO_8:
1240         tcg_gen_st8_i32(src, tcg_env, off);
1241         break;
1242     case MO_16:
1243         tcg_gen_st16_i32(src, tcg_env, off);
1244         break;
1245     case MO_32:
1246         tcg_gen_st_i32(src, tcg_env, off);
1247         break;
1248     default:
1249         g_assert_not_reached();
1250     }
1251 }
1252 
1253 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1254 {
1255     long off = neon_element_offset(reg, ele, memop);
1256 
1257     switch (memop) {
1258     case MO_32:
1259         tcg_gen_st32_i64(src, tcg_env, off);
1260         break;
1261     case MO_64:
1262         tcg_gen_st_i64(src, tcg_env, off);
1263         break;
1264     default:
1265         g_assert_not_reached();
1266     }
1267 }
1268 
/*
 * Bit 20 of a coprocessor insn.  The iwMMXt decoder tests it to tell the
 * read/load forms (e.g. TMRRC, WLDR*) from the write/store forms
 * (e.g. TMCRR, WSTR*).
 */
#define ARM_CP_RW_BIT   (1 << 20)
1270 
1271 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1272 {
1273     tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1274 }
1275 
1276 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1277 {
1278     tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1279 }
1280 
1281 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1282 {
1283     TCGv_i32 var = tcg_temp_new_i32();
1284     tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285     return var;
1286 }
1287 
1288 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1289 {
1290     tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1291 }
1292 
1293 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1294 {
1295     iwmmxt_store_reg(cpu_M0, rn);
1296 }
1297 
1298 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1299 {
1300     iwmmxt_load_reg(cpu_M0, rn);
1301 }
1302 
1303 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1304 {
1305     iwmmxt_load_reg(cpu_V1, rn);
1306     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1307 }
1308 
1309 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1310 {
1311     iwmmxt_load_reg(cpu_V1, rn);
1312     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1313 }
1314 
1315 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1316 {
1317     iwmmxt_load_reg(cpu_V1, rn);
1318     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1319 }
1320 
/*
 * IWMMXT_OP(op) defines gen_op_iwmmxt_<op>_M0_wRn(rn), which loads
 * wR<rn> into cpu_V1 and sets M0 = helper_iwmmxt_<op>(M0, V1).
 * The helper does not take tcg_env.
 */
#define IWMMXT_OP(op) \
static inline void gen_op_iwmmxt_##op##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##op(cpu_M0, cpu_M0, cpu_V1); \
}
1327 
/*
 * IWMMXT_OP_ENV(op) is like IWMMXT_OP(op), except that the helper is
 * also passed tcg_env: M0 = helper_iwmmxt_<op>(env, M0, V1).
 */
#define IWMMXT_OP_ENV(op) \
static inline void gen_op_iwmmxt_##op##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##op(cpu_M0, tcg_env, cpu_M0, cpu_V1); \
}
1334 
/*
 * IWMMXT_OP_ENV_SIZE(op) instantiates IWMMXT_OP_ENV for the three
 * suffixed variants <op>b, <op>w and <op>l.
 */
#define IWMMXT_OP_ENV_SIZE(op) \
IWMMXT_OP_ENV(op##b) \
IWMMXT_OP_ENV(op##w) \
IWMMXT_OP_ENV(op##l)
1339 
/*
 * IWMMXT_OP_ENV1(op) defines gen_op_iwmmxt_<op>_M0(), a single-operand
 * operation: M0 = helper_iwmmxt_<op>(env, M0).
 */
#define IWMMXT_OP_ENV1(op) \
static inline void gen_op_iwmmxt_##op##_M0(void) \
{ \
    gen_helper_iwmmxt_##op(cpu_M0, tcg_env, cpu_M0); \
}
1345 
1346 IWMMXT_OP(maddsq)
1347 IWMMXT_OP(madduq)
1348 IWMMXT_OP(sadb)
1349 IWMMXT_OP(sadw)
1350 IWMMXT_OP(mulslw)
1351 IWMMXT_OP(mulshw)
1352 IWMMXT_OP(mululw)
1353 IWMMXT_OP(muluhw)
1354 IWMMXT_OP(macsw)
1355 IWMMXT_OP(macuw)
1356 
1357 IWMMXT_OP_ENV_SIZE(unpackl)
1358 IWMMXT_OP_ENV_SIZE(unpackh)
1359 
1360 IWMMXT_OP_ENV1(unpacklub)
1361 IWMMXT_OP_ENV1(unpackluw)
1362 IWMMXT_OP_ENV1(unpacklul)
1363 IWMMXT_OP_ENV1(unpackhub)
1364 IWMMXT_OP_ENV1(unpackhuw)
1365 IWMMXT_OP_ENV1(unpackhul)
1366 IWMMXT_OP_ENV1(unpacklsb)
1367 IWMMXT_OP_ENV1(unpacklsw)
1368 IWMMXT_OP_ENV1(unpacklsl)
1369 IWMMXT_OP_ENV1(unpackhsb)
1370 IWMMXT_OP_ENV1(unpackhsw)
1371 IWMMXT_OP_ENV1(unpackhsl)
1372 
1373 IWMMXT_OP_ENV_SIZE(cmpeq)
1374 IWMMXT_OP_ENV_SIZE(cmpgtu)
1375 IWMMXT_OP_ENV_SIZE(cmpgts)
1376 
1377 IWMMXT_OP_ENV_SIZE(mins)
1378 IWMMXT_OP_ENV_SIZE(minu)
1379 IWMMXT_OP_ENV_SIZE(maxs)
1380 IWMMXT_OP_ENV_SIZE(maxu)
1381 
1382 IWMMXT_OP_ENV_SIZE(subn)
1383 IWMMXT_OP_ENV_SIZE(addn)
1384 IWMMXT_OP_ENV_SIZE(subu)
1385 IWMMXT_OP_ENV_SIZE(addu)
1386 IWMMXT_OP_ENV_SIZE(subs)
1387 IWMMXT_OP_ENV_SIZE(adds)
1388 
1389 IWMMXT_OP_ENV(avgb0)
1390 IWMMXT_OP_ENV(avgb1)
1391 IWMMXT_OP_ENV(avgw0)
1392 IWMMXT_OP_ENV(avgw1)
1393 
1394 IWMMXT_OP_ENV(packuw)
1395 IWMMXT_OP_ENV(packul)
1396 IWMMXT_OP_ENV(packuq)
1397 IWMMXT_OP_ENV(packsw)
1398 IWMMXT_OP_ENV(packsl)
1399 IWMMXT_OP_ENV(packsq)
1400 
1401 static void gen_op_iwmmxt_set_mup(void)
1402 {
1403     TCGv_i32 tmp;
1404     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1405     tcg_gen_ori_i32(tmp, tmp, 2);
1406     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1407 }
1408 
1409 static void gen_op_iwmmxt_set_cup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 1);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_setpsr_nz(void)
1418 {
1419     TCGv_i32 tmp = tcg_temp_new_i32();
1420     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1421     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1422 }
1423 
1424 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1425 {
1426     iwmmxt_load_reg(cpu_V1, rn);
1427     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1428     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1429 }
1430 
1431 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1432                                      TCGv_i32 dest)
1433 {
1434     int rd;
1435     uint32_t offset;
1436     TCGv_i32 tmp;
1437 
1438     rd = (insn >> 16) & 0xf;
1439     tmp = load_reg(s, rd);
1440 
1441     offset = (insn & 0xff) << ((insn >> 7) & 2);
1442     if (insn & (1 << 24)) {
1443         /* Pre indexed */
1444         if (insn & (1 << 23))
1445             tcg_gen_addi_i32(tmp, tmp, offset);
1446         else
1447             tcg_gen_addi_i32(tmp, tmp, -offset);
1448         tcg_gen_mov_i32(dest, tmp);
1449         if (insn & (1 << 21)) {
1450             store_reg(s, rd, tmp);
1451         }
1452     } else if (insn & (1 << 21)) {
1453         /* Post indexed */
1454         tcg_gen_mov_i32(dest, tmp);
1455         if (insn & (1 << 23))
1456             tcg_gen_addi_i32(tmp, tmp, offset);
1457         else
1458             tcg_gen_addi_i32(tmp, tmp, -offset);
1459         store_reg(s, rd, tmp);
1460     } else if (!(insn & (1 << 23)))
1461         return 1;
1462     return 0;
1463 }
1464 
1465 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1466 {
1467     int rd = (insn >> 0) & 0xf;
1468     TCGv_i32 tmp;
1469 
1470     if (insn & (1 << 8)) {
1471         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1472             return 1;
1473         } else {
1474             tmp = iwmmxt_load_creg(rd);
1475         }
1476     } else {
1477         tmp = tcg_temp_new_i32();
1478         iwmmxt_load_reg(cpu_V0, rd);
1479         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1480     }
1481     tcg_gen_andi_i32(tmp, tmp, mask);
1482     tcg_gen_mov_i32(dest, tmp);
1483     return 0;
1484 }
1485 
1486 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1487    (ie. an undefined instruction).  */
1488 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1489 {
1490     int rd, wrd;
1491     int rdhi, rdlo, rd0, rd1, i;
1492     TCGv_i32 addr;
1493     TCGv_i32 tmp, tmp2, tmp3;
1494 
1495     if ((insn & 0x0e000e00) == 0x0c000000) {
1496         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1497             wrd = insn & 0xf;
1498             rdlo = (insn >> 12) & 0xf;
1499             rdhi = (insn >> 16) & 0xf;
1500             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1501                 iwmmxt_load_reg(cpu_V0, wrd);
1502                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1503                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1504             } else {                                    /* TMCRR */
1505                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1506                 iwmmxt_store_reg(cpu_V0, wrd);
1507                 gen_op_iwmmxt_set_mup();
1508             }
1509             return 0;
1510         }
1511 
1512         wrd = (insn >> 12) & 0xf;
1513         addr = tcg_temp_new_i32();
1514         if (gen_iwmmxt_address(s, insn, addr)) {
1515             return 1;
1516         }
1517         if (insn & ARM_CP_RW_BIT) {
1518             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1519                 tmp = tcg_temp_new_i32();
1520                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1521                 iwmmxt_store_creg(wrd, tmp);
1522             } else {
1523                 i = 1;
1524                 if (insn & (1 << 8)) {
1525                     if (insn & (1 << 22)) {             /* WLDRD */
1526                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1527                         i = 0;
1528                     } else {                            /* WLDRW wRd */
1529                         tmp = tcg_temp_new_i32();
1530                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1531                     }
1532                 } else {
1533                     tmp = tcg_temp_new_i32();
1534                     if (insn & (1 << 22)) {             /* WLDRH */
1535                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1536                     } else {                            /* WLDRB */
1537                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1538                     }
1539                 }
1540                 if (i) {
1541                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1542                 }
1543                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1544             }
1545         } else {
1546             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1547                 tmp = iwmmxt_load_creg(wrd);
1548                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1549             } else {
1550                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1551                 tmp = tcg_temp_new_i32();
1552                 if (insn & (1 << 8)) {
1553                     if (insn & (1 << 22)) {             /* WSTRD */
1554                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1555                     } else {                            /* WSTRW wRd */
1556                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1557                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1558                     }
1559                 } else {
1560                     if (insn & (1 << 22)) {             /* WSTRH */
1561                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1562                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1563                     } else {                            /* WSTRB */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 }
1568             }
1569         }
1570         return 0;
1571     }
1572 
1573     if ((insn & 0x0f000000) != 0x0e000000)
1574         return 1;
1575 
1576     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1577     case 0x000:                                                 /* WOR */
1578         wrd = (insn >> 12) & 0xf;
1579         rd0 = (insn >> 0) & 0xf;
1580         rd1 = (insn >> 16) & 0xf;
1581         gen_op_iwmmxt_movq_M0_wRn(rd0);
1582         gen_op_iwmmxt_orq_M0_wRn(rd1);
1583         gen_op_iwmmxt_setpsr_nz();
1584         gen_op_iwmmxt_movq_wRn_M0(wrd);
1585         gen_op_iwmmxt_set_mup();
1586         gen_op_iwmmxt_set_cup();
1587         break;
1588     case 0x011:                                                 /* TMCR */
1589         if (insn & 0xf)
1590             return 1;
1591         rd = (insn >> 12) & 0xf;
1592         wrd = (insn >> 16) & 0xf;
1593         switch (wrd) {
1594         case ARM_IWMMXT_wCID:
1595         case ARM_IWMMXT_wCASF:
1596             break;
1597         case ARM_IWMMXT_wCon:
1598             gen_op_iwmmxt_set_cup();
1599             /* Fall through.  */
1600         case ARM_IWMMXT_wCSSF:
1601             tmp = iwmmxt_load_creg(wrd);
1602             tmp2 = load_reg(s, rd);
1603             tcg_gen_andc_i32(tmp, tmp, tmp2);
1604             iwmmxt_store_creg(wrd, tmp);
1605             break;
1606         case ARM_IWMMXT_wCGR0:
1607         case ARM_IWMMXT_wCGR1:
1608         case ARM_IWMMXT_wCGR2:
1609         case ARM_IWMMXT_wCGR3:
1610             gen_op_iwmmxt_set_cup();
1611             tmp = load_reg(s, rd);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         default:
1615             return 1;
1616         }
1617         break;
1618     case 0x100:                                                 /* WXOR */
1619         wrd = (insn >> 12) & 0xf;
1620         rd0 = (insn >> 0) & 0xf;
1621         rd1 = (insn >> 16) & 0xf;
1622         gen_op_iwmmxt_movq_M0_wRn(rd0);
1623         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1624         gen_op_iwmmxt_setpsr_nz();
1625         gen_op_iwmmxt_movq_wRn_M0(wrd);
1626         gen_op_iwmmxt_set_mup();
1627         gen_op_iwmmxt_set_cup();
1628         break;
1629     case 0x111:                                                 /* TMRC */
1630         if (insn & 0xf)
1631             return 1;
1632         rd = (insn >> 12) & 0xf;
1633         wrd = (insn >> 16) & 0xf;
1634         tmp = iwmmxt_load_creg(wrd);
1635         store_reg(s, rd, tmp);
1636         break;
1637     case 0x300:                                                 /* WANDN */
1638         wrd = (insn >> 12) & 0xf;
1639         rd0 = (insn >> 0) & 0xf;
1640         rd1 = (insn >> 16) & 0xf;
1641         gen_op_iwmmxt_movq_M0_wRn(rd0);
1642         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1643         gen_op_iwmmxt_andq_M0_wRn(rd1);
1644         gen_op_iwmmxt_setpsr_nz();
1645         gen_op_iwmmxt_movq_wRn_M0(wrd);
1646         gen_op_iwmmxt_set_mup();
1647         gen_op_iwmmxt_set_cup();
1648         break;
1649     case 0x200:                                                 /* WAND */
1650         wrd = (insn >> 12) & 0xf;
1651         rd0 = (insn >> 0) & 0xf;
1652         rd1 = (insn >> 16) & 0xf;
1653         gen_op_iwmmxt_movq_M0_wRn(rd0);
1654         gen_op_iwmmxt_andq_M0_wRn(rd1);
1655         gen_op_iwmmxt_setpsr_nz();
1656         gen_op_iwmmxt_movq_wRn_M0(wrd);
1657         gen_op_iwmmxt_set_mup();
1658         gen_op_iwmmxt_set_cup();
1659         break;
1660     case 0x810: case 0xa10:                             /* WMADD */
1661         wrd = (insn >> 12) & 0xf;
1662         rd0 = (insn >> 0) & 0xf;
1663         rd1 = (insn >> 16) & 0xf;
1664         gen_op_iwmmxt_movq_M0_wRn(rd0);
1665         if (insn & (1 << 21))
1666             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1667         else
1668             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1669         gen_op_iwmmxt_movq_wRn_M0(wrd);
1670         gen_op_iwmmxt_set_mup();
1671         break;
1672     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1673         wrd = (insn >> 12) & 0xf;
1674         rd0 = (insn >> 16) & 0xf;
1675         rd1 = (insn >> 0) & 0xf;
1676         gen_op_iwmmxt_movq_M0_wRn(rd0);
1677         switch ((insn >> 22) & 3) {
1678         case 0:
1679             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1680             break;
1681         case 1:
1682             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1683             break;
1684         case 2:
1685             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1686             break;
1687         case 3:
1688             return 1;
1689         }
1690         gen_op_iwmmxt_movq_wRn_M0(wrd);
1691         gen_op_iwmmxt_set_mup();
1692         gen_op_iwmmxt_set_cup();
1693         break;
1694     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1695         wrd = (insn >> 12) & 0xf;
1696         rd0 = (insn >> 16) & 0xf;
1697         rd1 = (insn >> 0) & 0xf;
1698         gen_op_iwmmxt_movq_M0_wRn(rd0);
1699         switch ((insn >> 22) & 3) {
1700         case 0:
1701             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1702             break;
1703         case 1:
1704             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1705             break;
1706         case 2:
1707             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1708             break;
1709         case 3:
1710             return 1;
1711         }
1712         gen_op_iwmmxt_movq_wRn_M0(wrd);
1713         gen_op_iwmmxt_set_mup();
1714         gen_op_iwmmxt_set_cup();
1715         break;
1716     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1717         wrd = (insn >> 12) & 0xf;
1718         rd0 = (insn >> 16) & 0xf;
1719         rd1 = (insn >> 0) & 0xf;
1720         gen_op_iwmmxt_movq_M0_wRn(rd0);
1721         if (insn & (1 << 22))
1722             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1723         else
1724             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1725         if (!(insn & (1 << 20)))
1726             gen_op_iwmmxt_addl_M0_wRn(wrd);
1727         gen_op_iwmmxt_movq_wRn_M0(wrd);
1728         gen_op_iwmmxt_set_mup();
1729         break;
1730     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1731         wrd = (insn >> 12) & 0xf;
1732         rd0 = (insn >> 16) & 0xf;
1733         rd1 = (insn >> 0) & 0xf;
1734         gen_op_iwmmxt_movq_M0_wRn(rd0);
1735         if (insn & (1 << 21)) {
1736             if (insn & (1 << 20))
1737                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1738             else
1739                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1740         } else {
1741             if (insn & (1 << 20))
1742                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1743             else
1744                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1745         }
1746         gen_op_iwmmxt_movq_wRn_M0(wrd);
1747         gen_op_iwmmxt_set_mup();
1748         break;
1749     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1750         wrd = (insn >> 12) & 0xf;
1751         rd0 = (insn >> 16) & 0xf;
1752         rd1 = (insn >> 0) & 0xf;
1753         gen_op_iwmmxt_movq_M0_wRn(rd0);
1754         if (insn & (1 << 21))
1755             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1756         else
1757             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1758         if (!(insn & (1 << 20))) {
1759             iwmmxt_load_reg(cpu_V1, wrd);
1760             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1761         }
1762         gen_op_iwmmxt_movq_wRn_M0(wrd);
1763         gen_op_iwmmxt_set_mup();
1764         break;
1765     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1766         wrd = (insn >> 12) & 0xf;
1767         rd0 = (insn >> 16) & 0xf;
1768         rd1 = (insn >> 0) & 0xf;
1769         gen_op_iwmmxt_movq_M0_wRn(rd0);
1770         switch ((insn >> 22) & 3) {
1771         case 0:
1772             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1773             break;
1774         case 1:
1775             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1776             break;
1777         case 2:
1778             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1779             break;
1780         case 3:
1781             return 1;
1782         }
1783         gen_op_iwmmxt_movq_wRn_M0(wrd);
1784         gen_op_iwmmxt_set_mup();
1785         gen_op_iwmmxt_set_cup();
1786         break;
1787     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1788         wrd = (insn >> 12) & 0xf;
1789         rd0 = (insn >> 16) & 0xf;
1790         rd1 = (insn >> 0) & 0xf;
1791         gen_op_iwmmxt_movq_M0_wRn(rd0);
1792         if (insn & (1 << 22)) {
1793             if (insn & (1 << 20))
1794                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1795             else
1796                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1797         } else {
1798             if (insn & (1 << 20))
1799                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1800             else
1801                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1802         }
1803         gen_op_iwmmxt_movq_wRn_M0(wrd);
1804         gen_op_iwmmxt_set_mup();
1805         gen_op_iwmmxt_set_cup();
1806         break;
1807     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1808         wrd = (insn >> 12) & 0xf;
1809         rd0 = (insn >> 16) & 0xf;
1810         rd1 = (insn >> 0) & 0xf;
1811         gen_op_iwmmxt_movq_M0_wRn(rd0);
1812         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1813         tcg_gen_andi_i32(tmp, tmp, 7);
1814         iwmmxt_load_reg(cpu_V1, rd1);
1815         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1816         gen_op_iwmmxt_movq_wRn_M0(wrd);
1817         gen_op_iwmmxt_set_mup();
1818         break;
1819     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1820         if (((insn >> 6) & 3) == 3)
1821             return 1;
1822         rd = (insn >> 12) & 0xf;
1823         wrd = (insn >> 16) & 0xf;
1824         tmp = load_reg(s, rd);
1825         gen_op_iwmmxt_movq_M0_wRn(wrd);
1826         switch ((insn >> 6) & 3) {
1827         case 0:
1828             tmp2 = tcg_constant_i32(0xff);
1829             tmp3 = tcg_constant_i32((insn & 7) << 3);
1830             break;
1831         case 1:
1832             tmp2 = tcg_constant_i32(0xffff);
1833             tmp3 = tcg_constant_i32((insn & 3) << 4);
1834             break;
1835         case 2:
1836             tmp2 = tcg_constant_i32(0xffffffff);
1837             tmp3 = tcg_constant_i32((insn & 1) << 5);
1838             break;
1839         default:
1840             g_assert_not_reached();
1841         }
1842         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1843         gen_op_iwmmxt_movq_wRn_M0(wrd);
1844         gen_op_iwmmxt_set_mup();
1845         break;
1846     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1847         rd = (insn >> 12) & 0xf;
1848         wrd = (insn >> 16) & 0xf;
1849         if (rd == 15 || ((insn >> 22) & 3) == 3)
1850             return 1;
1851         gen_op_iwmmxt_movq_M0_wRn(wrd);
1852         tmp = tcg_temp_new_i32();
1853         switch ((insn >> 22) & 3) {
1854         case 0:
1855             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1856             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1857             if (insn & 8) {
1858                 tcg_gen_ext8s_i32(tmp, tmp);
1859             } else {
1860                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1861             }
1862             break;
1863         case 1:
1864             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1865             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1866             if (insn & 8) {
1867                 tcg_gen_ext16s_i32(tmp, tmp);
1868             } else {
1869                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1870             }
1871             break;
1872         case 2:
1873             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1874             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1875             break;
1876         }
1877         store_reg(s, rd, tmp);
1878         break;
1879     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1880         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1881             return 1;
1882         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1883         switch ((insn >> 22) & 3) {
1884         case 0:
1885             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1886             break;
1887         case 1:
1888             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1889             break;
1890         case 2:
1891             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1892             break;
1893         }
1894         tcg_gen_shli_i32(tmp, tmp, 28);
1895         gen_set_nzcv(tmp);
1896         break;
1897     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1898         if (((insn >> 6) & 3) == 3)
1899             return 1;
1900         rd = (insn >> 12) & 0xf;
1901         wrd = (insn >> 16) & 0xf;
1902         tmp = load_reg(s, rd);
1903         switch ((insn >> 6) & 3) {
1904         case 0:
1905             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1906             break;
1907         case 1:
1908             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1909             break;
1910         case 2:
1911             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1912             break;
1913         }
1914         gen_op_iwmmxt_movq_wRn_M0(wrd);
1915         gen_op_iwmmxt_set_mup();
1916         break;
1917     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1918         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1919             return 1;
1920         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1921         tmp2 = tcg_temp_new_i32();
1922         tcg_gen_mov_i32(tmp2, tmp);
1923         switch ((insn >> 22) & 3) {
1924         case 0:
1925             for (i = 0; i < 7; i ++) {
1926                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1927                 tcg_gen_and_i32(tmp, tmp, tmp2);
1928             }
1929             break;
1930         case 1:
1931             for (i = 0; i < 3; i ++) {
1932                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1933                 tcg_gen_and_i32(tmp, tmp, tmp2);
1934             }
1935             break;
1936         case 2:
1937             tcg_gen_shli_i32(tmp2, tmp2, 16);
1938             tcg_gen_and_i32(tmp, tmp, tmp2);
1939             break;
1940         }
1941         gen_set_nzcv(tmp);
1942         break;
1943     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1944         wrd = (insn >> 12) & 0xf;
1945         rd0 = (insn >> 16) & 0xf;
1946         gen_op_iwmmxt_movq_M0_wRn(rd0);
1947         switch ((insn >> 22) & 3) {
1948         case 0:
1949             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1950             break;
1951         case 1:
1952             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1953             break;
1954         case 2:
1955             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1956             break;
1957         case 3:
1958             return 1;
1959         }
1960         gen_op_iwmmxt_movq_wRn_M0(wrd);
1961         gen_op_iwmmxt_set_mup();
1962         break;
1963     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1964         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1965             return 1;
1966         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1967         tmp2 = tcg_temp_new_i32();
1968         tcg_gen_mov_i32(tmp2, tmp);
1969         switch ((insn >> 22) & 3) {
1970         case 0:
1971             for (i = 0; i < 7; i ++) {
1972                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1973                 tcg_gen_or_i32(tmp, tmp, tmp2);
1974             }
1975             break;
1976         case 1:
1977             for (i = 0; i < 3; i ++) {
1978                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1979                 tcg_gen_or_i32(tmp, tmp, tmp2);
1980             }
1981             break;
1982         case 2:
1983             tcg_gen_shli_i32(tmp2, tmp2, 16);
1984             tcg_gen_or_i32(tmp, tmp, tmp2);
1985             break;
1986         }
1987         gen_set_nzcv(tmp);
1988         break;
1989     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1990         rd = (insn >> 12) & 0xf;
1991         rd0 = (insn >> 16) & 0xf;
1992         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1993             return 1;
1994         gen_op_iwmmxt_movq_M0_wRn(rd0);
1995         tmp = tcg_temp_new_i32();
1996         switch ((insn >> 22) & 3) {
1997         case 0:
1998             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1999             break;
2000         case 1:
2001             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2002             break;
2003         case 2:
2004             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2005             break;
2006         }
2007         store_reg(s, rd, tmp);
2008         break;
2009     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2010     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2011         wrd = (insn >> 12) & 0xf;
2012         rd0 = (insn >> 16) & 0xf;
2013         rd1 = (insn >> 0) & 0xf;
2014         gen_op_iwmmxt_movq_M0_wRn(rd0);
2015         switch ((insn >> 22) & 3) {
2016         case 0:
2017             if (insn & (1 << 21))
2018                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2019             else
2020                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2021             break;
2022         case 1:
2023             if (insn & (1 << 21))
2024                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2025             else
2026                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2027             break;
2028         case 2:
2029             if (insn & (1 << 21))
2030                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2031             else
2032                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2033             break;
2034         case 3:
2035             return 1;
2036         }
2037         gen_op_iwmmxt_movq_wRn_M0(wrd);
2038         gen_op_iwmmxt_set_mup();
2039         gen_op_iwmmxt_set_cup();
2040         break;
2041     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2042     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2043         wrd = (insn >> 12) & 0xf;
2044         rd0 = (insn >> 16) & 0xf;
2045         gen_op_iwmmxt_movq_M0_wRn(rd0);
2046         switch ((insn >> 22) & 3) {
2047         case 0:
2048             if (insn & (1 << 21))
2049                 gen_op_iwmmxt_unpacklsb_M0();
2050             else
2051                 gen_op_iwmmxt_unpacklub_M0();
2052             break;
2053         case 1:
2054             if (insn & (1 << 21))
2055                 gen_op_iwmmxt_unpacklsw_M0();
2056             else
2057                 gen_op_iwmmxt_unpackluw_M0();
2058             break;
2059         case 2:
2060             if (insn & (1 << 21))
2061                 gen_op_iwmmxt_unpacklsl_M0();
2062             else
2063                 gen_op_iwmmxt_unpacklul_M0();
2064             break;
2065         case 3:
2066             return 1;
2067         }
2068         gen_op_iwmmxt_movq_wRn_M0(wrd);
2069         gen_op_iwmmxt_set_mup();
2070         gen_op_iwmmxt_set_cup();
2071         break;
2072     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2073     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2074         wrd = (insn >> 12) & 0xf;
2075         rd0 = (insn >> 16) & 0xf;
2076         gen_op_iwmmxt_movq_M0_wRn(rd0);
2077         switch ((insn >> 22) & 3) {
2078         case 0:
2079             if (insn & (1 << 21))
2080                 gen_op_iwmmxt_unpackhsb_M0();
2081             else
2082                 gen_op_iwmmxt_unpackhub_M0();
2083             break;
2084         case 1:
2085             if (insn & (1 << 21))
2086                 gen_op_iwmmxt_unpackhsw_M0();
2087             else
2088                 gen_op_iwmmxt_unpackhuw_M0();
2089             break;
2090         case 2:
2091             if (insn & (1 << 21))
2092                 gen_op_iwmmxt_unpackhsl_M0();
2093             else
2094                 gen_op_iwmmxt_unpackhul_M0();
2095             break;
2096         case 3:
2097             return 1;
2098         }
2099         gen_op_iwmmxt_movq_wRn_M0(wrd);
2100         gen_op_iwmmxt_set_mup();
2101         gen_op_iwmmxt_set_cup();
2102         break;
2103     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2104     case 0x214: case 0x614: case 0xa14: case 0xe14:
2105         if (((insn >> 22) & 3) == 0)
2106             return 1;
2107         wrd = (insn >> 12) & 0xf;
2108         rd0 = (insn >> 16) & 0xf;
2109         gen_op_iwmmxt_movq_M0_wRn(rd0);
2110         tmp = tcg_temp_new_i32();
2111         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2112             return 1;
2113         }
2114         switch ((insn >> 22) & 3) {
2115         case 1:
2116             gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp);
2117             break;
2118         case 2:
2119             gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp);
2120             break;
2121         case 3:
2122             gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp);
2123             break;
2124         }
2125         gen_op_iwmmxt_movq_wRn_M0(wrd);
2126         gen_op_iwmmxt_set_mup();
2127         gen_op_iwmmxt_set_cup();
2128         break;
2129     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2130     case 0x014: case 0x414: case 0x814: case 0xc14:
2131         if (((insn >> 22) & 3) == 0)
2132             return 1;
2133         wrd = (insn >> 12) & 0xf;
2134         rd0 = (insn >> 16) & 0xf;
2135         gen_op_iwmmxt_movq_M0_wRn(rd0);
2136         tmp = tcg_temp_new_i32();
2137         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2138             return 1;
2139         }
2140         switch ((insn >> 22) & 3) {
2141         case 1:
2142             gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp);
2143             break;
2144         case 2:
2145             gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp);
2146             break;
2147         case 3:
2148             gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp);
2149             break;
2150         }
2151         gen_op_iwmmxt_movq_wRn_M0(wrd);
2152         gen_op_iwmmxt_set_mup();
2153         gen_op_iwmmxt_set_cup();
2154         break;
2155     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2156     case 0x114: case 0x514: case 0x914: case 0xd14:
2157         if (((insn >> 22) & 3) == 0)
2158             return 1;
2159         wrd = (insn >> 12) & 0xf;
2160         rd0 = (insn >> 16) & 0xf;
2161         gen_op_iwmmxt_movq_M0_wRn(rd0);
2162         tmp = tcg_temp_new_i32();
2163         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2164             return 1;
2165         }
2166         switch ((insn >> 22) & 3) {
2167         case 1:
2168             gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp);
2169             break;
2170         case 2:
2171             gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp);
2172             break;
2173         case 3:
2174             gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp);
2175             break;
2176         }
2177         gen_op_iwmmxt_movq_wRn_M0(wrd);
2178         gen_op_iwmmxt_set_mup();
2179         gen_op_iwmmxt_set_cup();
2180         break;
2181     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2182     case 0x314: case 0x714: case 0xb14: case 0xf14:
2183         if (((insn >> 22) & 3) == 0)
2184             return 1;
2185         wrd = (insn >> 12) & 0xf;
2186         rd0 = (insn >> 16) & 0xf;
2187         gen_op_iwmmxt_movq_M0_wRn(rd0);
2188         tmp = tcg_temp_new_i32();
2189         switch ((insn >> 22) & 3) {
2190         case 1:
2191             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2192                 return 1;
2193             }
2194             gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp);
2195             break;
2196         case 2:
2197             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2198                 return 1;
2199             }
2200             gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp);
2201             break;
2202         case 3:
2203             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2204                 return 1;
2205             }
2206             gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp);
2207             break;
2208         }
2209         gen_op_iwmmxt_movq_wRn_M0(wrd);
2210         gen_op_iwmmxt_set_mup();
2211         gen_op_iwmmxt_set_cup();
2212         break;
2213     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2214     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2215         wrd = (insn >> 12) & 0xf;
2216         rd0 = (insn >> 16) & 0xf;
2217         rd1 = (insn >> 0) & 0xf;
2218         gen_op_iwmmxt_movq_M0_wRn(rd0);
2219         switch ((insn >> 22) & 3) {
2220         case 0:
2221             if (insn & (1 << 21))
2222                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2223             else
2224                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2225             break;
2226         case 1:
2227             if (insn & (1 << 21))
2228                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2229             else
2230                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2231             break;
2232         case 2:
2233             if (insn & (1 << 21))
2234                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2235             else
2236                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2237             break;
2238         case 3:
2239             return 1;
2240         }
2241         gen_op_iwmmxt_movq_wRn_M0(wrd);
2242         gen_op_iwmmxt_set_mup();
2243         break;
2244     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2245     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2246         wrd = (insn >> 12) & 0xf;
2247         rd0 = (insn >> 16) & 0xf;
2248         rd1 = (insn >> 0) & 0xf;
2249         gen_op_iwmmxt_movq_M0_wRn(rd0);
2250         switch ((insn >> 22) & 3) {
2251         case 0:
2252             if (insn & (1 << 21))
2253                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2254             else
2255                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2256             break;
2257         case 1:
2258             if (insn & (1 << 21))
2259                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2260             else
2261                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2262             break;
2263         case 2:
2264             if (insn & (1 << 21))
2265                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2266             else
2267                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2268             break;
2269         case 3:
2270             return 1;
2271         }
2272         gen_op_iwmmxt_movq_wRn_M0(wrd);
2273         gen_op_iwmmxt_set_mup();
2274         break;
2275     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2276     case 0x402: case 0x502: case 0x602: case 0x702:
2277         wrd = (insn >> 12) & 0xf;
2278         rd0 = (insn >> 16) & 0xf;
2279         rd1 = (insn >> 0) & 0xf;
2280         gen_op_iwmmxt_movq_M0_wRn(rd0);
2281         iwmmxt_load_reg(cpu_V1, rd1);
2282         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2283                                 tcg_constant_i32((insn >> 20) & 3));
2284         gen_op_iwmmxt_movq_wRn_M0(wrd);
2285         gen_op_iwmmxt_set_mup();
2286         break;
2287     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2288     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2289     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2290     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2291         wrd = (insn >> 12) & 0xf;
2292         rd0 = (insn >> 16) & 0xf;
2293         rd1 = (insn >> 0) & 0xf;
2294         gen_op_iwmmxt_movq_M0_wRn(rd0);
2295         switch ((insn >> 20) & 0xf) {
2296         case 0x0:
2297             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2298             break;
2299         case 0x1:
2300             gen_op_iwmmxt_subub_M0_wRn(rd1);
2301             break;
2302         case 0x3:
2303             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2304             break;
2305         case 0x4:
2306             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2307             break;
2308         case 0x5:
2309             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2310             break;
2311         case 0x7:
2312             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2313             break;
2314         case 0x8:
2315             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2316             break;
2317         case 0x9:
2318             gen_op_iwmmxt_subul_M0_wRn(rd1);
2319             break;
2320         case 0xb:
2321             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2322             break;
2323         default:
2324             return 1;
2325         }
2326         gen_op_iwmmxt_movq_wRn_M0(wrd);
2327         gen_op_iwmmxt_set_mup();
2328         gen_op_iwmmxt_set_cup();
2329         break;
2330     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2331     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2332     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2333     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2334         wrd = (insn >> 12) & 0xf;
2335         rd0 = (insn >> 16) & 0xf;
2336         gen_op_iwmmxt_movq_M0_wRn(rd0);
2337         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2338         gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp);
2339         gen_op_iwmmxt_movq_wRn_M0(wrd);
2340         gen_op_iwmmxt_set_mup();
2341         gen_op_iwmmxt_set_cup();
2342         break;
2343     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2344     case 0x418: case 0x518: case 0x618: case 0x718:
2345     case 0x818: case 0x918: case 0xa18: case 0xb18:
2346     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2347         wrd = (insn >> 12) & 0xf;
2348         rd0 = (insn >> 16) & 0xf;
2349         rd1 = (insn >> 0) & 0xf;
2350         gen_op_iwmmxt_movq_M0_wRn(rd0);
2351         switch ((insn >> 20) & 0xf) {
2352         case 0x0:
2353             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2354             break;
2355         case 0x1:
2356             gen_op_iwmmxt_addub_M0_wRn(rd1);
2357             break;
2358         case 0x3:
2359             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2360             break;
2361         case 0x4:
2362             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2363             break;
2364         case 0x5:
2365             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2366             break;
2367         case 0x7:
2368             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2369             break;
2370         case 0x8:
2371             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2372             break;
2373         case 0x9:
2374             gen_op_iwmmxt_addul_M0_wRn(rd1);
2375             break;
2376         case 0xb:
2377             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2378             break;
2379         default:
2380             return 1;
2381         }
2382         gen_op_iwmmxt_movq_wRn_M0(wrd);
2383         gen_op_iwmmxt_set_mup();
2384         gen_op_iwmmxt_set_cup();
2385         break;
2386     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2387     case 0x408: case 0x508: case 0x608: case 0x708:
2388     case 0x808: case 0x908: case 0xa08: case 0xb08:
2389     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2390         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2391             return 1;
2392         wrd = (insn >> 12) & 0xf;
2393         rd0 = (insn >> 16) & 0xf;
2394         rd1 = (insn >> 0) & 0xf;
2395         gen_op_iwmmxt_movq_M0_wRn(rd0);
2396         switch ((insn >> 22) & 3) {
2397         case 1:
2398             if (insn & (1 << 21))
2399                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2400             else
2401                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2402             break;
2403         case 2:
2404             if (insn & (1 << 21))
2405                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2406             else
2407                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2408             break;
2409         case 3:
2410             if (insn & (1 << 21))
2411                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2412             else
2413                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2414             break;
2415         }
2416         gen_op_iwmmxt_movq_wRn_M0(wrd);
2417         gen_op_iwmmxt_set_mup();
2418         gen_op_iwmmxt_set_cup();
2419         break;
2420     case 0x201: case 0x203: case 0x205: case 0x207:
2421     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2422     case 0x211: case 0x213: case 0x215: case 0x217:
2423     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2424         wrd = (insn >> 5) & 0xf;
2425         rd0 = (insn >> 12) & 0xf;
2426         rd1 = (insn >> 0) & 0xf;
2427         if (rd0 == 0xf || rd1 == 0xf)
2428             return 1;
2429         gen_op_iwmmxt_movq_M0_wRn(wrd);
2430         tmp = load_reg(s, rd0);
2431         tmp2 = load_reg(s, rd1);
2432         switch ((insn >> 16) & 0xf) {
2433         case 0x0:                                       /* TMIA */
2434             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2435             break;
2436         case 0x8:                                       /* TMIAPH */
2437             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2438             break;
2439         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2440             if (insn & (1 << 16))
2441                 tcg_gen_shri_i32(tmp, tmp, 16);
2442             if (insn & (1 << 17))
2443                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2444             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2445             break;
2446         default:
2447             return 1;
2448         }
2449         gen_op_iwmmxt_movq_wRn_M0(wrd);
2450         gen_op_iwmmxt_set_mup();
2451         break;
2452     default:
2453         return 1;
2454     }
2455 
2456     return 0;
2457 }
2458 
2459 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2460    (ie. an undefined instruction).  */
2461 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2462 {
2463     int acc, rd0, rd1, rdhi, rdlo;
2464     TCGv_i32 tmp, tmp2;
2465 
2466     if ((insn & 0x0ff00f10) == 0x0e200010) {
2467         /* Multiply with Internal Accumulate Format */
2468         rd0 = (insn >> 12) & 0xf;
2469         rd1 = insn & 0xf;
2470         acc = (insn >> 5) & 7;
2471 
2472         if (acc != 0)
2473             return 1;
2474 
2475         tmp = load_reg(s, rd0);
2476         tmp2 = load_reg(s, rd1);
2477         switch ((insn >> 16) & 0xf) {
2478         case 0x0:                                       /* MIA */
2479             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2480             break;
2481         case 0x8:                                       /* MIAPH */
2482             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2483             break;
2484         case 0xc:                                       /* MIABB */
2485         case 0xd:                                       /* MIABT */
2486         case 0xe:                                       /* MIATB */
2487         case 0xf:                                       /* MIATT */
2488             if (insn & (1 << 16))
2489                 tcg_gen_shri_i32(tmp, tmp, 16);
2490             if (insn & (1 << 17))
2491                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2492             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2493             break;
2494         default:
2495             return 1;
2496         }
2497 
2498         gen_op_iwmmxt_movq_wRn_M0(acc);
2499         return 0;
2500     }
2501 
2502     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2503         /* Internal Accumulator Access Format */
2504         rdhi = (insn >> 16) & 0xf;
2505         rdlo = (insn >> 12) & 0xf;
2506         acc = insn & 7;
2507 
2508         if (acc != 0)
2509             return 1;
2510 
2511         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2512             iwmmxt_load_reg(cpu_V0, acc);
2513             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2514             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2515             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2516         } else {                                        /* MAR */
2517             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2518             iwmmxt_store_reg(cpu_V0, acc);
2519         }
2520         return 0;
2521     }
2522 
2523     return 1;
2524 }
2525 
/*
 * Emit the TCG "lookup and goto ptr" sequence.  Thin wrapper kept so the
 * jump-generation code in this file has a single spelling for it.
 */
static void gen_goto_ptr(void)
{
    tcg_gen_lookup_and_goto_ptr();
}
2530 
2531 /* This will end the TB but doesn't guarantee we'll return to
2532  * cpu_loop_exec. Any live exit_requests will be processed as we
2533  * enter the next TB.
2534  */
2535 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2536 {
2537     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2538         /*
2539          * For pcrel, the pc must always be up-to-date on entry to
2540          * the linked TB, so that it can use simple additions for all
2541          * further adjustments.  For !pcrel, the linked TB is compiled
2542          * to know its full virtual address, so we can delay the
2543          * update to pc to the unlinked path.  A long chain of links
2544          * can thus avoid many updates to the PC.
2545          */
2546         if (tb_cflags(s->base.tb) & CF_PCREL) {
2547             gen_update_pc(s, diff);
2548             tcg_gen_goto_tb(n);
2549         } else {
2550             tcg_gen_goto_tb(n);
2551             gen_update_pc(s, diff);
2552         }
2553         tcg_gen_exit_tb(s->base.tb, n);
2554     } else {
2555         gen_update_pc(s, diff);
2556         gen_goto_ptr();
2557     }
2558     s->base.is_jmp = DISAS_NORETURN;
2559 }
2560 
2561 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2562 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2563 {
2564     if (unlikely(s->ss_active)) {
2565         /* An indirect jump so that we still trigger the debug exception.  */
2566         gen_update_pc(s, diff);
2567         s->base.is_jmp = DISAS_JUMP;
2568         return;
2569     }
2570     switch (s->base.is_jmp) {
2571     case DISAS_NEXT:
2572     case DISAS_TOO_MANY:
2573     case DISAS_NORETURN:
2574         /*
2575          * The normal case: just go to the destination TB.
2576          * NB: NORETURN happens if we generate code like
2577          *    gen_brcondi(l);
2578          *    gen_jmp();
2579          *    gen_set_label(l);
2580          *    gen_jmp();
2581          * on the second call to gen_jmp().
2582          */
2583         gen_goto_tb(s, tbno, diff);
2584         break;
2585     case DISAS_UPDATE_NOCHAIN:
2586     case DISAS_UPDATE_EXIT:
2587         /*
2588          * We already decided we're leaving the TB for some other reason.
2589          * Avoid using goto_tb so we really do exit back to the main loop
2590          * and don't chain to another TB.
2591          */
2592         gen_update_pc(s, diff);
2593         gen_goto_ptr();
2594         s->base.is_jmp = DISAS_NORETURN;
2595         break;
2596     default:
2597         /*
2598          * We shouldn't be emitting code for a jump and also have
2599          * is_jmp set to one of the special cases like DISAS_SWI.
2600          */
2601         g_assert_not_reached();
2602     }
2603 }
2604 
2605 static inline void gen_jmp(DisasContext *s, target_long diff)
2606 {
2607     gen_jmp_tb(s, diff, 0);
2608 }
2609 
2610 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2611 {
2612     if (x)
2613         tcg_gen_sari_i32(t0, t0, 16);
2614     else
2615         gen_sxth(t0);
2616     if (y)
2617         tcg_gen_sari_i32(t1, t1, 16);
2618     else
2619         gen_sxth(t1);
2620     tcg_gen_mul_i32(t0, t0, t1);
2621 }
2622 
2623 /* Return the mask of PSR bits set by a MSR instruction.  */
2624 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2625 {
2626     uint32_t mask = 0;
2627 
2628     if (flags & (1 << 0)) {
2629         mask |= 0xff;
2630     }
2631     if (flags & (1 << 1)) {
2632         mask |= 0xff00;
2633     }
2634     if (flags & (1 << 2)) {
2635         mask |= 0xff0000;
2636     }
2637     if (flags & (1 << 3)) {
2638         mask |= 0xff000000;
2639     }
2640 
2641     /* Mask out undefined and reserved bits.  */
2642     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2643 
2644     /* Mask out execution state.  */
2645     if (!spsr) {
2646         mask &= ~CPSR_EXEC;
2647     }
2648 
2649     /* Mask out privileged bits.  */
2650     if (IS_USER(s)) {
2651         mask &= CPSR_USER;
2652     }
2653     return mask;
2654 }
2655 
2656 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2657 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2658 {
2659     TCGv_i32 tmp;
2660     if (spsr) {
2661         /* ??? This is also undefined in system mode.  */
2662         if (IS_USER(s))
2663             return 1;
2664 
2665         tmp = load_cpu_field(spsr);
2666         tcg_gen_andi_i32(tmp, tmp, ~mask);
2667         tcg_gen_andi_i32(t0, t0, mask);
2668         tcg_gen_or_i32(tmp, tmp, t0);
2669         store_cpu_field(tmp, spsr);
2670     } else {
2671         gen_set_cpsr(t0, mask);
2672     }
2673     gen_lookup_tb(s);
2674     return 0;
2675 }
2676 
2677 /* Returns nonzero if access to the PSR is not permitted.  */
2678 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2679 {
2680     TCGv_i32 tmp;
2681     tmp = tcg_temp_new_i32();
2682     tcg_gen_movi_i32(tmp, val);
2683     return gen_set_psr(s, mask, spsr, tmp);
2684 }
2685 
/*
 * Decode and access-check an MSR (banked) / MRS (banked) instruction.
 *
 * @s:       translation context
 * @r:       the R field of the encoding (nonzero selects the SPSR table)
 * @sysm:    the SYSm field of the encoding
 * @rn:      the general purpose source/destination register number
 * @tgtmode: out: ARM_CPU_MODE_* value of the mode whose register is accessed
 * @regno:   out: register number within that mode; 16 is used for the
 *           SPSR and 17 for ELR_Hyp
 *
 * Returns true and fills in *tgtmode and *regno if the access is allowed
 * as far as can be determined at translate time.  Otherwise emits code
 * to raise an exception and returns false; the out parameters may have
 * been partially written in that case and must not be relied on.
 */
static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
                                     int *tgtmode, int *regno)
{
    /*
     * Decode the r and sysm fields of MSR/MRS banked accesses into
     * the target mode and register number, and identify the various
     * unpredictable cases.
     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
     *  + executed in user mode
     *  + using R15 as the src/dest register
     *  + accessing an unimplemented register
     *  + accessing a register that's inaccessible at current PL/security state
     *  + accessing a register that you could access with a different insn
     * We choose to UNDEF in all these cases.
     * Since we don't know which of the various AArch32 modes we are in
     * we have to defer some checks to runtime.
     * Accesses to Monitor mode registers from Secure EL1 (which implies
     * that EL3 is AArch64) must trap to Secure EL2 if it is enabled,
     * otherwise to EL3.
     *
     * If the access checks fail this function will emit code to take
     * an exception and return false. Otherwise it will return true,
     * and set *tgtmode and *regno appropriately.
     */
    /*
     * These instructions are present only in ARMv8, or in ARMv7 with the
     * Virtualization Extensions.
     */
    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
        goto undef;
    }

    /* User mode and R15 as operand are both treated as UNDEF. */
    if (IS_USER(s) || rn == 15) {
        goto undef;
    }

    /*
     * The table in the v8 ARM ARM section F5.2.3 describes the encoding
     * of registers into (r, sysm).
     */
    if (r) {
        /* SPSRs for other modes */
        switch (sysm) {
        case 0xe: /* SPSR_fiq */
            *tgtmode = ARM_CPU_MODE_FIQ;
            break;
        case 0x10: /* SPSR_irq */
            *tgtmode = ARM_CPU_MODE_IRQ;
            break;
        case 0x12: /* SPSR_svc */
            *tgtmode = ARM_CPU_MODE_SVC;
            break;
        case 0x14: /* SPSR_abt */
            *tgtmode = ARM_CPU_MODE_ABT;
            break;
        case 0x16: /* SPSR_und */
            *tgtmode = ARM_CPU_MODE_UND;
            break;
        case 0x1c: /* SPSR_mon */
            *tgtmode = ARM_CPU_MODE_MON;
            break;
        case 0x1e: /* SPSR_hyp */
            *tgtmode = ARM_CPU_MODE_HYP;
            break;
        default: /* unallocated */
            goto undef;
        }
        /* We arbitrarily assign SPSR a register number of 16. */
        *regno = 16;
    } else {
        /* general purpose registers for other modes */
        switch (sysm) {
        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
            *tgtmode = ARM_CPU_MODE_USR;
            *regno = sysm + 8;
            break;
        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
            *tgtmode = ARM_CPU_MODE_FIQ;
            *regno = sysm;
            break;
        /* In the remaining groups sysm bit 0 selects r13 (1) or r14 (0). */
        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
            *tgtmode = ARM_CPU_MODE_IRQ;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
            *tgtmode = ARM_CPU_MODE_SVC;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
            *tgtmode = ARM_CPU_MODE_ABT;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
            *tgtmode = ARM_CPU_MODE_UND;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
            *tgtmode = ARM_CPU_MODE_MON;
            *regno = sysm & 1 ? 13 : 14;
            break;
        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
            *tgtmode = ARM_CPU_MODE_HYP;
            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
            *regno = sysm & 1 ? 13 : 17;
            break;
        default: /* unallocated */
            goto undef;
        }
    }

    /*
     * Catch the 'accessing inaccessible register' cases we can detect
     * at translate time.
     */
    switch (*tgtmode) {
    case ARM_CPU_MODE_MON:
        /* Monitor registers need EL3 to exist and a Secure context. */
        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
            goto undef;
        }
        if (s->current_el == 1) {
            /*
             * If we're in Secure EL1 (which implies that EL3 is AArch64)
             * then accesses to Mon registers trap to Secure EL2, if it exists,
             * otherwise EL3.
             */
            TCGv_i32 tcg_el;

            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
                dc_isar_feature(aa64_sel2, s)) {
                /*
                 * Target EL is EL<3 minus SCR_EL3.EEL2>: the sign-extended
                 * 1-bit field is 0 or -1, so adding 3 yields 3 or 2.
                 */
                tcg_el = load_cpu_field_low32(cp15.scr_el3);
                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
            } else {
                tcg_el = tcg_constant_i32(3);
            }

            gen_exception_insn_el_v(s, 0, EXCP_UDEF,
                                    syn_uncategorized(), tcg_el);
            return false;
        }
        break;
    case ARM_CPU_MODE_HYP:
        /*
         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
         * (and so we can forbid accesses from EL2 or below). elr_hyp
         * can be accessed also from Hyp mode, so forbid accesses from
         * EL0 or EL1.
         */
        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
            (s->current_el < 3 && *regno != 17)) {
            goto undef;
        }
        break;
    default:
        break;
    }

    return true;

undef:
    /* If we get here then some access check did not pass */
    gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
    return false;
}
2846 
2847 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2848 {
2849     TCGv_i32 tcg_reg;
2850     int tgtmode = 0, regno = 0;
2851 
2852     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2853         return;
2854     }
2855 
2856     /* Sync state because msr_banked() can raise exceptions */
2857     gen_set_condexec(s);
2858     gen_update_pc(s, 0);
2859     tcg_reg = load_reg(s, rn);
2860     gen_helper_msr_banked(tcg_env, tcg_reg,
2861                           tcg_constant_i32(tgtmode),
2862                           tcg_constant_i32(regno));
2863     s->base.is_jmp = DISAS_UPDATE_EXIT;
2864 }
2865 
2866 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2867 {
2868     TCGv_i32 tcg_reg;
2869     int tgtmode = 0, regno = 0;
2870 
2871     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2872         return;
2873     }
2874 
2875     /* Sync state because mrs_banked() can raise exceptions */
2876     gen_set_condexec(s);
2877     gen_update_pc(s, 0);
2878     tcg_reg = tcg_temp_new_i32();
2879     gen_helper_mrs_banked(tcg_reg, tcg_env,
2880                           tcg_constant_i32(tgtmode),
2881                           tcg_constant_i32(regno));
2882     store_reg(s, rn, tcg_reg);
2883     s->base.is_jmp = DISAS_UPDATE_EXIT;
2884 }
2885 
/*
 * Store value to PC as for an exception return (ie don't
 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
 * will do the masking based on the new value of the Thumb bit.
 *
 * @s is not used here; @pc is copied verbatim into cpu_R[15].
 */
static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
{
    tcg_gen_mov_i32(cpu_R[15], pc);
}
2894 
/*
 * Generate a v6 exception return.  Marks both values as dead.
 *
 * @pc:   new PC value, stored unmasked via store_pc_exc_ret()
 * @cpsr: new CPSR value, passed to the cpsr_write_eret helper
 *
 * Ends the TB with DISAS_EXIT.
 */
static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
{
    store_pc_exc_ret(s, pc);
    /*
     * The cpsr_write_eret helper will mask the low bits of PC
     * appropriately depending on the new Thumb bit, so it must
     * be called after storing the new PC.
     */
    translator_io_start(&s->base);
    gen_helper_cpsr_write_eret(tcg_env, cpsr);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
}
2908 
2909 /* Generate an old-style exception return. Marks pc as dead. */
2910 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2911 {
2912     gen_rfe(s, pc, load_cpu_field(spsr));
2913 }
2914 
2915 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2916                             uint32_t opr_sz, uint32_t max_sz,
2917                             gen_helper_gvec_3_ptr *fn)
2918 {
2919     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2920 
2921     tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
2922     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2923                        opr_sz, max_sz, 0, fn);
2924 }
2925 
2926 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2927                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2928 {
2929     static gen_helper_gvec_3_ptr * const fns[2] = {
2930         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2931     };
2932     tcg_debug_assert(vece >= 1 && vece <= 2);
2933     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2934 }
2935 
2936 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
2946 #define GEN_CMP0(NAME, COND)                              \
2947     void NAME(unsigned vece, uint32_t d, uint32_t m,      \
2948               uint32_t opr_sz, uint32_t max_sz)           \
2949     { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
2950 
2951 GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
2952 GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
2953 GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
2954 GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
2955 GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
2956 
2957 #undef GEN_CMP0
2958 
/*
 * SSRA on 8-bit lanes packed in an i64: d += (a >> shift), using the
 * sar8i / add8 vector-in-integer ops.  Note that @a is overwritten with
 * the shifted value.
 */
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
2964 
/*
 * SSRA on 16-bit lanes packed in an i64: d += (a >> shift), using the
 * sar16i / add16 vector-in-integer ops.  Note that @a is overwritten
 * with the shifted value.
 */
static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
2970 
/*
 * SSRA on a single 32-bit element: d += (a >> shift) with an arithmetic
 * shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
2976 
/*
 * SSRA on a single 64-bit element: d += (a >> shift) with an arithmetic
 * shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
2982 
/*
 * SSRA on a host vector with element size @vece: d += (a >> sh) with an
 * arithmetic shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
2988 
2989 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
2990                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
2991 {
2992     static const TCGOpcode vecop_list[] = {
2993         INDEX_op_sari_vec, INDEX_op_add_vec, 0
2994     };
2995     static const GVecGen2i ops[4] = {
2996         { .fni8 = gen_ssra8_i64,
2997           .fniv = gen_ssra_vec,
2998           .fno = gen_helper_gvec_ssra_b,
2999           .load_dest = true,
3000           .opt_opc = vecop_list,
3001           .vece = MO_8 },
3002         { .fni8 = gen_ssra16_i64,
3003           .fniv = gen_ssra_vec,
3004           .fno = gen_helper_gvec_ssra_h,
3005           .load_dest = true,
3006           .opt_opc = vecop_list,
3007           .vece = MO_16 },
3008         { .fni4 = gen_ssra32_i32,
3009           .fniv = gen_ssra_vec,
3010           .fno = gen_helper_gvec_ssra_s,
3011           .load_dest = true,
3012           .opt_opc = vecop_list,
3013           .vece = MO_32 },
3014         { .fni8 = gen_ssra64_i64,
3015           .fniv = gen_ssra_vec,
3016           .fno = gen_helper_gvec_ssra_d,
3017           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3018           .opt_opc = vecop_list,
3019           .load_dest = true,
3020           .vece = MO_64 },
3021     };
3022 
3023     /* tszimm encoding produces immediates in the range [1..esize]. */
3024     tcg_debug_assert(shift > 0);
3025     tcg_debug_assert(shift <= (8 << vece));
3026 
3027     /*
3028      * Shifts larger than the element size are architecturally valid.
3029      * Signed results in all sign bits.
3030      */
3031     shift = MIN(shift, (8 << vece) - 1);
3032     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3033 }
3034 
/*
 * USRA on 8-bit lanes packed in an i64: d += (a >> shift), using the
 * shr8i / add8 vector-in-integer ops.  Note that @a is overwritten with
 * the shifted value.
 */
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
3040 
/*
 * USRA on 16-bit lanes packed in an i64: d += (a >> shift), using the
 * shr16i / add16 vector-in-integer ops.  Note that @a is overwritten
 * with the shifted value.
 */
static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
3046 
/*
 * USRA on a single 32-bit element: d += (a >> shift) with a logical
 * shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
3052 
/*
 * USRA on a single 64-bit element: d += (a >> shift) with a logical
 * shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
3058 
/*
 * USRA on a host vector with element size @vece: d += (a >> sh) with a
 * logical shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
3064 
3065 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3066                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3067 {
3068     static const TCGOpcode vecop_list[] = {
3069         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3070     };
3071     static const GVecGen2i ops[4] = {
3072         { .fni8 = gen_usra8_i64,
3073           .fniv = gen_usra_vec,
3074           .fno = gen_helper_gvec_usra_b,
3075           .load_dest = true,
3076           .opt_opc = vecop_list,
3077           .vece = MO_8, },
3078         { .fni8 = gen_usra16_i64,
3079           .fniv = gen_usra_vec,
3080           .fno = gen_helper_gvec_usra_h,
3081           .load_dest = true,
3082           .opt_opc = vecop_list,
3083           .vece = MO_16, },
3084         { .fni4 = gen_usra32_i32,
3085           .fniv = gen_usra_vec,
3086           .fno = gen_helper_gvec_usra_s,
3087           .load_dest = true,
3088           .opt_opc = vecop_list,
3089           .vece = MO_32, },
3090         { .fni8 = gen_usra64_i64,
3091           .fniv = gen_usra_vec,
3092           .fno = gen_helper_gvec_usra_d,
3093           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3094           .load_dest = true,
3095           .opt_opc = vecop_list,
3096           .vece = MO_64, },
3097     };
3098 
3099     /* tszimm encoding produces immediates in the range [1..esize]. */
3100     tcg_debug_assert(shift > 0);
3101     tcg_debug_assert(shift <= (8 << vece));
3102 
3103     /*
3104      * Shifts larger than the element size are architecturally valid.
3105      * Unsigned results in all zeros as input to accumulate: nop.
3106      */
3107     if (shift < (8 << vece)) {
3108         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3109     } else {
3110         /* Nop, but we do need to clear the tail. */
3111         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3112     }
3113 }
3114 
3115 /*
3116  * Shift one less than the requested amount, and the low bit is
3117  * the rounding bit.  For the 8 and 16-bit operations, because we
3118  * mask the low bit, we can perform a normal integer shift instead
3119  * of a vector shift.
3120  */
3121 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3122 {
3123     TCGv_i64 t = tcg_temp_new_i64();
3124 
3125     tcg_gen_shri_i64(t, a, sh - 1);
3126     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3127     tcg_gen_vec_sar8i_i64(d, a, sh);
3128     tcg_gen_vec_add8_i64(d, d, t);
3129 }
3130 
3131 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3132 {
3133     TCGv_i64 t = tcg_temp_new_i64();
3134 
3135     tcg_gen_shri_i64(t, a, sh - 1);
3136     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3137     tcg_gen_vec_sar16i_i64(d, a, sh);
3138     tcg_gen_vec_add16_i64(d, d, t);
3139 }
3140 
3141 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3142 {
3143     TCGv_i32 t;
3144 
3145     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3146     if (sh == 32) {
3147         tcg_gen_movi_i32(d, 0);
3148         return;
3149     }
3150     t = tcg_temp_new_i32();
3151     tcg_gen_extract_i32(t, a, sh - 1, 1);
3152     tcg_gen_sari_i32(d, a, sh);
3153     tcg_gen_add_i32(d, d, t);
3154 }
3155 
3156 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3157 {
3158     TCGv_i64 t = tcg_temp_new_i64();
3159 
3160     tcg_gen_extract_i64(t, a, sh - 1, 1);
3161     tcg_gen_sari_i64(d, a, sh);
3162     tcg_gen_add_i64(d, d, t);
3163 }
3164 
3165 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3166 {
3167     TCGv_vec t = tcg_temp_new_vec_matching(d);
3168     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3169 
3170     tcg_gen_shri_vec(vece, t, a, sh - 1);
3171     tcg_gen_dupi_vec(vece, ones, 1);
3172     tcg_gen_and_vec(vece, t, t, ones);
3173     tcg_gen_sari_vec(vece, d, a, sh);
3174     tcg_gen_add_vec(vece, d, d, t);
3175 }
3176 
3177 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3178                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3179 {
3180     static const TCGOpcode vecop_list[] = {
3181         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3182     };
3183     static const GVecGen2i ops[4] = {
3184         { .fni8 = gen_srshr8_i64,
3185           .fniv = gen_srshr_vec,
3186           .fno = gen_helper_gvec_srshr_b,
3187           .opt_opc = vecop_list,
3188           .vece = MO_8 },
3189         { .fni8 = gen_srshr16_i64,
3190           .fniv = gen_srshr_vec,
3191           .fno = gen_helper_gvec_srshr_h,
3192           .opt_opc = vecop_list,
3193           .vece = MO_16 },
3194         { .fni4 = gen_srshr32_i32,
3195           .fniv = gen_srshr_vec,
3196           .fno = gen_helper_gvec_srshr_s,
3197           .opt_opc = vecop_list,
3198           .vece = MO_32 },
3199         { .fni8 = gen_srshr64_i64,
3200           .fniv = gen_srshr_vec,
3201           .fno = gen_helper_gvec_srshr_d,
3202           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3203           .opt_opc = vecop_list,
3204           .vece = MO_64 },
3205     };
3206 
3207     /* tszimm encoding produces immediates in the range [1..esize] */
3208     tcg_debug_assert(shift > 0);
3209     tcg_debug_assert(shift <= (8 << vece));
3210 
3211     if (shift == (8 << vece)) {
3212         /*
3213          * Shifts larger than the element size are architecturally valid.
3214          * Signed results in all sign bits.  With rounding, this produces
3215          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3216          * I.e. always zero.
3217          */
3218         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3219     } else {
3220         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3221     }
3222 }
3223 
3224 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3225 {
3226     TCGv_i64 t = tcg_temp_new_i64();
3227 
3228     gen_srshr8_i64(t, a, sh);
3229     tcg_gen_vec_add8_i64(d, d, t);
3230 }
3231 
3232 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3233 {
3234     TCGv_i64 t = tcg_temp_new_i64();
3235 
3236     gen_srshr16_i64(t, a, sh);
3237     tcg_gen_vec_add16_i64(d, d, t);
3238 }
3239 
3240 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3241 {
3242     TCGv_i32 t = tcg_temp_new_i32();
3243 
3244     gen_srshr32_i32(t, a, sh);
3245     tcg_gen_add_i32(d, d, t);
3246 }
3247 
3248 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3249 {
3250     TCGv_i64 t = tcg_temp_new_i64();
3251 
3252     gen_srshr64_i64(t, a, sh);
3253     tcg_gen_add_i64(d, d, t);
3254 }
3255 
3256 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3257 {
3258     TCGv_vec t = tcg_temp_new_vec_matching(d);
3259 
3260     gen_srshr_vec(vece, t, a, sh);
3261     tcg_gen_add_vec(vece, d, d, t);
3262 }
3263 
3264 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3265                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3266 {
3267     static const TCGOpcode vecop_list[] = {
3268         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3269     };
3270     static const GVecGen2i ops[4] = {
3271         { .fni8 = gen_srsra8_i64,
3272           .fniv = gen_srsra_vec,
3273           .fno = gen_helper_gvec_srsra_b,
3274           .opt_opc = vecop_list,
3275           .load_dest = true,
3276           .vece = MO_8 },
3277         { .fni8 = gen_srsra16_i64,
3278           .fniv = gen_srsra_vec,
3279           .fno = gen_helper_gvec_srsra_h,
3280           .opt_opc = vecop_list,
3281           .load_dest = true,
3282           .vece = MO_16 },
3283         { .fni4 = gen_srsra32_i32,
3284           .fniv = gen_srsra_vec,
3285           .fno = gen_helper_gvec_srsra_s,
3286           .opt_opc = vecop_list,
3287           .load_dest = true,
3288           .vece = MO_32 },
3289         { .fni8 = gen_srsra64_i64,
3290           .fniv = gen_srsra_vec,
3291           .fno = gen_helper_gvec_srsra_d,
3292           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3293           .opt_opc = vecop_list,
3294           .load_dest = true,
3295           .vece = MO_64 },
3296     };
3297 
3298     /* tszimm encoding produces immediates in the range [1..esize] */
3299     tcg_debug_assert(shift > 0);
3300     tcg_debug_assert(shift <= (8 << vece));
3301 
3302     /*
3303      * Shifts larger than the element size are architecturally valid.
3304      * Signed results in all sign bits.  With rounding, this produces
3305      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3306      * I.e. always zero.  With accumulation, this leaves D unchanged.
3307      */
3308     if (shift == (8 << vece)) {
3309         /* Nop, but we do need to clear the tail. */
3310         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3311     } else {
3312         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3313     }
3314 }
3315 
3316 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3317 {
3318     TCGv_i64 t = tcg_temp_new_i64();
3319 
3320     tcg_gen_shri_i64(t, a, sh - 1);
3321     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3322     tcg_gen_vec_shr8i_i64(d, a, sh);
3323     tcg_gen_vec_add8_i64(d, d, t);
3324 }
3325 
3326 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3327 {
3328     TCGv_i64 t = tcg_temp_new_i64();
3329 
3330     tcg_gen_shri_i64(t, a, sh - 1);
3331     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3332     tcg_gen_vec_shr16i_i64(d, a, sh);
3333     tcg_gen_vec_add16_i64(d, d, t);
3334 }
3335 
3336 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3337 {
3338     TCGv_i32 t;
3339 
3340     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3341     if (sh == 32) {
3342         tcg_gen_extract_i32(d, a, sh - 1, 1);
3343         return;
3344     }
3345     t = tcg_temp_new_i32();
3346     tcg_gen_extract_i32(t, a, sh - 1, 1);
3347     tcg_gen_shri_i32(d, a, sh);
3348     tcg_gen_add_i32(d, d, t);
3349 }
3350 
3351 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3352 {
3353     TCGv_i64 t = tcg_temp_new_i64();
3354 
3355     tcg_gen_extract_i64(t, a, sh - 1, 1);
3356     tcg_gen_shri_i64(d, a, sh);
3357     tcg_gen_add_i64(d, d, t);
3358 }
3359 
3360 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3361 {
3362     TCGv_vec t = tcg_temp_new_vec_matching(d);
3363     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3364 
3365     tcg_gen_shri_vec(vece, t, a, shift - 1);
3366     tcg_gen_dupi_vec(vece, ones, 1);
3367     tcg_gen_and_vec(vece, t, t, ones);
3368     tcg_gen_shri_vec(vece, d, a, shift);
3369     tcg_gen_add_vec(vece, d, d, t);
3370 }
3371 
3372 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3373                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3374 {
3375     static const TCGOpcode vecop_list[] = {
3376         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3377     };
3378     static const GVecGen2i ops[4] = {
3379         { .fni8 = gen_urshr8_i64,
3380           .fniv = gen_urshr_vec,
3381           .fno = gen_helper_gvec_urshr_b,
3382           .opt_opc = vecop_list,
3383           .vece = MO_8 },
3384         { .fni8 = gen_urshr16_i64,
3385           .fniv = gen_urshr_vec,
3386           .fno = gen_helper_gvec_urshr_h,
3387           .opt_opc = vecop_list,
3388           .vece = MO_16 },
3389         { .fni4 = gen_urshr32_i32,
3390           .fniv = gen_urshr_vec,
3391           .fno = gen_helper_gvec_urshr_s,
3392           .opt_opc = vecop_list,
3393           .vece = MO_32 },
3394         { .fni8 = gen_urshr64_i64,
3395           .fniv = gen_urshr_vec,
3396           .fno = gen_helper_gvec_urshr_d,
3397           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3398           .opt_opc = vecop_list,
3399           .vece = MO_64 },
3400     };
3401 
3402     /* tszimm encoding produces immediates in the range [1..esize] */
3403     tcg_debug_assert(shift > 0);
3404     tcg_debug_assert(shift <= (8 << vece));
3405 
3406     if (shift == (8 << vece)) {
3407         /*
3408          * Shifts larger than the element size are architecturally valid.
3409          * Unsigned results in zero.  With rounding, this produces a
3410          * copy of the most significant bit.
3411          */
3412         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3413     } else {
3414         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3415     }
3416 }
3417 
3418 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3419 {
3420     TCGv_i64 t = tcg_temp_new_i64();
3421 
3422     if (sh == 8) {
3423         tcg_gen_vec_shr8i_i64(t, a, 7);
3424     } else {
3425         gen_urshr8_i64(t, a, sh);
3426     }
3427     tcg_gen_vec_add8_i64(d, d, t);
3428 }
3429 
3430 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3431 {
3432     TCGv_i64 t = tcg_temp_new_i64();
3433 
3434     if (sh == 16) {
3435         tcg_gen_vec_shr16i_i64(t, a, 15);
3436     } else {
3437         gen_urshr16_i64(t, a, sh);
3438     }
3439     tcg_gen_vec_add16_i64(d, d, t);
3440 }
3441 
3442 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3443 {
3444     TCGv_i32 t = tcg_temp_new_i32();
3445 
3446     if (sh == 32) {
3447         tcg_gen_shri_i32(t, a, 31);
3448     } else {
3449         gen_urshr32_i32(t, a, sh);
3450     }
3451     tcg_gen_add_i32(d, d, t);
3452 }
3453 
3454 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3455 {
3456     TCGv_i64 t = tcg_temp_new_i64();
3457 
3458     if (sh == 64) {
3459         tcg_gen_shri_i64(t, a, 63);
3460     } else {
3461         gen_urshr64_i64(t, a, sh);
3462     }
3463     tcg_gen_add_i64(d, d, t);
3464 }
3465 
3466 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3467 {
3468     TCGv_vec t = tcg_temp_new_vec_matching(d);
3469 
3470     if (sh == (8 << vece)) {
3471         tcg_gen_shri_vec(vece, t, a, sh - 1);
3472     } else {
3473         gen_urshr_vec(vece, t, a, sh);
3474     }
3475     tcg_gen_add_vec(vece, d, d, t);
3476 }
3477 
3478 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3479                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3480 {
3481     static const TCGOpcode vecop_list[] = {
3482         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3483     };
3484     static const GVecGen2i ops[4] = {
3485         { .fni8 = gen_ursra8_i64,
3486           .fniv = gen_ursra_vec,
3487           .fno = gen_helper_gvec_ursra_b,
3488           .opt_opc = vecop_list,
3489           .load_dest = true,
3490           .vece = MO_8 },
3491         { .fni8 = gen_ursra16_i64,
3492           .fniv = gen_ursra_vec,
3493           .fno = gen_helper_gvec_ursra_h,
3494           .opt_opc = vecop_list,
3495           .load_dest = true,
3496           .vece = MO_16 },
3497         { .fni4 = gen_ursra32_i32,
3498           .fniv = gen_ursra_vec,
3499           .fno = gen_helper_gvec_ursra_s,
3500           .opt_opc = vecop_list,
3501           .load_dest = true,
3502           .vece = MO_32 },
3503         { .fni8 = gen_ursra64_i64,
3504           .fniv = gen_ursra_vec,
3505           .fno = gen_helper_gvec_ursra_d,
3506           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3507           .opt_opc = vecop_list,
3508           .load_dest = true,
3509           .vece = MO_64 },
3510     };
3511 
3512     /* tszimm encoding produces immediates in the range [1..esize] */
3513     tcg_debug_assert(shift > 0);
3514     tcg_debug_assert(shift <= (8 << vece));
3515 
3516     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3517 }
3518 
3519 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3520 {
3521     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3522     TCGv_i64 t = tcg_temp_new_i64();
3523 
3524     tcg_gen_shri_i64(t, a, shift);
3525     tcg_gen_andi_i64(t, t, mask);
3526     tcg_gen_andi_i64(d, d, ~mask);
3527     tcg_gen_or_i64(d, d, t);
3528 }
3529 
3530 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3531 {
3532     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3533     TCGv_i64 t = tcg_temp_new_i64();
3534 
3535     tcg_gen_shri_i64(t, a, shift);
3536     tcg_gen_andi_i64(t, t, mask);
3537     tcg_gen_andi_i64(d, d, ~mask);
3538     tcg_gen_or_i64(d, d, t);
3539 }
3540 
/*
 * Shift-right-and-insert on a single 32-bit element: a is shifted right
 * by @shift in place, then its low (32 - shift) bits are deposited into
 * d at bit 0.  Note that @a is overwritten.
 */
static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}
3546 
/*
 * Shift-right-and-insert on a single 64-bit element: a is shifted right
 * by @shift in place, then its low (64 - shift) bits are deposited into
 * d at bit 0.  Note that @a is overwritten.
 */
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}
3552 
3553 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3554 {
3555     TCGv_vec t = tcg_temp_new_vec_matching(d);
3556     TCGv_vec m = tcg_temp_new_vec_matching(d);
3557 
3558     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3559     tcg_gen_shri_vec(vece, t, a, sh);
3560     tcg_gen_and_vec(vece, d, d, m);
3561     tcg_gen_or_vec(vece, d, d, t);
3562 }
3563 
3564 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3565                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3566 {
3567     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3568     const GVecGen2i ops[4] = {
3569         { .fni8 = gen_shr8_ins_i64,
3570           .fniv = gen_shr_ins_vec,
3571           .fno = gen_helper_gvec_sri_b,
3572           .load_dest = true,
3573           .opt_opc = vecop_list,
3574           .vece = MO_8 },
3575         { .fni8 = gen_shr16_ins_i64,
3576           .fniv = gen_shr_ins_vec,
3577           .fno = gen_helper_gvec_sri_h,
3578           .load_dest = true,
3579           .opt_opc = vecop_list,
3580           .vece = MO_16 },
3581         { .fni4 = gen_shr32_ins_i32,
3582           .fniv = gen_shr_ins_vec,
3583           .fno = gen_helper_gvec_sri_s,
3584           .load_dest = true,
3585           .opt_opc = vecop_list,
3586           .vece = MO_32 },
3587         { .fni8 = gen_shr64_ins_i64,
3588           .fniv = gen_shr_ins_vec,
3589           .fno = gen_helper_gvec_sri_d,
3590           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3591           .load_dest = true,
3592           .opt_opc = vecop_list,
3593           .vece = MO_64 },
3594     };
3595 
3596     /* tszimm encoding produces immediates in the range [1..esize]. */
3597     tcg_debug_assert(shift > 0);
3598     tcg_debug_assert(shift <= (8 << vece));
3599 
3600     /* Shift of esize leaves destination unchanged. */
3601     if (shift < (8 << vece)) {
3602         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3603     } else {
3604         /* Nop, but we do need to clear the tail. */
3605         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3606     }
3607 }
3608 
3609 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3610 {
3611     uint64_t mask = dup_const(MO_8, 0xff << shift);
3612     TCGv_i64 t = tcg_temp_new_i64();
3613 
3614     tcg_gen_shli_i64(t, a, shift);
3615     tcg_gen_andi_i64(t, t, mask);
3616     tcg_gen_andi_i64(d, d, ~mask);
3617     tcg_gen_or_i64(d, d, t);
3618 }
3619 
3620 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3621 {
3622     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3623     TCGv_i64 t = tcg_temp_new_i64();
3624 
3625     tcg_gen_shli_i64(t, a, shift);
3626     tcg_gen_andi_i64(t, t, mask);
3627     tcg_gen_andi_i64(d, d, ~mask);
3628     tcg_gen_or_i64(d, d, t);
3629 }
3630 
3631 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3632 {
3633     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3634 }
3635 
3636 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3637 {
3638     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3639 }
3640 
3641 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3642 {
3643     TCGv_vec t = tcg_temp_new_vec_matching(d);
3644     TCGv_vec m = tcg_temp_new_vec_matching(d);
3645 
3646     tcg_gen_shli_vec(vece, t, a, sh);
3647     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3648     tcg_gen_and_vec(vece, d, d, m);
3649     tcg_gen_or_vec(vece, d, d, t);
3650 }
3651 
3652 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3653                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3654 {
3655     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3656     const GVecGen2i ops[4] = {
3657         { .fni8 = gen_shl8_ins_i64,
3658           .fniv = gen_shl_ins_vec,
3659           .fno = gen_helper_gvec_sli_b,
3660           .load_dest = true,
3661           .opt_opc = vecop_list,
3662           .vece = MO_8 },
3663         { .fni8 = gen_shl16_ins_i64,
3664           .fniv = gen_shl_ins_vec,
3665           .fno = gen_helper_gvec_sli_h,
3666           .load_dest = true,
3667           .opt_opc = vecop_list,
3668           .vece = MO_16 },
3669         { .fni4 = gen_shl32_ins_i32,
3670           .fniv = gen_shl_ins_vec,
3671           .fno = gen_helper_gvec_sli_s,
3672           .load_dest = true,
3673           .opt_opc = vecop_list,
3674           .vece = MO_32 },
3675         { .fni8 = gen_shl64_ins_i64,
3676           .fniv = gen_shl_ins_vec,
3677           .fno = gen_helper_gvec_sli_d,
3678           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3679           .load_dest = true,
3680           .opt_opc = vecop_list,
3681           .vece = MO_64 },
3682     };
3683 
3684     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3685     tcg_debug_assert(shift >= 0);
3686     tcg_debug_assert(shift < (8 << vece));
3687 
3688     if (shift == 0) {
3689         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3690     } else {
3691         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3692     }
3693 }
3694 
3695 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3696 {
3697     gen_helper_neon_mul_u8(a, a, b);
3698     gen_helper_neon_add_u8(d, d, a);
3699 }
3700 
3701 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3702 {
3703     gen_helper_neon_mul_u8(a, a, b);
3704     gen_helper_neon_sub_u8(d, d, a);
3705 }
3706 
3707 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3708 {
3709     gen_helper_neon_mul_u16(a, a, b);
3710     gen_helper_neon_add_u16(d, d, a);
3711 }
3712 
3713 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3714 {
3715     gen_helper_neon_mul_u16(a, a, b);
3716     gen_helper_neon_sub_u16(d, d, a);
3717 }
3718 
3719 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3720 {
3721     tcg_gen_mul_i32(a, a, b);
3722     tcg_gen_add_i32(d, d, a);
3723 }
3724 
3725 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3726 {
3727     tcg_gen_mul_i32(a, a, b);
3728     tcg_gen_sub_i32(d, d, a);
3729 }
3730 
3731 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3732 {
3733     tcg_gen_mul_i64(a, a, b);
3734     tcg_gen_add_i64(d, d, a);
3735 }
3736 
3737 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3738 {
3739     tcg_gen_mul_i64(a, a, b);
3740     tcg_gen_sub_i64(d, d, a);
3741 }
3742 
3743 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3744 {
3745     tcg_gen_mul_vec(vece, a, a, b);
3746     tcg_gen_add_vec(vece, d, d, a);
3747 }
3748 
3749 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3750 {
3751     tcg_gen_mul_vec(vece, a, a, b);
3752     tcg_gen_sub_vec(vece, d, d, a);
3753 }
3754 
3755 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3756  * these tables are shared with AArch64 which does support them.
3757  */
3758 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3759                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3760 {
3761     static const TCGOpcode vecop_list[] = {
3762         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3763     };
3764     static const GVecGen3 ops[4] = {
3765         { .fni4 = gen_mla8_i32,
3766           .fniv = gen_mla_vec,
3767           .load_dest = true,
3768           .opt_opc = vecop_list,
3769           .vece = MO_8 },
3770         { .fni4 = gen_mla16_i32,
3771           .fniv = gen_mla_vec,
3772           .load_dest = true,
3773           .opt_opc = vecop_list,
3774           .vece = MO_16 },
3775         { .fni4 = gen_mla32_i32,
3776           .fniv = gen_mla_vec,
3777           .load_dest = true,
3778           .opt_opc = vecop_list,
3779           .vece = MO_32 },
3780         { .fni8 = gen_mla64_i64,
3781           .fniv = gen_mla_vec,
3782           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3783           .load_dest = true,
3784           .opt_opc = vecop_list,
3785           .vece = MO_64 },
3786     };
3787     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3788 }
3789 
3790 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3791                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3792 {
3793     static const TCGOpcode vecop_list[] = {
3794         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3795     };
3796     static const GVecGen3 ops[4] = {
3797         { .fni4 = gen_mls8_i32,
3798           .fniv = gen_mls_vec,
3799           .load_dest = true,
3800           .opt_opc = vecop_list,
3801           .vece = MO_8 },
3802         { .fni4 = gen_mls16_i32,
3803           .fniv = gen_mls_vec,
3804           .load_dest = true,
3805           .opt_opc = vecop_list,
3806           .vece = MO_16 },
3807         { .fni4 = gen_mls32_i32,
3808           .fniv = gen_mls_vec,
3809           .load_dest = true,
3810           .opt_opc = vecop_list,
3811           .vece = MO_32 },
3812         { .fni8 = gen_mls64_i64,
3813           .fniv = gen_mls_vec,
3814           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3815           .load_dest = true,
3816           .opt_opc = vecop_list,
3817           .vece = MO_64 },
3818     };
3819     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3820 }
3821 
3822 /* CMTST : test is "if (X & Y != 0)". */
3823 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3824 {
3825     tcg_gen_and_i32(d, a, b);
3826     tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
3827 }
3828 
3829 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3830 {
3831     tcg_gen_and_i64(d, a, b);
3832     tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
3833 }
3834 
3835 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3836 {
3837     tcg_gen_and_vec(vece, d, a, b);
3838     tcg_gen_dupi_vec(vece, a, 0);
3839     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3840 }
3841 
3842 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3843                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3844 {
3845     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3846     static const GVecGen3 ops[4] = {
3847         { .fni4 = gen_helper_neon_tst_u8,
3848           .fniv = gen_cmtst_vec,
3849           .opt_opc = vecop_list,
3850           .vece = MO_8 },
3851         { .fni4 = gen_helper_neon_tst_u16,
3852           .fniv = gen_cmtst_vec,
3853           .opt_opc = vecop_list,
3854           .vece = MO_16 },
3855         { .fni4 = gen_cmtst_i32,
3856           .fniv = gen_cmtst_vec,
3857           .opt_opc = vecop_list,
3858           .vece = MO_32 },
3859         { .fni8 = gen_cmtst_i64,
3860           .fniv = gen_cmtst_vec,
3861           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3862           .opt_opc = vecop_list,
3863           .vece = MO_64 },
3864     };
3865     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3866 }
3867 
3868 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3869 {
3870     TCGv_i32 lval = tcg_temp_new_i32();
3871     TCGv_i32 rval = tcg_temp_new_i32();
3872     TCGv_i32 lsh = tcg_temp_new_i32();
3873     TCGv_i32 rsh = tcg_temp_new_i32();
3874     TCGv_i32 zero = tcg_constant_i32(0);
3875     TCGv_i32 max = tcg_constant_i32(32);
3876 
3877     /*
3878      * Rely on the TCG guarantee that out of range shifts produce
3879      * unspecified results, not undefined behaviour (i.e. no trap).
3880      * Discard out-of-range results after the fact.
3881      */
3882     tcg_gen_ext8s_i32(lsh, shift);
3883     tcg_gen_neg_i32(rsh, lsh);
3884     tcg_gen_shl_i32(lval, src, lsh);
3885     tcg_gen_shr_i32(rval, src, rsh);
3886     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3887     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3888 }
3889 
3890 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3891 {
3892     TCGv_i64 lval = tcg_temp_new_i64();
3893     TCGv_i64 rval = tcg_temp_new_i64();
3894     TCGv_i64 lsh = tcg_temp_new_i64();
3895     TCGv_i64 rsh = tcg_temp_new_i64();
3896     TCGv_i64 zero = tcg_constant_i64(0);
3897     TCGv_i64 max = tcg_constant_i64(64);
3898 
3899     /*
3900      * Rely on the TCG guarantee that out of range shifts produce
3901      * unspecified results, not undefined behaviour (i.e. no trap).
3902      * Discard out-of-range results after the fact.
3903      */
3904     tcg_gen_ext8s_i64(lsh, shift);
3905     tcg_gen_neg_i64(rsh, lsh);
3906     tcg_gen_shl_i64(lval, src, lsh);
3907     tcg_gen_shr_i64(rval, src, rsh);
3908     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3909     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3910 }
3911 
/*
 * Host-vector unsigned shift by a per-element signed amount.
 * Both a left shift by the amount and a right shift by its negation are
 * generated for every element; whichever count is out of range is
 * masked to zero and the two results are ORed together.
 */
static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    /* rsh = -shift; for elements wider than a byte keep only bits [7:0]. */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    /* max = element size in bits, the first out-of-range count. */
    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        /* lsh/rsh become "count is out of range" masks; clear with andc. */
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        /* lsh/rsh become "count is in range" masks; keep with and. */
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}
3962 
3963 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3964                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3965 {
3966     static const TCGOpcode vecop_list[] = {
3967         INDEX_op_neg_vec, INDEX_op_shlv_vec,
3968         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
3969     };
3970     static const GVecGen3 ops[4] = {
3971         { .fniv = gen_ushl_vec,
3972           .fno = gen_helper_gvec_ushl_b,
3973           .opt_opc = vecop_list,
3974           .vece = MO_8 },
3975         { .fniv = gen_ushl_vec,
3976           .fno = gen_helper_gvec_ushl_h,
3977           .opt_opc = vecop_list,
3978           .vece = MO_16 },
3979         { .fni4 = gen_ushl_i32,
3980           .fniv = gen_ushl_vec,
3981           .opt_opc = vecop_list,
3982           .vece = MO_32 },
3983         { .fni8 = gen_ushl_i64,
3984           .fniv = gen_ushl_vec,
3985           .opt_opc = vecop_list,
3986           .vece = MO_64 },
3987     };
3988     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3989 }
3990 
3991 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3992 {
3993     TCGv_i32 lval = tcg_temp_new_i32();
3994     TCGv_i32 rval = tcg_temp_new_i32();
3995     TCGv_i32 lsh = tcg_temp_new_i32();
3996     TCGv_i32 rsh = tcg_temp_new_i32();
3997     TCGv_i32 zero = tcg_constant_i32(0);
3998     TCGv_i32 max = tcg_constant_i32(31);
3999 
4000     /*
4001      * Rely on the TCG guarantee that out of range shifts produce
4002      * unspecified results, not undefined behaviour (i.e. no trap).
4003      * Discard out-of-range results after the fact.
4004      */
4005     tcg_gen_ext8s_i32(lsh, shift);
4006     tcg_gen_neg_i32(rsh, lsh);
4007     tcg_gen_shl_i32(lval, src, lsh);
4008     tcg_gen_umin_i32(rsh, rsh, max);
4009     tcg_gen_sar_i32(rval, src, rsh);
4010     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4011     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4012 }
4013 
4014 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4015 {
4016     TCGv_i64 lval = tcg_temp_new_i64();
4017     TCGv_i64 rval = tcg_temp_new_i64();
4018     TCGv_i64 lsh = tcg_temp_new_i64();
4019     TCGv_i64 rsh = tcg_temp_new_i64();
4020     TCGv_i64 zero = tcg_constant_i64(0);
4021     TCGv_i64 max = tcg_constant_i64(63);
4022 
4023     /*
4024      * Rely on the TCG guarantee that out of range shifts produce
4025      * unspecified results, not undefined behaviour (i.e. no trap).
4026      * Discard out-of-range results after the fact.
4027      */
4028     tcg_gen_ext8s_i64(lsh, shift);
4029     tcg_gen_neg_i64(rsh, lsh);
4030     tcg_gen_shl_i64(lval, src, lsh);
4031     tcg_gen_umin_i64(rsh, rsh, max);
4032     tcg_gen_sar_i64(rval, src, rsh);
4033     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4034     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4035 }
4036 
/*
 * Host-vector signed shift by a per-element signed amount.
 * A left shift and an arithmetic right shift are both generated for
 * every element and one of them is selected per element at the end.
 * Note that @tmp is reused for several unrelated values in sequence,
 * so the statement order below matters.
 */
static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* tmp = 0xff: keep only bits [7:0] of both shift counts. */
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1.  */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    /* tmp now becomes the per-element result of "lsh > esize - 1". */
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift.  */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift.  */
    if (vece == MO_8) {
        /* Whole byte is the signed count: negative picks the right shift. */
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        /* lsh was masked to a byte: below 0x80 picks the left shift. */
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}
4080 
4081 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4082                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4083 {
4084     static const TCGOpcode vecop_list[] = {
4085         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4086         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4087     };
4088     static const GVecGen3 ops[4] = {
4089         { .fniv = gen_sshl_vec,
4090           .fno = gen_helper_gvec_sshl_b,
4091           .opt_opc = vecop_list,
4092           .vece = MO_8 },
4093         { .fniv = gen_sshl_vec,
4094           .fno = gen_helper_gvec_sshl_h,
4095           .opt_opc = vecop_list,
4096           .vece = MO_16 },
4097         { .fni4 = gen_sshl_i32,
4098           .fniv = gen_sshl_vec,
4099           .opt_opc = vecop_list,
4100           .vece = MO_32 },
4101         { .fni8 = gen_sshl_i64,
4102           .fniv = gen_sshl_vec,
4103           .opt_opc = vecop_list,
4104           .vece = MO_64 },
4105     };
4106     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4107 }
4108 
4109 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4110                           TCGv_vec a, TCGv_vec b)
4111 {
4112     TCGv_vec x = tcg_temp_new_vec_matching(t);
4113     tcg_gen_add_vec(vece, x, a, b);
4114     tcg_gen_usadd_vec(vece, t, a, b);
4115     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4116     tcg_gen_or_vec(vece, sat, sat, x);
4117 }
4118 
4119 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4120                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4121 {
4122     static const TCGOpcode vecop_list[] = {
4123         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4124     };
4125     static const GVecGen4 ops[4] = {
4126         { .fniv = gen_uqadd_vec,
4127           .fno = gen_helper_gvec_uqadd_b,
4128           .write_aofs = true,
4129           .opt_opc = vecop_list,
4130           .vece = MO_8 },
4131         { .fniv = gen_uqadd_vec,
4132           .fno = gen_helper_gvec_uqadd_h,
4133           .write_aofs = true,
4134           .opt_opc = vecop_list,
4135           .vece = MO_16 },
4136         { .fniv = gen_uqadd_vec,
4137           .fno = gen_helper_gvec_uqadd_s,
4138           .write_aofs = true,
4139           .opt_opc = vecop_list,
4140           .vece = MO_32 },
4141         { .fniv = gen_uqadd_vec,
4142           .fno = gen_helper_gvec_uqadd_d,
4143           .write_aofs = true,
4144           .opt_opc = vecop_list,
4145           .vece = MO_64 },
4146     };
4147     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4148                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4149 }
4150 
4151 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4152                           TCGv_vec a, TCGv_vec b)
4153 {
4154     TCGv_vec x = tcg_temp_new_vec_matching(t);
4155     tcg_gen_add_vec(vece, x, a, b);
4156     tcg_gen_ssadd_vec(vece, t, a, b);
4157     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4158     tcg_gen_or_vec(vece, sat, sat, x);
4159 }
4160 
4161 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4162                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4163 {
4164     static const TCGOpcode vecop_list[] = {
4165         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4166     };
4167     static const GVecGen4 ops[4] = {
4168         { .fniv = gen_sqadd_vec,
4169           .fno = gen_helper_gvec_sqadd_b,
4170           .opt_opc = vecop_list,
4171           .write_aofs = true,
4172           .vece = MO_8 },
4173         { .fniv = gen_sqadd_vec,
4174           .fno = gen_helper_gvec_sqadd_h,
4175           .opt_opc = vecop_list,
4176           .write_aofs = true,
4177           .vece = MO_16 },
4178         { .fniv = gen_sqadd_vec,
4179           .fno = gen_helper_gvec_sqadd_s,
4180           .opt_opc = vecop_list,
4181           .write_aofs = true,
4182           .vece = MO_32 },
4183         { .fniv = gen_sqadd_vec,
4184           .fno = gen_helper_gvec_sqadd_d,
4185           .opt_opc = vecop_list,
4186           .write_aofs = true,
4187           .vece = MO_64 },
4188     };
4189     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4190                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4191 }
4192 
4193 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4194                           TCGv_vec a, TCGv_vec b)
4195 {
4196     TCGv_vec x = tcg_temp_new_vec_matching(t);
4197     tcg_gen_sub_vec(vece, x, a, b);
4198     tcg_gen_ussub_vec(vece, t, a, b);
4199     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4200     tcg_gen_or_vec(vece, sat, sat, x);
4201 }
4202 
4203 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4204                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4205 {
4206     static const TCGOpcode vecop_list[] = {
4207         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4208     };
4209     static const GVecGen4 ops[4] = {
4210         { .fniv = gen_uqsub_vec,
4211           .fno = gen_helper_gvec_uqsub_b,
4212           .opt_opc = vecop_list,
4213           .write_aofs = true,
4214           .vece = MO_8 },
4215         { .fniv = gen_uqsub_vec,
4216           .fno = gen_helper_gvec_uqsub_h,
4217           .opt_opc = vecop_list,
4218           .write_aofs = true,
4219           .vece = MO_16 },
4220         { .fniv = gen_uqsub_vec,
4221           .fno = gen_helper_gvec_uqsub_s,
4222           .opt_opc = vecop_list,
4223           .write_aofs = true,
4224           .vece = MO_32 },
4225         { .fniv = gen_uqsub_vec,
4226           .fno = gen_helper_gvec_uqsub_d,
4227           .opt_opc = vecop_list,
4228           .write_aofs = true,
4229           .vece = MO_64 },
4230     };
4231     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4232                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4233 }
4234 
4235 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4236                           TCGv_vec a, TCGv_vec b)
4237 {
4238     TCGv_vec x = tcg_temp_new_vec_matching(t);
4239     tcg_gen_sub_vec(vece, x, a, b);
4240     tcg_gen_sssub_vec(vece, t, a, b);
4241     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4242     tcg_gen_or_vec(vece, sat, sat, x);
4243 }
4244 
4245 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4246                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4247 {
4248     static const TCGOpcode vecop_list[] = {
4249         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4250     };
4251     static const GVecGen4 ops[4] = {
4252         { .fniv = gen_sqsub_vec,
4253           .fno = gen_helper_gvec_sqsub_b,
4254           .opt_opc = vecop_list,
4255           .write_aofs = true,
4256           .vece = MO_8 },
4257         { .fniv = gen_sqsub_vec,
4258           .fno = gen_helper_gvec_sqsub_h,
4259           .opt_opc = vecop_list,
4260           .write_aofs = true,
4261           .vece = MO_16 },
4262         { .fniv = gen_sqsub_vec,
4263           .fno = gen_helper_gvec_sqsub_s,
4264           .opt_opc = vecop_list,
4265           .write_aofs = true,
4266           .vece = MO_32 },
4267         { .fniv = gen_sqsub_vec,
4268           .fno = gen_helper_gvec_sqsub_d,
4269           .opt_opc = vecop_list,
4270           .write_aofs = true,
4271           .vece = MO_64 },
4272     };
4273     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4274                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4275 }
4276 
4277 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4278 {
4279     TCGv_i32 t = tcg_temp_new_i32();
4280 
4281     tcg_gen_sub_i32(t, a, b);
4282     tcg_gen_sub_i32(d, b, a);
4283     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4284 }
4285 
4286 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4287 {
4288     TCGv_i64 t = tcg_temp_new_i64();
4289 
4290     tcg_gen_sub_i64(t, a, b);
4291     tcg_gen_sub_i64(d, b, a);
4292     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4293 }
4294 
4295 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4296 {
4297     TCGv_vec t = tcg_temp_new_vec_matching(d);
4298 
4299     tcg_gen_smin_vec(vece, t, a, b);
4300     tcg_gen_smax_vec(vece, d, a, b);
4301     tcg_gen_sub_vec(vece, d, d, t);
4302 }
4303 
4304 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4305                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4306 {
4307     static const TCGOpcode vecop_list[] = {
4308         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4309     };
4310     static const GVecGen3 ops[4] = {
4311         { .fniv = gen_sabd_vec,
4312           .fno = gen_helper_gvec_sabd_b,
4313           .opt_opc = vecop_list,
4314           .vece = MO_8 },
4315         { .fniv = gen_sabd_vec,
4316           .fno = gen_helper_gvec_sabd_h,
4317           .opt_opc = vecop_list,
4318           .vece = MO_16 },
4319         { .fni4 = gen_sabd_i32,
4320           .fniv = gen_sabd_vec,
4321           .fno = gen_helper_gvec_sabd_s,
4322           .opt_opc = vecop_list,
4323           .vece = MO_32 },
4324         { .fni8 = gen_sabd_i64,
4325           .fniv = gen_sabd_vec,
4326           .fno = gen_helper_gvec_sabd_d,
4327           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4328           .opt_opc = vecop_list,
4329           .vece = MO_64 },
4330     };
4331     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4332 }
4333 
4334 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4335 {
4336     TCGv_i32 t = tcg_temp_new_i32();
4337 
4338     tcg_gen_sub_i32(t, a, b);
4339     tcg_gen_sub_i32(d, b, a);
4340     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4341 }
4342 
4343 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4344 {
4345     TCGv_i64 t = tcg_temp_new_i64();
4346 
4347     tcg_gen_sub_i64(t, a, b);
4348     tcg_gen_sub_i64(d, b, a);
4349     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4350 }
4351 
4352 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4353 {
4354     TCGv_vec t = tcg_temp_new_vec_matching(d);
4355 
4356     tcg_gen_umin_vec(vece, t, a, b);
4357     tcg_gen_umax_vec(vece, d, a, b);
4358     tcg_gen_sub_vec(vece, d, d, t);
4359 }
4360 
4361 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4362                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4363 {
4364     static const TCGOpcode vecop_list[] = {
4365         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4366     };
4367     static const GVecGen3 ops[4] = {
4368         { .fniv = gen_uabd_vec,
4369           .fno = gen_helper_gvec_uabd_b,
4370           .opt_opc = vecop_list,
4371           .vece = MO_8 },
4372         { .fniv = gen_uabd_vec,
4373           .fno = gen_helper_gvec_uabd_h,
4374           .opt_opc = vecop_list,
4375           .vece = MO_16 },
4376         { .fni4 = gen_uabd_i32,
4377           .fniv = gen_uabd_vec,
4378           .fno = gen_helper_gvec_uabd_s,
4379           .opt_opc = vecop_list,
4380           .vece = MO_32 },
4381         { .fni8 = gen_uabd_i64,
4382           .fniv = gen_uabd_vec,
4383           .fno = gen_helper_gvec_uabd_d,
4384           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4385           .opt_opc = vecop_list,
4386           .vece = MO_64 },
4387     };
4388     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4389 }
4390 
4391 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4392 {
4393     TCGv_i32 t = tcg_temp_new_i32();
4394     gen_sabd_i32(t, a, b);
4395     tcg_gen_add_i32(d, d, t);
4396 }
4397 
4398 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4399 {
4400     TCGv_i64 t = tcg_temp_new_i64();
4401     gen_sabd_i64(t, a, b);
4402     tcg_gen_add_i64(d, d, t);
4403 }
4404 
4405 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4406 {
4407     TCGv_vec t = tcg_temp_new_vec_matching(d);
4408     gen_sabd_vec(vece, t, a, b);
4409     tcg_gen_add_vec(vece, d, d, t);
4410 }
4411 
4412 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4413                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4414 {
4415     static const TCGOpcode vecop_list[] = {
4416         INDEX_op_sub_vec, INDEX_op_add_vec,
4417         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4418     };
4419     static const GVecGen3 ops[4] = {
4420         { .fniv = gen_saba_vec,
4421           .fno = gen_helper_gvec_saba_b,
4422           .opt_opc = vecop_list,
4423           .load_dest = true,
4424           .vece = MO_8 },
4425         { .fniv = gen_saba_vec,
4426           .fno = gen_helper_gvec_saba_h,
4427           .opt_opc = vecop_list,
4428           .load_dest = true,
4429           .vece = MO_16 },
4430         { .fni4 = gen_saba_i32,
4431           .fniv = gen_saba_vec,
4432           .fno = gen_helper_gvec_saba_s,
4433           .opt_opc = vecop_list,
4434           .load_dest = true,
4435           .vece = MO_32 },
4436         { .fni8 = gen_saba_i64,
4437           .fniv = gen_saba_vec,
4438           .fno = gen_helper_gvec_saba_d,
4439           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4440           .opt_opc = vecop_list,
4441           .load_dest = true,
4442           .vece = MO_64 },
4443     };
4444     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4445 }
4446 
4447 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4448 {
4449     TCGv_i32 t = tcg_temp_new_i32();
4450     gen_uabd_i32(t, a, b);
4451     tcg_gen_add_i32(d, d, t);
4452 }
4453 
4454 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4455 {
4456     TCGv_i64 t = tcg_temp_new_i64();
4457     gen_uabd_i64(t, a, b);
4458     tcg_gen_add_i64(d, d, t);
4459 }
4460 
4461 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4462 {
4463     TCGv_vec t = tcg_temp_new_vec_matching(d);
4464     gen_uabd_vec(vece, t, a, b);
4465     tcg_gen_add_vec(vece, d, d, t);
4466 }
4467 
4468 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4469                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4470 {
4471     static const TCGOpcode vecop_list[] = {
4472         INDEX_op_sub_vec, INDEX_op_add_vec,
4473         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4474     };
4475     static const GVecGen3 ops[4] = {
4476         { .fniv = gen_uaba_vec,
4477           .fno = gen_helper_gvec_uaba_b,
4478           .opt_opc = vecop_list,
4479           .load_dest = true,
4480           .vece = MO_8 },
4481         { .fniv = gen_uaba_vec,
4482           .fno = gen_helper_gvec_uaba_h,
4483           .opt_opc = vecop_list,
4484           .load_dest = true,
4485           .vece = MO_16 },
4486         { .fni4 = gen_uaba_i32,
4487           .fniv = gen_uaba_vec,
4488           .fno = gen_helper_gvec_uaba_s,
4489           .opt_opc = vecop_list,
4490           .load_dest = true,
4491           .vece = MO_32 },
4492         { .fni8 = gen_uaba_i64,
4493           .fniv = gen_uaba_vec,
4494           .fno = gen_helper_gvec_uaba_d,
4495           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4496           .opt_opc = vecop_list,
4497           .load_dest = true,
4498           .vece = MO_64 },
4499     };
4500     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4501 }
4502 
/*
 * Return true if the (crn, crm) pair is one of the encodings listed in
 * the table below.  Only crn values 9, 10 and 11 have entries; any other
 * crn yields false.
 */
static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
{
    /* One bitmap per crn in 9..11; bit N set means crm == cN matches. */
    static const uint16_t crm_bitmap[3] = {
        0b0000000111100111,  /* crn ==  9, crm == {c0-c2, c5-c8}   */
        0b0000000100010011,  /* crn == 10, crm == {c0, c1, c4, c8} */
        0b1000000111111111,  /* crn == 11, crm == {c0-c8, c15}     */
    };

    if (crn < 9 || crn > 11) {
        return false;
    }
    return (crm_bitmap[crn - 9] >> crm) & 1;
}
4516 
/*
 * Emit code for one AArch32 coprocessor register access.
 *
 * @s: translation context
 * @cpnum: coprocessor number
 * @is64: nonzero for a 64-bit access using the rt/rt2 pair,
 *        zero for a 32-bit access using rt only
 * @opc1, @crn, @crm, @opc2: encoding fields used to build the lookup key
 * @isread: true to read the coprocessor register into core register(s),
 *          false to write it from core register(s)
 * @rt: core register (low half for 64-bit accesses)
 * @rt2: core register holding the high half; only used when @is64
 *
 * The sequence is: compute the trap syndrome, emit the HSTR and TIDCP
 * trap checks, UNDEF if the register is unknown or not accessible at the
 * current EL, emit any runtime access check, deal with the special
 * register types, perform the read or write, and finally end the TB if
 * that is required.
 */
static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
                           int opc1, int crn, int crm, int opc2,
                           bool isread, int rt, int rt2)
{
    uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    TCGv_ptr tcg_ri = NULL;
    bool need_exit_tb = false;
    uint32_t syndrome;

    /*
     * Note that since we are an implementation which takes an
     * exception on a trapped conditional instruction only if the
     * instruction passes its condition code check, we can take
     * advantage of the clause in the ARM ARM that allows us to set
     * the COND field in the instruction to 0xE in all cases.
     * We could fish the actual condition out of the insn (ARM)
     * or the condexec bits (Thumb) but it isn't necessary.
     */
    switch (cpnum) {
    case 14:
        if (is64) {
            syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
                                         isread, false);
        } else {
            syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
                                        rt, isread, false);
        }
        break;
    case 15:
        if (is64) {
            syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
                                         isread, false);
        } else {
            syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
                                        rt, isread, false);
        }
        break;
    default:
        /*
         * ARMv8 defines that only coprocessors 14 and 15 exist,
         * so this can only happen if this is an ARMv7 or earlier CPU,
         * in which case the syndrome information won't actually be
         * guest visible.
         */
        assert(!arm_dc_feature(s, ARM_FEATURE_V8));
        syndrome = syn_uncategorized();
        break;
    }

    if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
        /*
         * At EL1, check for a HSTR_EL2 trap, which must take precedence
         * over the UNDEF for "no such register" or the UNDEF for "access
         * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
         * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
         * access_check_cp_reg(), after the checks for whether the access
         * configurably trapped to EL1.
         */
        /* The HSTR bit index is crm for 64-bit accesses, crn otherwise. */
        uint32_t maskbit = is64 ? crm : crn;

        if (maskbit != 4 && maskbit != 14) {
            /* T4 and T14 are RES0 so never cause traps */
            TCGv_i32 t;
            DisasLabel over = gen_disas_label(s);

            /* Runtime test: skip the exception if the HSTR bit is clear. */
            t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
            tcg_gen_andi_i32(t, t, 1u << maskbit);
            tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);

            gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
            /*
             * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
             * but since we're conditionally branching over it, we want
             * to assume continue-to-next-instruction.
             */
            s->base.is_jmp = DISAS_NEXT;
            set_disas_label(s, over);
        }
    }

    if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) {
        /*
         * Check for TIDCP trap, which must take precedence over the UNDEF
         * for "no such register" etc.  It shares precedence with HSTR,
         * but raises the same exception, so order doesn't matter.
         */
        switch (s->current_el) {
        case 0:
            if (arm_dc_feature(s, ARM_FEATURE_AARCH64)
                && dc_isar_feature(aa64_tidcp1, s)) {
                gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
            }
            break;
        case 1:
            gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
            break;
        }
    }

    if (!ri) {
        /*
         * Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        if (is64) {
            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
                          "64 bit system register cp:%d opc1: %d crm:%d "
                          "(%s)\n",
                          isread ? "read" : "write", cpnum, opc1, crm,
                          s->ns ? "non-secure" : "secure");
        } else {
            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
                          "system register cp:%d opc1:%d crn:%d crm:%d "
                          "opc2:%d (%s)\n",
                          isread ? "read" : "write", cpnum, opc1, crn,
                          crm, opc2, s->ns ? "non-secure" : "secure");
        }
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
        (ri->fgt && s->fgt_active) ||
        (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
        /*
         * Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         * Note that on XScale all cp0..cp13 registers do an access check
         * call in order to handle c15_cpar.
         */
        gen_set_condexec(s);
        gen_update_pc(s, 0);
        /* The helper's result is reused below as the reginfo pointer. */
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_set_condexec(s);
        gen_update_pc(s, 0);
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        break;
    case ARM_CP_NOP:
        return;
    case ARM_CP_WFI:
        if (isread) {
            unallocated_encoding(s);
        } else {
            gen_update_pc(s, curr_insn_len(s));
            s->base.is_jmp = DISAS_WFI;
        }
        return;
    default:
        g_assert_not_reached();
    }

    if (ri->type & ARM_CP_IO) {
        /* I/O operations must end the TB here (whether read or write) */
        need_exit_tb = translator_io_start(&s->base);
    }

    if (isread) {
        /* Read */
        if (is64) {
            TCGv_i64 tmp64;
            TCGv_i32 tmp;
            /* Value source: constant, readfn helper, or direct field load. */
            if (ri->type & ARM_CP_CONST) {
                tmp64 = tcg_constant_i64(ri->resetvalue);
            } else if (ri->readfn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                tmp64 = tcg_temp_new_i64();
                gen_helper_get_cp_reg64(tmp64, tcg_env, tcg_ri);
            } else {
                tmp64 = tcg_temp_new_i64();
                tcg_gen_ld_i64(tmp64, tcg_env, ri->fieldoffset);
            }
            /* Low half goes to rt, high half to rt2. */
            tmp = tcg_temp_new_i32();
            tcg_gen_extrl_i64_i32(tmp, tmp64);
            store_reg(s, rt, tmp);
            tmp = tcg_temp_new_i32();
            tcg_gen_extrh_i64_i32(tmp, tmp64);
            store_reg(s, rt2, tmp);
        } else {
            TCGv_i32 tmp;
            if (ri->type & ARM_CP_CONST) {
                tmp = tcg_constant_i32(ri->resetvalue);
            } else if (ri->readfn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                tmp = tcg_temp_new_i32();
                gen_helper_get_cp_reg(tmp, tcg_env, tcg_ri);
            } else {
                tmp = load_cpu_offset(ri->fieldoffset);
            }
            if (rt == 15) {
                /* Destination register of r15 for 32 bit loads sets
                 * the condition codes from the high 4 bits of the value
                 */
                gen_set_nzcv(tmp);
            } else {
                store_reg(s, rt, tmp);
            }
        }
    } else {
        /* Write */
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        }

        if (is64) {
            TCGv_i32 tmplo, tmphi;
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            /* Build the 64-bit value with rt as low half, rt2 as high half. */
            tmplo = load_reg(s, rt);
            tmphi = load_reg(s, rt2);
            tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
            if (ri->writefn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                gen_helper_set_cp_reg64(tcg_env, tcg_ri, tmp64);
            } else {
                tcg_gen_st_i64(tmp64, tcg_env, ri->fieldoffset);
            }
        } else {
            TCGv_i32 tmp = load_reg(s, rt);
            if (ri->writefn) {
                if (!tcg_ri) {
                    tcg_ri = gen_lookup_cp_reg(key);
                }
                gen_helper_set_cp_reg(tcg_env, tcg_ri, tmp);
            } else {
                store_cpu_offset(tmp, ri->fieldoffset, 4);
            }
        }
    }

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        need_exit_tb = true;
    }
    if (need_exit_tb) {
        gen_lookup_tb(s);
    }
}
4789 
4790 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4791 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4792 {
4793     int cpnum = (insn >> 8) & 0xf;
4794 
4795     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4796         unallocated_encoding(s);
4797     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4798         if (disas_iwmmxt_insn(s, insn)) {
4799             unallocated_encoding(s);
4800         }
4801     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4802         if (disas_dsp_insn(s, insn)) {
4803             unallocated_encoding(s);
4804         }
4805     }
4806 }
4807 
4808 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4809 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4810 {
4811     TCGv_i32 tmp;
4812     tmp = tcg_temp_new_i32();
4813     tcg_gen_extrl_i64_i32(tmp, val);
4814     store_reg(s, rlow, tmp);
4815     tmp = tcg_temp_new_i32();
4816     tcg_gen_extrh_i64_i32(tmp, val);
4817     store_reg(s, rhigh, tmp);
4818 }
4819 
4820 /* load and add a 64-bit value from a register pair.  */
4821 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4822 {
4823     TCGv_i64 tmp;
4824     TCGv_i32 tmpl;
4825     TCGv_i32 tmph;
4826 
4827     /* Load 64-bit value rd:rn.  */
4828     tmpl = load_reg(s, rlow);
4829     tmph = load_reg(s, rhigh);
4830     tmp = tcg_temp_new_i64();
4831     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4832     tcg_gen_add_i64(val, val, tmp);
4833 }
4834 
4835 /* Set N and Z flags from hi|lo.  */
4836 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4837 {
4838     tcg_gen_mov_i32(cpu_NF, hi);
4839     tcg_gen_or_i32(cpu_ZF, lo, hi);
4840 }
4841 
4842 /* Load/Store exclusive instructions are implemented by remembering
4843    the value/address loaded, and seeing if these are the same
4844    when the store is performed.  This should be sufficient to implement
4845    the architecturally mandated semantics, and avoids having to monitor
4846    regular stores.  The compare vs the remembered value is done during
4847    the cmpxchg operation, but we must compare the addresses manually.  */
4848 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4849                                TCGv_i32 addr, int size)
4850 {
4851     TCGv_i32 tmp = tcg_temp_new_i32();
4852     MemOp opc = size | MO_ALIGN | s->be_data;
4853 
4854     s->is_ldex = true;
4855 
4856     if (size == 3) {
4857         TCGv_i32 tmp2 = tcg_temp_new_i32();
4858         TCGv_i64 t64 = tcg_temp_new_i64();
4859 
4860         /*
4861          * For AArch32, architecturally the 32-bit word at the lowest
4862          * address is always Rt and the one at addr+4 is Rt2, even if
4863          * the CPU is big-endian. That means we don't want to do a
4864          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4865          * architecturally 64-bit access, but instead do a 64-bit access
4866          * using MO_BE if appropriate and then split the two halves.
4867          */
4868         TCGv taddr = gen_aa32_addr(s, addr, opc);
4869 
4870         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4871         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4872         if (s->be_data == MO_BE) {
4873             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4874         } else {
4875             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4876         }
4877         store_reg(s, rt2, tmp2);
4878     } else {
4879         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4880         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4881     }
4882 
4883     store_reg(s, rt, tmp);
4884     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4885 }
4886 
4887 static void gen_clrex(DisasContext *s)
4888 {
4889     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4890 }
4891 
/*
 * Emit code for a store-exclusive of 1 << @size bytes to @addr.
 *
 * @rd: register that receives the status: 0 if the store was performed,
 *      1 if it was not
 * @rt: register holding the value to store (the word at the lowest
 *      address when @size == 3)
 * @rt2: register holding the word at addr+4; only used when @size == 3
 * @addr: 32-bit address of the access
 * @size: log2 of the access size in bytes
 *
 * The store succeeds only if @addr equals cpu_exclusive_addr and memory
 * still holds cpu_exclusive_val; the latter is checked by the atomic
 * cmpxchg.  cpu_exclusive_addr is reset to -1 on both paths.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i32 addr, int size)
{
    TCGv_i32 t0, t1, t2;
    TCGv_i64 extaddr;
    TCGv taddr;
    TCGLabel *done_label;
    TCGLabel *fail_label;
    MemOp opc = size | MO_ALIGN | s->be_data;

    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
         [addr] = {Rt};
         {Rd} = 0;
       } else {
         {Rd} = 1;
       } */
    fail_label = gen_new_label();
    done_label = gen_new_label();
    /* Address check: fail straight away if it is not the monitored one. */
    extaddr = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(extaddr, addr);
    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);

    taddr = gen_aa32_addr(s, addr, opc);
    t0 = tcg_temp_new_i32();
    t1 = load_reg(s, rt);
    if (size == 3) {
        TCGv_i64 o64 = tcg_temp_new_i64();
        TCGv_i64 n64 = tcg_temp_new_i64();

        t2 = load_reg(s, rt2);

        /*
         * For AArch32, architecturally the 32-bit word at the lowest
         * address is always Rt and the one at addr+4 is Rt2, even if
         * the CPU is big-endian. Since we're going to treat this as a
         * single 64-bit BE store, we need to put the two halves in the
         * opposite order for BE to LE, so that they end up in the right
         * places.  We don't want gen_aa32_st_i64, because that checks
         * SCTLR_B as if for an architectural 64-bit access.
         */
        if (s->be_data == MO_BE) {
            tcg_gen_concat_i32_i64(n64, t2, t1);
        } else {
            tcg_gen_concat_i32_i64(n64, t1, t2);
        }

        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
                                   get_mem_index(s), opc);

        /* Status is 1 if the old memory value differed from the expected one. */
        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
        tcg_gen_extrl_i64_i32(t0, o64);
    } else {
        /* The expected value is the low 32 bits of cpu_exclusive_val. */
        t2 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
    }
    tcg_gen_mov_i32(cpu_R[rd], t0);
    tcg_gen_br(done_label);

    gen_set_label(fail_label);
    tcg_gen_movi_i32(cpu_R[rd], 1);
    gen_set_label(done_label);
    /* Both paths end by clearing the monitored address. */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
4957 
/* gen_srs:
 * @s: DisasContext
 * @mode: mode field from insn (which stack to store to)
 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
 * @writeback: true if writeback bit set
 *
 * Generate code for the SRS (Store Return State) insn.
 *
 * Two words are stored relative to the banked r13 of @mode: r14 at the
 * lower address and the SPSR at the next word.  With @writeback the
 * banked r13 is updated afterwards.
 */
static void gen_srs(DisasContext *s,
                    uint32_t mode, uint32_t amode, bool writeback)
{
    int32_t offset;
    TCGv_i32 addr, tmp;
    bool undef = false;

    /* SRS is:
     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
     *   and specified mode is monitor mode
     * - UNDEFINED in Hyp mode
     * - UNPREDICTABLE in User or System mode
     * - UNPREDICTABLE if the specified mode is:
     * -- not implemented
     * -- not a valid mode number
     * -- a mode that's at a higher exception level
     * -- Monitor, if we are Non-secure
     * For the UNPREDICTABLE cases we choose to UNDEF.
     */
    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
        return;
    }

    if (s->current_el == 0 || s->current_el == 2) {
        undef = true;
    }

    switch (mode) {
    case ARM_CPU_MODE_USR:
    case ARM_CPU_MODE_FIQ:
    case ARM_CPU_MODE_IRQ:
    case ARM_CPU_MODE_SVC:
    case ARM_CPU_MODE_ABT:
    case ARM_CPU_MODE_UND:
    case ARM_CPU_MODE_SYS:
        break;
    case ARM_CPU_MODE_HYP:
        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
            undef = true;
        }
        break;
    case ARM_CPU_MODE_MON:
        /* No need to check specifically for "are we non-secure" because
         * we've already made EL0 UNDEF and handled the trap for S-EL1;
         * so if this isn't EL3 then we must be non-secure.
         */
        if (s->current_el != 3) {
            undef = true;
        }
        break;
    default:
        undef = true;
    }

    if (undef) {
        unallocated_encoding(s);
        return;
    }

    addr = tcg_temp_new_i32();
    /* get_r13_banked() will raise an exception if called from System mode */
    gen_set_condexec(s);
    gen_update_pc(s, 0);
    gen_helper_get_r13_banked(addr, tcg_env, tcg_constant_i32(mode));
    /* Offset from the banked r13 to the lower of the two stored words. */
    switch (amode) {
    case 0: /* DA */
        offset = -4;
        break;
    case 1: /* IA */
        offset = 0;
        break;
    case 2: /* DB */
        offset = -8;
        break;
    case 3: /* IB */
        offset = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_gen_addi_i32(addr, addr, offset);
    /* First word: r14. */
    tmp = load_reg(s, 14);
    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    /* Second word, 4 bytes higher: the SPSR. */
    tmp = load_cpu_field(spsr);
    tcg_gen_addi_i32(addr, addr, 4);
    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    if (writeback) {
        /*
         * addr now points at the second stored word; these offsets turn
         * it into the new r13: original r13 - 8 for DA/DB, original
         * r13 + 8 for IA/IB.
         */
        switch (amode) {
        case 0:
            offset = -8;
            break;
        case 1:
            offset = 4;
            break;
        case 2:
            offset = -4;
            break;
        case 3:
            offset = 0;
            break;
        default:
            g_assert_not_reached();
        }
        tcg_gen_addi_i32(addr, addr, offset);
        gen_helper_set_r13_banked(tcg_env, tcg_constant_i32(mode), addr);
    }
    s->base.is_jmp = DISAS_UPDATE_EXIT;
}
5076 
5077 /* Skip this instruction if the ARM condition is false */
5078 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5079 {
5080     arm_gen_condlabel(s);
5081     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5082 }
5083 
5084 
5085 /*
5086  * Constant expanders used by T16/T32 decode
5087  */
5088 
5089 /* Return only the rotation part of T32ExpandImm.  */
5090 static int t32_expandimm_rot(DisasContext *s, int x)
5091 {
5092     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5093 }
5094 
5095 /* Return the unrotated immediate from T32ExpandImm.  */
5096 static int t32_expandimm_imm(DisasContext *s, int x)
5097 {
5098     int imm = extract32(x, 0, 8);
5099 
5100     switch (extract32(x, 8, 4)) {
5101     case 0: /* XY */
5102         /* Nothing to do.  */
5103         break;
5104     case 1: /* 00XY00XY */
5105         imm *= 0x00010001;
5106         break;
5107     case 2: /* XY00XY00 */
5108         imm *= 0x01000100;
5109         break;
5110     case 3: /* XYXYXYXY */
5111         imm *= 0x01010101;
5112         break;
5113     default:
5114         /* Rotated constant.  */
5115         imm |= 0x80;
5116         break;
5117     }
5118     return imm;
5119 }
5120 
5121 static int t32_branch24(DisasContext *s, int x)
5122 {
5123     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5124     x ^= !(x < 0) * (3 << 21);
5125     /* Append the final zero.  */
5126     return x << 1;
5127 }
5128 
5129 static int t16_setflags(DisasContext *s)
5130 {
5131     return s->condexec_mask == 0;
5132 }
5133 
5134 static int t16_push_list(DisasContext *s, int x)
5135 {
5136     return (x & 0xff) | (x & 0x100) << (14 - 8);
5137 }
5138 
5139 static int t16_pop_list(DisasContext *s, int x)
5140 {
5141     return (x & 0xff) | (x & 0x100) << (15 - 8);
5142 }
5143 
5144 /*
5145  * Include the generated decoders.
5146  */
5147 
5148 #include "decode-a32.c.inc"
5149 #include "decode-a32-uncond.c.inc"
5150 #include "decode-t32.c.inc"
5151 #include "decode-t16.c.inc"
5152 
5153 static bool valid_cp(DisasContext *s, int cp)
5154 {
5155     /*
5156      * Return true if this coprocessor field indicates something
5157      * that's really a possible coprocessor.
5158      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5159      * and of those only cp14 and cp15 were used for registers.
5160      * cp10 and cp11 were used for VFP and Neon, whose decode is
5161      * dealt with elsewhere. With the advent of fp16, cp9 is also
5162      * now part of VFP.
5163      * For v8A and later, the encoding has been tightened so that
5164      * only cp14 and cp15 are valid, and other values aren't considered
5165      * to be in the coprocessor-instruction space at all. v8M still
5166      * permits coprocessors 0..7.
5167      * For XScale, we must not decode the XScale cp0, cp1 space as
5168      * a standard coprocessor insn, because we want to fall through to
5169      * the legacy disas_xscale_insn() decoder after decodetree is done.
5170      */
5171     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5172         return false;
5173     }
5174 
5175     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5176         !arm_dc_feature(s, ARM_FEATURE_M)) {
5177         return cp >= 14;
5178     }
5179     return cp < 8 || cp >= 14;
5180 }
5181 
5182 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5183 {
5184     if (!valid_cp(s, a->cp)) {
5185         return false;
5186     }
5187     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5188                    false, a->rt, 0);
5189     return true;
5190 }
5191 
5192 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5193 {
5194     if (!valid_cp(s, a->cp)) {
5195         return false;
5196     }
5197     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5198                    true, a->rt, 0);
5199     return true;
5200 }
5201 
5202 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5203 {
5204     if (!valid_cp(s, a->cp)) {
5205         return false;
5206     }
5207     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5208                    false, a->rt, a->rt2);
5209     return true;
5210 }
5211 
5212 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5213 {
5214     if (!valid_cp(s, a->cp)) {
5215         return false;
5216     }
5217     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5218                    true, a->rt, a->rt2);
5219     return true;
5220 }
5221 
5222 /* Helpers to swap operands for reverse-subtract.  */
5223 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5224 {
5225     tcg_gen_sub_i32(dst, b, a);
5226 }
5227 
5228 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5229 {
5230     gen_sub_CC(dst, b, a);
5231 }
5232 
5233 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5234 {
5235     gen_sub_carry(dest, b, a);
5236 }
5237 
5238 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5239 {
5240     gen_sbc_CC(dest, b, a);
5241 }
5242 
/*
 * Helpers for the data processing routines.
 *
 * After the computation store the results back.
 * This may be suppressed altogether (STREG_NONE), require a runtime
 * check against the stack limits (STREG_SP_CHECK), or generate an
 * exception return.  Oh, or store into a register.
 *
 * Always return true, indicating success for a trans_* function.
 */
typedef enum {
   STREG_NONE,      /* discard the result: nothing is stored */
   STREG_NORMAL,    /* store_reg() in Thumb state, store_reg_bx() otherwise */
   STREG_SP_CHECK,  /* store via store_sp_checked() */
   STREG_EXC_RET,   /* hand the value to gen_exception_return() */
} StoreRegKind;
5259 
5260 static bool store_reg_kind(DisasContext *s, int rd,
5261                             TCGv_i32 val, StoreRegKind kind)
5262 {
5263     switch (kind) {
5264     case STREG_NONE:
5265         return true;
5266     case STREG_NORMAL:
5267         /* See ALUWritePC: Interworking only from a32 mode. */
5268         if (s->thumb) {
5269             store_reg(s, rd, val);
5270         } else {
5271             store_reg_bx(s, rd, val);
5272         }
5273         return true;
5274     case STREG_SP_CHECK:
5275         store_sp_checked(s, val);
5276         return true;
5277     case STREG_EXC_RET:
5278         gen_exception_return(s, val);
5279         return true;
5280     }
5281     g_assert_not_reached();
5282 }
5283 
5284 /*
5285  * Data Processing (register)
5286  *
5287  * Operate, with set flags, one register source,
5288  * one immediate shifted register source, and a destination.
5289  */
5290 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5291                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5292                          int logic_cc, StoreRegKind kind)
5293 {
5294     TCGv_i32 tmp1, tmp2;
5295 
5296     tmp2 = load_reg(s, a->rm);
5297     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5298     tmp1 = load_reg(s, a->rn);
5299 
5300     gen(tmp1, tmp1, tmp2);
5301 
5302     if (logic_cc) {
5303         gen_logic_CC(tmp1);
5304     }
5305     return store_reg_kind(s, a->rd, tmp1, kind);
5306 }
5307 
5308 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5309                          void (*gen)(TCGv_i32, TCGv_i32),
5310                          int logic_cc, StoreRegKind kind)
5311 {
5312     TCGv_i32 tmp;
5313 
5314     tmp = load_reg(s, a->rm);
5315     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5316 
5317     gen(tmp, tmp);
5318     if (logic_cc) {
5319         gen_logic_CC(tmp);
5320     }
5321     return store_reg_kind(s, a->rd, tmp, kind);
5322 }
5323 
5324 /*
5325  * Data-processing (register-shifted register)
5326  *
5327  * Operate, with set flags, one register source,
5328  * one register shifted register source, and a destination.
5329  */
5330 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5331                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5332                          int logic_cc, StoreRegKind kind)
5333 {
5334     TCGv_i32 tmp1, tmp2;
5335 
5336     tmp1 = load_reg(s, a->rs);
5337     tmp2 = load_reg(s, a->rm);
5338     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5339     tmp1 = load_reg(s, a->rn);
5340 
5341     gen(tmp1, tmp1, tmp2);
5342 
5343     if (logic_cc) {
5344         gen_logic_CC(tmp1);
5345     }
5346     return store_reg_kind(s, a->rd, tmp1, kind);
5347 }
5348 
5349 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5350                          void (*gen)(TCGv_i32, TCGv_i32),
5351                          int logic_cc, StoreRegKind kind)
5352 {
5353     TCGv_i32 tmp1, tmp2;
5354 
5355     tmp1 = load_reg(s, a->rs);
5356     tmp2 = load_reg(s, a->rm);
5357     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5358 
5359     gen(tmp2, tmp2);
5360     if (logic_cc) {
5361         gen_logic_CC(tmp2);
5362     }
5363     return store_reg_kind(s, a->rd, tmp2, kind);
5364 }
5365 
5366 /*
5367  * Data-processing (immediate)
5368  *
5369  * Operate, with set flags, one register source,
5370  * one rotated immediate, and a destination.
5371  *
5372  * Note that logic_cc && a->rot setting CF based on the msb of the
5373  * immediate is the reason why we must pass in the unrotated form
5374  * of the immediate.
5375  */
5376 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5377                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5378                          int logic_cc, StoreRegKind kind)
5379 {
5380     TCGv_i32 tmp1;
5381     uint32_t imm;
5382 
5383     imm = ror32(a->imm, a->rot);
5384     if (logic_cc && a->rot) {
5385         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5386     }
5387     tmp1 = load_reg(s, a->rn);
5388 
5389     gen(tmp1, tmp1, tcg_constant_i32(imm));
5390 
5391     if (logic_cc) {
5392         gen_logic_CC(tmp1);
5393     }
5394     return store_reg_kind(s, a->rd, tmp1, kind);
5395 }
5396 
5397 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5398                          void (*gen)(TCGv_i32, TCGv_i32),
5399                          int logic_cc, StoreRegKind kind)
5400 {
5401     TCGv_i32 tmp;
5402     uint32_t imm;
5403 
5404     imm = ror32(a->imm, a->rot);
5405     if (logic_cc && a->rot) {
5406         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5407     }
5408 
5409     tmp = tcg_temp_new_i32();
5410     gen(tmp, tcg_constant_i32(imm));
5411 
5412     if (logic_cc) {
5413         gen_logic_CC(tmp);
5414     }
5415     return store_reg_kind(s, a->rd, tmp, kind);
5416 }
5417 
/*
 * Emit the _rrri, _rrrr and _rri translators for a three-operand insn.
 * K is evaluated into a local before the op_* call is made, so it is
 * evaluated before the OP and L argument expressions.
 */
#define DO_ANY3(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind kind_ = (K);                                       \
        return op_s_rrr_shi(s, a, OP, L, kind_);                        \
    }                                                                   \
    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind kind_ = (K);                                       \
        return op_s_rrr_shr(s, a, OP, L, kind_);                        \
    }                                                                   \
    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind kind_ = (K);                                       \
        return op_s_rri_rot(s, a, OP, L, kind_);                        \
    }
5425 
/*
 * Emit the _rxri, _rxrr and _rxi translators for a two-operand insn
 * (no rn source).  K is evaluated into a local before the op_* call is
 * made, so it is evaluated before the OP and L argument expressions.
 */
#define DO_ANY2(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind kind_ = (K);                                       \
        return op_s_rxr_shi(s, a, OP, L, kind_);                        \
    }                                                                   \
    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind kind_ = (K);                                       \
        return op_s_rxr_shr(s, a, OP, L, kind_);                        \
    }                                                                   \
    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind kind_ = (K);                                       \
        return op_s_rxi_rot(s, a, OP, L, kind_);                        \
    }
5433 
/*
 * Emit the _xrri, _xrrr and _xri translators for a comparison insn.
 * These reuse the three-operand helpers with STREG_NONE, so the
 * computed value is never written to a register.
 */
#define DO_CMP2(NAME, OP, L)                                            \
    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        return op_s_rrr_shi(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        return op_s_rrr_shr(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        return op_s_rri_rot(s, a, OP, L, STREG_NONE);                   \
    }
5441 
/* Logical operations: a->s is passed through as logic_cc. */
DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)

/*
 * Arithmetic operations: logic_cc is always false; a->s instead
 * selects the _CC variant of the generator.
 */
DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)

/*
 * Comparisons: nothing is stored.  TST/TEQ pass logic_cc as true,
 * CMN/CMP use the _CC generators with logic_cc false.
 */
DO_CMP2(TST, tcg_gen_and_i32, true)
DO_CMP2(TEQ, tcg_gen_xor_i32, true)
DO_CMP2(CMN, gen_add_CC, false)
DO_CMP2(CMP, gen_sub_CC, false)

/* ADD with rd and rn both 13 is stored with STREG_SP_CHECK. */
DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5459 
/*
 * Note for the computation of StoreRegKind we return out of the
 * middle of the functions that are expanded by DO_ANY3, and that
 * we modify a->s via that parameter before it is used by OP.
 *
 * Consequently, for the exception-return form (rd == 15 with S set)
 * OP sees a->s == 0 and picks tcg_gen_sub_i32 rather than gen_sub_CC.
 */
DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
        ({
            StoreRegKind ret = STREG_NORMAL;
            if (a->rd == 15 && a->s) {
                /*
                 * See ALUExceptionReturn:
                 * In User mode, UNPREDICTABLE; we choose UNDEF.
                 * In Hyp mode, UNDEFINED.
                 */
                if (IS_USER(s) || s->current_el == 2) {
                    unallocated_encoding(s);
                    return true;
                }
                /* There is no writeback of nzcv to PSTATE.  */
                a->s = 0;
                ret = STREG_EXC_RET;
            } else if (a->rd == 13 && a->rn == 13) {
                /* rd and rn both 13: use the SP-checked store. */
                ret = STREG_SP_CHECK;
            }
            ret;
        }))
5486 
/*
 * MOV: the StoreRegKind expression below may return out of the middle
 * of the functions expanded by DO_ANY2 (the UNDEF case), and for the
 * exception-return form it clears a->s before a->s is read as the
 * logic_cc argument.
 */
DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
        ({
            StoreRegKind ret = STREG_NORMAL;
            if (a->rd == 15 && a->s) {
                /*
                 * See ALUExceptionReturn:
                 * In User mode, UNPREDICTABLE; we choose UNDEF.
                 * In Hyp mode, UNDEFINED.
                 */
                if (IS_USER(s) || s->current_el == 2) {
                    unallocated_encoding(s);
                    return true;
                }
                /* There is no writeback of nzcv to PSTATE.  */
                a->s = 0;
                ret = STREG_EXC_RET;
            } else if (a->rd == 13) {
                /* Any MOV into r13 uses the SP-checked store. */
                ret = STREG_SP_CHECK;
            }
            ret;
        }))
5508 
/* MVN: bitwise NOT of the operand; a->s is passed through as logic_cc. */
DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5510 
5511 /*
5512  * ORN is only available with T32, so there is no register-shifted-register
5513  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5514  */
5515 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5516 {
5517     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5518 }
5519 
5520 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5521 {
5522     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5523 }
5524 
5525 #undef DO_ANY3
5526 #undef DO_ANY2
5527 #undef DO_CMP2
5528 
5529 static bool trans_ADR(DisasContext *s, arg_ri *a)
5530 {
5531     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5532     return true;
5533 }
5534 
5535 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5536 {
5537     if (!ENABLE_ARCH_6T2) {
5538         return false;
5539     }
5540 
5541     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5542     return true;
5543 }
5544 
5545 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5546 {
5547     TCGv_i32 tmp;
5548 
5549     if (!ENABLE_ARCH_6T2) {
5550         return false;
5551     }
5552 
5553     tmp = load_reg(s, a->rd);
5554     tcg_gen_ext16u_i32(tmp, tmp);
5555     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5556     store_reg(s, a->rd, tmp);
5557     return true;
5558 }
5559 
5560 /*
5561  * v8.1M MVE wide-shifts
5562  */
5563 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5564                           WideShiftImmFn *fn)
5565 {
5566     TCGv_i64 rda;
5567     TCGv_i32 rdalo, rdahi;
5568 
5569     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5570         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5571         return false;
5572     }
5573     if (a->rdahi == 15) {
5574         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5575         return false;
5576     }
5577     if (!dc_isar_feature(aa32_mve, s) ||
5578         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5579         a->rdahi == 13) {
5580         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5581         unallocated_encoding(s);
5582         return true;
5583     }
5584 
5585     if (a->shim == 0) {
5586         a->shim = 32;
5587     }
5588 
5589     rda = tcg_temp_new_i64();
5590     rdalo = load_reg(s, a->rdalo);
5591     rdahi = load_reg(s, a->rdahi);
5592     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5593 
5594     fn(rda, rda, a->shim);
5595 
5596     tcg_gen_extrl_i64_i32(rdalo, rda);
5597     tcg_gen_extrh_i64_i32(rdahi, rda);
5598     store_reg(s, a->rdalo, rdalo);
5599     store_reg(s, a->rdahi, rdahi);
5600 
5601     return true;
5602 }
5603 
/* ASRL (immediate): do_mve_shl_ri() with tcg_gen_sari_i64 as the shift. */
static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
{
    return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
}
5608 
/* LSLL (immediate): do_mve_shl_ri() with tcg_gen_shli_i64 as the shift. */
static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
{
    return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
}
5613 
/* LSRL (immediate): do_mve_shl_ri() with tcg_gen_shri_i64 as the shift. */
static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
{
    return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
}
5618 
5619 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5620 {
5621     gen_helper_mve_sqshll(r, tcg_env, n, tcg_constant_i32(shift));
5622 }
5623 
/* SQSHLL (immediate): do_mve_shl_ri() with the mve_sqshll helper adapter. */
static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
{
    return do_mve_shl_ri(s, a, gen_mve_sqshll);
}
5628 
5629 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5630 {
5631     gen_helper_mve_uqshll(r, tcg_env, n, tcg_constant_i32(shift));
5632 }
5633 
/* UQSHLL (immediate): do_mve_shl_ri() with the mve_uqshll helper adapter. */
static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
{
    return do_mve_shl_ri(s, a, gen_mve_uqshll);
}
5638 
/* SRSHRL (immediate): do_mve_shl_ri() with gen_srshr64_i64 as the shift. */
static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
{
    return do_mve_shl_ri(s, a, gen_srshr64_i64);
}
5643 
/* URSHRL (immediate): do_mve_shl_ri() with gen_urshr64_i64 as the shift. */
static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
{
    return do_mve_shl_ri(s, a, gen_urshr64_i64);
}
5648 
5649 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5650 {
5651     TCGv_i64 rda;
5652     TCGv_i32 rdalo, rdahi;
5653 
5654     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5655         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5656         return false;
5657     }
5658     if (a->rdahi == 15) {
5659         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5660         return false;
5661     }
5662     if (!dc_isar_feature(aa32_mve, s) ||
5663         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5664         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5665         a->rm == a->rdahi || a->rm == a->rdalo) {
5666         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5667         unallocated_encoding(s);
5668         return true;
5669     }
5670 
5671     rda = tcg_temp_new_i64();
5672     rdalo = load_reg(s, a->rdalo);
5673     rdahi = load_reg(s, a->rdahi);
5674     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5675 
5676     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5677     fn(rda, tcg_env, rda, cpu_R[a->rm]);
5678 
5679     tcg_gen_extrl_i64_i32(rdalo, rda);
5680     tcg_gen_extrh_i64_i32(rdahi, rda);
5681     store_reg(s, a->rdalo, rdalo);
5682     store_reg(s, a->rdahi, rdahi);
5683 
5684     return true;
5685 }
5686 
/* LSLL (register): do_mve_shl_rr() using the mve_ushll helper. */
static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
{
    return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
}
5691 
/* ASRL (register): do_mve_shl_rr() using the mve_sshrl helper. */
static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
{
    return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
}
5696 
/* UQRSHLL, 64-bit variant: do_mve_shl_rr() using the mve_uqrshll helper. */
static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
{
    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
}
5701 
/* SQRSHRL, 64-bit variant: do_mve_shl_rr() using the mve_sqrshrl helper. */
static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
{
    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
}
5706 
/* UQRSHLL, 48-bit variant: do_mve_shl_rr() using the mve_uqrshll48 helper. */
static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
{
    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
}
5711 
/* SQRSHRL, 48-bit variant: do_mve_shl_rr() using the mve_sqrshrl48 helper. */
static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
{
    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
}
5716 
5717 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5718 {
5719     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5720         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5721         return false;
5722     }
5723     if (!dc_isar_feature(aa32_mve, s) ||
5724         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5725         a->rda == 13 || a->rda == 15) {
5726         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5727         unallocated_encoding(s);
5728         return true;
5729     }
5730 
5731     if (a->shim == 0) {
5732         a->shim = 32;
5733     }
5734     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5735 
5736     return true;
5737 }
5738 
/* URSHR (immediate): do_mve_sh_ri() with gen_urshr32_i32 as the shift. */
static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
{
    return do_mve_sh_ri(s, a, gen_urshr32_i32);
}
5743 
/* SRSHR (immediate): do_mve_sh_ri() with gen_srshr32_i32 as the shift. */
static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
{
    return do_mve_sh_ri(s, a, gen_srshr32_i32);
}
5748 
5749 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5750 {
5751     gen_helper_mve_sqshl(r, tcg_env, n, tcg_constant_i32(shift));
5752 }
5753 
/* SQSHL (immediate): do_mve_sh_ri() with the mve_sqshl helper adapter. */
static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
{
    return do_mve_sh_ri(s, a, gen_mve_sqshl);
}
5758 
5759 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5760 {
5761     gen_helper_mve_uqshl(r, tcg_env, n, tcg_constant_i32(shift));
5762 }
5763 
/* UQSHL (immediate): do_mve_sh_ri() with the mve_uqshl helper adapter. */
static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
{
    return do_mve_sh_ri(s, a, gen_mve_uqshl);
}
5768 
5769 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5770 {
5771     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5772         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5773         return false;
5774     }
5775     if (!dc_isar_feature(aa32_mve, s) ||
5776         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5777         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5778         a->rm == a->rda) {
5779         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5780         unallocated_encoding(s);
5781         return true;
5782     }
5783 
5784     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5785     fn(cpu_R[a->rda], tcg_env, cpu_R[a->rda], cpu_R[a->rm]);
5786     return true;
5787 }
5788 
/* SQRSHR (register): do_mve_sh_rr() using the mve_sqrshr helper. */
static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
{
    return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
}
5793 
/* UQRSHL (register): do_mve_sh_rr() using the mve_uqrshl helper. */
static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
{
    return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
}
5798 
5799 /*
5800  * Multiply and multiply accumulate
5801  */
5802 
5803 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5804 {
5805     TCGv_i32 t1, t2;
5806 
5807     t1 = load_reg(s, a->rn);
5808     t2 = load_reg(s, a->rm);
5809     tcg_gen_mul_i32(t1, t1, t2);
5810     if (add) {
5811         t2 = load_reg(s, a->ra);
5812         tcg_gen_add_i32(t1, t1, t2);
5813     }
5814     if (a->s) {
5815         gen_logic_CC(t1);
5816     }
5817     store_reg(s, a->rd, t1);
5818     return true;
5819 }
5820 
5821 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5822 {
5823     return op_mla(s, a, false);
5824 }
5825 
5826 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5827 {
5828     return op_mla(s, a, true);
5829 }
5830 
5831 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5832 {
5833     TCGv_i32 t1, t2;
5834 
5835     if (!ENABLE_ARCH_6T2) {
5836         return false;
5837     }
5838     t1 = load_reg(s, a->rn);
5839     t2 = load_reg(s, a->rm);
5840     tcg_gen_mul_i32(t1, t1, t2);
5841     t2 = load_reg(s, a->ra);
5842     tcg_gen_sub_i32(t1, t2, t1);
5843     store_reg(s, a->rd, t1);
5844     return true;
5845 }
5846 
5847 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5848 {
5849     TCGv_i32 t0, t1, t2, t3;
5850 
5851     t0 = load_reg(s, a->rm);
5852     t1 = load_reg(s, a->rn);
5853     if (uns) {
5854         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5855     } else {
5856         tcg_gen_muls2_i32(t0, t1, t0, t1);
5857     }
5858     if (add) {
5859         t2 = load_reg(s, a->ra);
5860         t3 = load_reg(s, a->rd);
5861         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5862     }
5863     if (a->s) {
5864         gen_logicq_cc(t0, t1);
5865     }
5866     store_reg(s, a->ra, t0);
5867     store_reg(s, a->rd, t1);
5868     return true;
5869 }
5870 
5871 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5872 {
5873     return op_mlal(s, a, true, false);
5874 }
5875 
5876 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5877 {
5878     return op_mlal(s, a, false, false);
5879 }
5880 
5881 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5882 {
5883     return op_mlal(s, a, true, true);
5884 }
5885 
5886 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5887 {
5888     return op_mlal(s, a, false, true);
5889 }
5890 
5891 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5892 {
5893     TCGv_i32 t0, t1, t2, zero;
5894 
5895     if (s->thumb
5896         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5897         : !ENABLE_ARCH_6) {
5898         return false;
5899     }
5900 
5901     t0 = load_reg(s, a->rm);
5902     t1 = load_reg(s, a->rn);
5903     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5904     zero = tcg_constant_i32(0);
5905     t2 = load_reg(s, a->ra);
5906     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5907     t2 = load_reg(s, a->rd);
5908     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5909     store_reg(s, a->ra, t0);
5910     store_reg(s, a->rd, t1);
5911     return true;
5912 }
5913 
5914 /*
5915  * Saturating addition and subtraction
5916  */
5917 
5918 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5919 {
5920     TCGv_i32 t0, t1;
5921 
5922     if (s->thumb
5923         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5924         : !ENABLE_ARCH_5TE) {
5925         return false;
5926     }
5927 
5928     t0 = load_reg(s, a->rm);
5929     t1 = load_reg(s, a->rn);
5930     if (doub) {
5931         gen_helper_add_saturate(t1, tcg_env, t1, t1);
5932     }
5933     if (add) {
5934         gen_helper_add_saturate(t0, tcg_env, t0, t1);
5935     } else {
5936         gen_helper_sub_saturate(t0, tcg_env, t0, t1);
5937     }
5938     store_reg(s, a->rd, t0);
5939     return true;
5940 }
5941 
5942 #define DO_QADDSUB(NAME, ADD, DOUB) \
5943 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5944 {                                                        \
5945     return op_qaddsub(s, a, ADD, DOUB);                  \
5946 }
5947 
5948 DO_QADDSUB(QADD, true, false)
5949 DO_QADDSUB(QSUB, false, false)
5950 DO_QADDSUB(QDADD, true, true)
5951 DO_QADDSUB(QDSUB, false, true)
5952 
5953 #undef DO_QADDSUB
5954 
5955 /*
5956  * Halfword multiply and multiply accumulate
5957  */
5958 
5959 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5960                        int add_long, bool nt, bool mt)
5961 {
5962     TCGv_i32 t0, t1, tl, th;
5963 
5964     if (s->thumb
5965         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5966         : !ENABLE_ARCH_5TE) {
5967         return false;
5968     }
5969 
5970     t0 = load_reg(s, a->rn);
5971     t1 = load_reg(s, a->rm);
5972     gen_mulxy(t0, t1, nt, mt);
5973 
5974     switch (add_long) {
5975     case 0:
5976         store_reg(s, a->rd, t0);
5977         break;
5978     case 1:
5979         t1 = load_reg(s, a->ra);
5980         gen_helper_add_setq(t0, tcg_env, t0, t1);
5981         store_reg(s, a->rd, t0);
5982         break;
5983     case 2:
5984         tl = load_reg(s, a->ra);
5985         th = load_reg(s, a->rd);
5986         /* Sign-extend the 32-bit product to 64 bits.  */
5987         t1 = tcg_temp_new_i32();
5988         tcg_gen_sari_i32(t1, t0, 31);
5989         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5990         store_reg(s, a->ra, tl);
5991         store_reg(s, a->rd, th);
5992         break;
5993     default:
5994         g_assert_not_reached();
5995     }
5996     return true;
5997 }
5998 
5999 #define DO_SMLAX(NAME, add, nt, mt) \
6000 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6001 {                                                          \
6002     return op_smlaxxx(s, a, add, nt, mt);                  \
6003 }
6004 
6005 DO_SMLAX(SMULBB, 0, 0, 0)
6006 DO_SMLAX(SMULBT, 0, 0, 1)
6007 DO_SMLAX(SMULTB, 0, 1, 0)
6008 DO_SMLAX(SMULTT, 0, 1, 1)
6009 
6010 DO_SMLAX(SMLABB, 1, 0, 0)
6011 DO_SMLAX(SMLABT, 1, 0, 1)
6012 DO_SMLAX(SMLATB, 1, 1, 0)
6013 DO_SMLAX(SMLATT, 1, 1, 1)
6014 
6015 DO_SMLAX(SMLALBB, 2, 0, 0)
6016 DO_SMLAX(SMLALBT, 2, 0, 1)
6017 DO_SMLAX(SMLALTB, 2, 1, 0)
6018 DO_SMLAX(SMLALTT, 2, 1, 1)
6019 
6020 #undef DO_SMLAX
6021 
6022 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6023 {
6024     TCGv_i32 t0, t1;
6025 
6026     if (!ENABLE_ARCH_5TE) {
6027         return false;
6028     }
6029 
6030     t0 = load_reg(s, a->rn);
6031     t1 = load_reg(s, a->rm);
6032     /*
6033      * Since the nominal result is product<47:16>, shift the 16-bit
6034      * input up by 16 bits, so that the result is at product<63:32>.
6035      */
6036     if (mt) {
6037         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6038     } else {
6039         tcg_gen_shli_i32(t1, t1, 16);
6040     }
6041     tcg_gen_muls2_i32(t0, t1, t0, t1);
6042     if (add) {
6043         t0 = load_reg(s, a->ra);
6044         gen_helper_add_setq(t1, tcg_env, t1, t0);
6045     }
6046     store_reg(s, a->rd, t1);
6047     return true;
6048 }
6049 
6050 #define DO_SMLAWX(NAME, add, mt) \
6051 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6052 {                                                          \
6053     return op_smlawx(s, a, add, mt);                       \
6054 }
6055 
6056 DO_SMLAWX(SMULWB, 0, 0)
6057 DO_SMLAWX(SMULWT, 0, 1)
6058 DO_SMLAWX(SMLAWB, 1, 0)
6059 DO_SMLAWX(SMLAWT, 1, 1)
6060 
6061 #undef DO_SMLAWX
6062 
6063 /*
6064  * MSR (immediate) and hints
6065  */
6066 
6067 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6068 {
6069     /*
6070      * When running single-threaded TCG code, use the helper to ensure that
6071      * the next round-robin scheduled vCPU gets a crack.  When running in
6072      * MTTCG we don't generate jumps to the helper as it won't affect the
6073      * scheduling of other vCPUs.
6074      */
6075     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6076         gen_update_pc(s, curr_insn_len(s));
6077         s->base.is_jmp = DISAS_YIELD;
6078     }
6079     return true;
6080 }
6081 
6082 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6083 {
6084     /*
6085      * When running single-threaded TCG code, use the helper to ensure that
6086      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6087      * just skip this instruction.  Currently the SEV/SEVL instructions,
6088      * which are *one* of many ways to wake the CPU from WFE, are not
6089      * implemented so we can't sleep like WFI does.
6090      */
6091     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6092         gen_update_pc(s, curr_insn_len(s));
6093         s->base.is_jmp = DISAS_WFE;
6094     }
6095     return true;
6096 }
6097 
6098 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6099 {
6100     /* For WFI, halt the vCPU until an IRQ. */
6101     gen_update_pc(s, curr_insn_len(s));
6102     s->base.is_jmp = DISAS_WFI;
6103     return true;
6104 }
6105 
6106 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6107 {
6108     /*
6109      * For M-profile, minimal-RAS ESB can be a NOP.
6110      * Without RAS, we must implement this as NOP.
6111      */
6112     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6113         /*
6114          * QEMU does not have a source of physical SErrors,
6115          * so we are only concerned with virtual SErrors.
6116          * The pseudocode in the ARM for this case is
6117          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6118          *      AArch32.vESBOperation();
6119          * Most of the condition can be evaluated at translation time.
6120          * Test for EL2 present, and defer test for SEL2 to runtime.
6121          */
6122         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6123             gen_helper_vesb(tcg_env);
6124         }
6125     }
6126     return true;
6127 }
6128 
/* NOP: emits no code and always reports a successful decode. */
static bool trans_NOP(DisasContext *s, arg_NOP *a)
{
    return true;
}
6133 
6134 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6135 {
6136     uint32_t val = ror32(a->imm, a->rot * 2);
6137     uint32_t mask = msr_mask(s, a->mask, a->r);
6138 
6139     if (gen_set_psr_im(s, mask, a->r, val)) {
6140         unallocated_encoding(s);
6141     }
6142     return true;
6143 }
6144 
6145 /*
6146  * Cyclic Redundancy Check
6147  */
6148 
6149 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6150 {
6151     TCGv_i32 t1, t2, t3;
6152 
6153     if (!dc_isar_feature(aa32_crc32, s)) {
6154         return false;
6155     }
6156 
6157     t1 = load_reg(s, a->rn);
6158     t2 = load_reg(s, a->rm);
6159     switch (sz) {
6160     case MO_8:
6161         gen_uxtb(t2);
6162         break;
6163     case MO_16:
6164         gen_uxth(t2);
6165         break;
6166     case MO_32:
6167         break;
6168     default:
6169         g_assert_not_reached();
6170     }
6171     t3 = tcg_constant_i32(1 << sz);
6172     if (c) {
6173         gen_helper_crc32c(t1, t1, t2, t3);
6174     } else {
6175         gen_helper_crc32(t1, t1, t2, t3);
6176     }
6177     store_reg(s, a->rd, t1);
6178     return true;
6179 }
6180 
6181 #define DO_CRC32(NAME, c, sz) \
6182 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6183     { return op_crc32(s, a, c, sz); }
6184 
6185 DO_CRC32(CRC32B, false, MO_8)
6186 DO_CRC32(CRC32H, false, MO_16)
6187 DO_CRC32(CRC32W, false, MO_32)
6188 DO_CRC32(CRC32CB, true, MO_8)
6189 DO_CRC32(CRC32CH, true, MO_16)
6190 DO_CRC32(CRC32CW, true, MO_32)
6191 
6192 #undef DO_CRC32
6193 
6194 /*
6195  * Miscellaneous instructions
6196  */
6197 
6198 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6199 {
6200     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6201         return false;
6202     }
6203     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6204     return true;
6205 }
6206 
6207 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6208 {
6209     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6210         return false;
6211     }
6212     gen_msr_banked(s, a->r, a->sysm, a->rn);
6213     return true;
6214 }
6215 
6216 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6217 {
6218     TCGv_i32 tmp;
6219 
6220     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6221         return false;
6222     }
6223     if (a->r) {
6224         if (IS_USER(s)) {
6225             unallocated_encoding(s);
6226             return true;
6227         }
6228         tmp = load_cpu_field(spsr);
6229     } else {
6230         tmp = tcg_temp_new_i32();
6231         gen_helper_cpsr_read(tmp, tcg_env);
6232     }
6233     store_reg(s, a->rd, tmp);
6234     return true;
6235 }
6236 
6237 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6238 {
6239     TCGv_i32 tmp;
6240     uint32_t mask = msr_mask(s, a->mask, a->r);
6241 
6242     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6243         return false;
6244     }
6245     tmp = load_reg(s, a->rn);
6246     if (gen_set_psr(s, mask, a->r, tmp)) {
6247         unallocated_encoding(s);
6248     }
6249     return true;
6250 }
6251 
6252 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6253 {
6254     TCGv_i32 tmp;
6255 
6256     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6257         return false;
6258     }
6259     tmp = tcg_temp_new_i32();
6260     gen_helper_v7m_mrs(tmp, tcg_env, tcg_constant_i32(a->sysm));
6261     store_reg(s, a->rd, tmp);
6262     return true;
6263 }
6264 
6265 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6266 {
6267     TCGv_i32 addr, reg;
6268 
6269     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6270         return false;
6271     }
6272     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6273     reg = load_reg(s, a->rn);
6274     gen_helper_v7m_msr(tcg_env, addr, reg);
6275     /* If we wrote to CONTROL, the EL might have changed */
6276     gen_rebuild_hflags(s, true);
6277     gen_lookup_tb(s);
6278     return true;
6279 }
6280 
6281 static bool trans_BX(DisasContext *s, arg_BX *a)
6282 {
6283     if (!ENABLE_ARCH_4T) {
6284         return false;
6285     }
6286     gen_bx_excret(s, load_reg(s, a->rm));
6287     return true;
6288 }
6289 
6290 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6291 {
6292     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6293         return false;
6294     }
6295     /*
6296      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6297      * TBFLAGS bit on a basically-never-happens case, so call a helper
6298      * function to check for the trap and raise the exception if needed
6299      * (passing it the register number for the syndrome value).
6300      * v8A doesn't have this HSTR bit.
6301      */
6302     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6303         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6304         s->current_el < 2 && s->ns) {
6305         gen_helper_check_bxj_trap(tcg_env, tcg_constant_i32(a->rm));
6306     }
6307     /* Trivial implementation equivalent to bx.  */
6308     gen_bx(s, load_reg(s, a->rm));
6309     return true;
6310 }
6311 
6312 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6313 {
6314     TCGv_i32 tmp;
6315 
6316     if (!ENABLE_ARCH_5) {
6317         return false;
6318     }
6319     tmp = load_reg(s, a->rm);
6320     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6321     gen_bx(s, tmp);
6322     return true;
6323 }
6324 
6325 /*
6326  * BXNS/BLXNS: only exist for v8M with the security extensions,
6327  * and always UNDEF if NonSecure.  We don't implement these in
6328  * the user-only mode either (in theory you can use them from
6329  * Secure User mode but they are too tied in to system emulation).
6330  */
6331 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6332 {
6333     if (!s->v8m_secure || IS_USER_ONLY) {
6334         unallocated_encoding(s);
6335     } else {
6336         gen_bxns(s, a->rm);
6337     }
6338     return true;
6339 }
6340 
6341 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6342 {
6343     if (!s->v8m_secure || IS_USER_ONLY) {
6344         unallocated_encoding(s);
6345     } else {
6346         gen_blxns(s, a->rm);
6347     }
6348     return true;
6349 }
6350 
6351 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6352 {
6353     TCGv_i32 tmp;
6354 
6355     if (!ENABLE_ARCH_5) {
6356         return false;
6357     }
6358     tmp = load_reg(s, a->rm);
6359     tcg_gen_clzi_i32(tmp, tmp, 32);
6360     store_reg(s, a->rd, tmp);
6361     return true;
6362 }
6363 
6364 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6365 {
6366     TCGv_i32 tmp;
6367 
6368     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6369         return false;
6370     }
6371     if (IS_USER(s)) {
6372         unallocated_encoding(s);
6373         return true;
6374     }
6375     if (s->current_el == 2) {
6376         /* ERET from Hyp uses ELR_Hyp, not LR */
6377         tmp = load_cpu_field_low32(elr_el[2]);
6378     } else {
6379         tmp = load_reg(s, 14);
6380     }
6381     gen_exception_return(s, tmp);
6382     return true;
6383 }
6384 
6385 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6386 {
6387     gen_hlt(s, a->imm);
6388     return true;
6389 }
6390 
6391 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6392 {
6393     if (!ENABLE_ARCH_5) {
6394         return false;
6395     }
6396     /* BKPT is OK with ECI set and leaves it untouched */
6397     s->eci_handled = true;
6398     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6399         semihosting_enabled(s->current_el == 0) &&
6400         (a->imm == 0xab)) {
6401         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6402     } else {
6403         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6404     }
6405     return true;
6406 }
6407 
6408 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6409 {
6410     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6411         return false;
6412     }
6413     if (IS_USER(s)) {
6414         unallocated_encoding(s);
6415     } else {
6416         gen_hvc(s, a->imm);
6417     }
6418     return true;
6419 }
6420 
6421 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6422 {
6423     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6424         return false;
6425     }
6426     if (IS_USER(s)) {
6427         unallocated_encoding(s);
6428     } else {
6429         gen_smc(s);
6430     }
6431     return true;
6432 }
6433 
6434 static bool trans_SG(DisasContext *s, arg_SG *a)
6435 {
6436     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6437         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6438         return false;
6439     }
6440     /*
6441      * SG (v8M only)
6442      * The bulk of the behaviour for this instruction is implemented
6443      * in v7m_handle_execute_nsc(), which deals with the insn when
6444      * it is executed by a CPU in non-secure state from memory
6445      * which is Secure & NonSecure-Callable.
6446      * Here we only need to handle the remaining cases:
6447      *  * in NS memory (including the "security extension not
6448      *    implemented" case) : NOP
6449      *  * in S memory but CPU already secure (clear IT bits)
6450      * We know that the attribute for the memory this insn is
6451      * in must match the current CPU state, because otherwise
6452      * get_phys_addr_pmsav8 would have generated an exception.
6453      */
6454     if (s->v8m_secure) {
6455         /* Like the IT insn, we don't need to generate any code */
6456         s->condexec_cond = 0;
6457         s->condexec_mask = 0;
6458     }
6459     return true;
6460 }
6461 
6462 static bool trans_TT(DisasContext *s, arg_TT *a)
6463 {
6464     TCGv_i32 addr, tmp;
6465 
6466     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6467         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6468         return false;
6469     }
6470     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6471         /* We UNDEF for these UNPREDICTABLE cases */
6472         unallocated_encoding(s);
6473         return true;
6474     }
6475     if (a->A && !s->v8m_secure) {
6476         /* This case is UNDEFINED.  */
6477         unallocated_encoding(s);
6478         return true;
6479     }
6480 
6481     addr = load_reg(s, a->rn);
6482     tmp = tcg_temp_new_i32();
6483     gen_helper_v7m_tt(tmp, tcg_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6484     store_reg(s, a->rd, tmp);
6485     return true;
6486 }
6487 
6488 /*
6489  * Load/store register index
6490  */
6491 
6492 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6493 {
6494     ISSInfo ret;
6495 
6496     /* ISS not valid if writeback */
6497     if (p && !w) {
6498         ret = rd;
6499         if (curr_insn_len(s) == 2) {
6500             ret |= ISSIs16Bit;
6501         }
6502     } else {
6503         ret = ISSInvalid;
6504     }
6505     return ret;
6506 }
6507 
6508 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6509 {
6510     TCGv_i32 addr = load_reg(s, a->rn);
6511 
6512     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6513         gen_helper_v8m_stackcheck(tcg_env, addr);
6514     }
6515 
6516     if (a->p) {
6517         TCGv_i32 ofs = load_reg(s, a->rm);
6518         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6519         if (a->u) {
6520             tcg_gen_add_i32(addr, addr, ofs);
6521         } else {
6522             tcg_gen_sub_i32(addr, addr, ofs);
6523         }
6524     }
6525     return addr;
6526 }
6527 
6528 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6529                             TCGv_i32 addr, int address_offset)
6530 {
6531     if (!a->p) {
6532         TCGv_i32 ofs = load_reg(s, a->rm);
6533         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6534         if (a->u) {
6535             tcg_gen_add_i32(addr, addr, ofs);
6536         } else {
6537             tcg_gen_sub_i32(addr, addr, ofs);
6538         }
6539     } else if (!a->w) {
6540         return;
6541     }
6542     tcg_gen_addi_i32(addr, addr, address_offset);
6543     store_reg(s, a->rn, addr);
6544 }
6545 
6546 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6547                        MemOp mop, int mem_idx)
6548 {
6549     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6550     TCGv_i32 addr, tmp;
6551 
6552     addr = op_addr_rr_pre(s, a);
6553 
6554     tmp = tcg_temp_new_i32();
6555     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6556     disas_set_da_iss(s, mop, issinfo);
6557 
6558     /*
6559      * Perform base writeback before the loaded value to
6560      * ensure correct behavior with overlapping index registers.
6561      */
6562     op_addr_rr_post(s, a, addr, 0);
6563     store_reg_from_load(s, a->rt, tmp);
6564     return true;
6565 }
6566 
6567 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6568                         MemOp mop, int mem_idx)
6569 {
6570     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6571     TCGv_i32 addr, tmp;
6572 
6573     /*
6574      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6575      * is either UNPREDICTABLE or has defined behaviour
6576      */
6577     if (s->thumb && a->rn == 15) {
6578         return false;
6579     }
6580 
6581     addr = op_addr_rr_pre(s, a);
6582 
6583     tmp = load_reg(s, a->rt);
6584     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6585     disas_set_da_iss(s, mop, issinfo);
6586 
6587     op_addr_rr_post(s, a, addr, 0);
6588     return true;
6589 }
6590 
6591 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6592 {
6593     int mem_idx = get_mem_index(s);
6594     TCGv_i32 addr, tmp;
6595 
6596     if (!ENABLE_ARCH_5TE) {
6597         return false;
6598     }
6599     if (a->rt & 1) {
6600         unallocated_encoding(s);
6601         return true;
6602     }
6603     addr = op_addr_rr_pre(s, a);
6604 
6605     tmp = tcg_temp_new_i32();
6606     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6607     store_reg(s, a->rt, tmp);
6608 
6609     tcg_gen_addi_i32(addr, addr, 4);
6610 
6611     tmp = tcg_temp_new_i32();
6612     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6613     store_reg(s, a->rt + 1, tmp);
6614 
6615     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6616     op_addr_rr_post(s, a, addr, -4);
6617     return true;
6618 }
6619 
6620 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6621 {
6622     int mem_idx = get_mem_index(s);
6623     TCGv_i32 addr, tmp;
6624 
6625     if (!ENABLE_ARCH_5TE) {
6626         return false;
6627     }
6628     if (a->rt & 1) {
6629         unallocated_encoding(s);
6630         return true;
6631     }
6632     addr = op_addr_rr_pre(s, a);
6633 
6634     tmp = load_reg(s, a->rt);
6635     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6636 
6637     tcg_gen_addi_i32(addr, addr, 4);
6638 
6639     tmp = load_reg(s, a->rt + 1);
6640     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6641 
6642     op_addr_rr_post(s, a, addr, -4);
6643     return true;
6644 }
6645 
6646 /*
6647  * Load/store immediate index
6648  */
6649 
6650 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6651 {
6652     int ofs = a->imm;
6653 
6654     if (!a->u) {
6655         ofs = -ofs;
6656     }
6657 
6658     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6659         /*
6660          * Stackcheck. Here we know 'addr' is the current SP;
6661          * U is set if we're moving SP up, else down. It is
6662          * UNKNOWN whether the limit check triggers when SP starts
6663          * below the limit and ends up above it; we chose to do so.
6664          */
6665         if (!a->u) {
6666             TCGv_i32 newsp = tcg_temp_new_i32();
6667             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6668             gen_helper_v8m_stackcheck(tcg_env, newsp);
6669         } else {
6670             gen_helper_v8m_stackcheck(tcg_env, cpu_R[13]);
6671         }
6672     }
6673 
6674     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6675 }
6676 
6677 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6678                             TCGv_i32 addr, int address_offset)
6679 {
6680     if (!a->p) {
6681         if (a->u) {
6682             address_offset += a->imm;
6683         } else {
6684             address_offset -= a->imm;
6685         }
6686     } else if (!a->w) {
6687         return;
6688     }
6689     tcg_gen_addi_i32(addr, addr, address_offset);
6690     store_reg(s, a->rn, addr);
6691 }
6692 
6693 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6694                        MemOp mop, int mem_idx)
6695 {
6696     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6697     TCGv_i32 addr, tmp;
6698 
6699     addr = op_addr_ri_pre(s, a);
6700 
6701     tmp = tcg_temp_new_i32();
6702     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6703     disas_set_da_iss(s, mop, issinfo);
6704 
6705     /*
6706      * Perform base writeback before the loaded value to
6707      * ensure correct behavior with overlapping index registers.
6708      */
6709     op_addr_ri_post(s, a, addr, 0);
6710     store_reg_from_load(s, a->rt, tmp);
6711     return true;
6712 }
6713 
6714 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6715                         MemOp mop, int mem_idx)
6716 {
6717     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6718     TCGv_i32 addr, tmp;
6719 
6720     /*
6721      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6722      * is either UNPREDICTABLE or has defined behaviour
6723      */
6724     if (s->thumb && a->rn == 15) {
6725         return false;
6726     }
6727 
6728     addr = op_addr_ri_pre(s, a);
6729 
6730     tmp = load_reg(s, a->rt);
6731     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6732     disas_set_da_iss(s, mop, issinfo);
6733 
6734     op_addr_ri_post(s, a, addr, 0);
6735     return true;
6736 }
6737 
6738 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6739 {
6740     int mem_idx = get_mem_index(s);
6741     TCGv_i32 addr, tmp;
6742 
6743     addr = op_addr_ri_pre(s, a);
6744 
6745     tmp = tcg_temp_new_i32();
6746     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6747     store_reg(s, a->rt, tmp);
6748 
6749     tcg_gen_addi_i32(addr, addr, 4);
6750 
6751     tmp = tcg_temp_new_i32();
6752     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6753     store_reg(s, rt2, tmp);
6754 
6755     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6756     op_addr_ri_post(s, a, addr, -4);
6757     return true;
6758 }
6759 
6760 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6761 {
6762     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6763         return false;
6764     }
6765     return op_ldrd_ri(s, a, a->rt + 1);
6766 }
6767 
6768 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6769 {
6770     arg_ldst_ri b = {
6771         .u = a->u, .w = a->w, .p = a->p,
6772         .rn = a->rn, .rt = a->rt, .imm = a->imm
6773     };
6774     return op_ldrd_ri(s, &b, a->rt2);
6775 }
6776 
6777 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6778 {
6779     int mem_idx = get_mem_index(s);
6780     TCGv_i32 addr, tmp;
6781 
6782     addr = op_addr_ri_pre(s, a);
6783 
6784     tmp = load_reg(s, a->rt);
6785     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6786 
6787     tcg_gen_addi_i32(addr, addr, 4);
6788 
6789     tmp = load_reg(s, rt2);
6790     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6791 
6792     op_addr_ri_post(s, a, addr, -4);
6793     return true;
6794 }
6795 
6796 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6797 {
6798     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6799         return false;
6800     }
6801     return op_strd_ri(s, a, a->rt + 1);
6802 }
6803 
6804 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6805 {
6806     arg_ldst_ri b = {
6807         .u = a->u, .w = a->w, .p = a->p,
6808         .rn = a->rn, .rt = a->rt, .imm = a->imm
6809     };
6810     return op_strd_ri(s, &b, a->rt2);
6811 }
6812 
6813 #define DO_LDST(NAME, WHICH, MEMOP) \
6814 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6815 {                                                                     \
6816     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6817 }                                                                     \
6818 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6819 {                                                                     \
6820     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6821 }                                                                     \
6822 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6823 {                                                                     \
6824     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6825 }                                                                     \
6826 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6827 {                                                                     \
6828     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6829 }
6830 
6831 DO_LDST(LDR, load, MO_UL)
6832 DO_LDST(LDRB, load, MO_UB)
6833 DO_LDST(LDRH, load, MO_UW)
6834 DO_LDST(LDRSB, load, MO_SB)
6835 DO_LDST(LDRSH, load, MO_SW)
6836 
6837 DO_LDST(STR, store, MO_UL)
6838 DO_LDST(STRB, store, MO_UB)
6839 DO_LDST(STRH, store, MO_UW)
6840 
6841 #undef DO_LDST
6842 
6843 /*
6844  * Synchronization primitives
6845  */
6846 
6847 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6848 {
6849     TCGv_i32 addr, tmp;
6850     TCGv taddr;
6851 
6852     opc |= s->be_data;
6853     addr = load_reg(s, a->rn);
6854     taddr = gen_aa32_addr(s, addr, opc);
6855 
6856     tmp = load_reg(s, a->rt2);
6857     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6858 
6859     store_reg(s, a->rt, tmp);
6860     return true;
6861 }
6862 
6863 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6864 {
6865     return op_swp(s, a, MO_UL | MO_ALIGN);
6866 }
6867 
6868 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6869 {
6870     return op_swp(s, a, MO_UB);
6871 }
6872 
6873 /*
6874  * Load/Store Exclusive and Load-Acquire/Store-Release
6875  */
6876 
6877 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6878 {
6879     TCGv_i32 addr;
6880     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6881     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6882 
6883     /* We UNDEF for these UNPREDICTABLE cases.  */
6884     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6885         || a->rd == a->rn || a->rd == a->rt
6886         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6887         || (mop == MO_64
6888             && (a->rt2 == 15
6889                 || a->rd == a->rt2
6890                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6891         unallocated_encoding(s);
6892         return true;
6893     }
6894 
6895     if (rel) {
6896         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6897     }
6898 
6899     addr = tcg_temp_new_i32();
6900     load_reg_var(s, addr, a->rn);
6901     tcg_gen_addi_i32(addr, addr, a->imm);
6902 
6903     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6904     return true;
6905 }
6906 
6907 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6908 {
6909     if (!ENABLE_ARCH_6) {
6910         return false;
6911     }
6912     return op_strex(s, a, MO_32, false);
6913 }
6914 
6915 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6916 {
6917     if (!ENABLE_ARCH_6K) {
6918         return false;
6919     }
6920     /* We UNDEF for these UNPREDICTABLE cases.  */
6921     if (a->rt & 1) {
6922         unallocated_encoding(s);
6923         return true;
6924     }
6925     a->rt2 = a->rt + 1;
6926     return op_strex(s, a, MO_64, false);
6927 }
6928 
6929 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6930 {
6931     return op_strex(s, a, MO_64, false);
6932 }
6933 
6934 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6935 {
6936     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6937         return false;
6938     }
6939     return op_strex(s, a, MO_8, false);
6940 }
6941 
6942 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6943 {
6944     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6945         return false;
6946     }
6947     return op_strex(s, a, MO_16, false);
6948 }
6949 
6950 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6951 {
6952     if (!ENABLE_ARCH_8) {
6953         return false;
6954     }
6955     return op_strex(s, a, MO_32, true);
6956 }
6957 
6958 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6959 {
6960     if (!ENABLE_ARCH_8) {
6961         return false;
6962     }
6963     /* We UNDEF for these UNPREDICTABLE cases.  */
6964     if (a->rt & 1) {
6965         unallocated_encoding(s);
6966         return true;
6967     }
6968     a->rt2 = a->rt + 1;
6969     return op_strex(s, a, MO_64, true);
6970 }
6971 
6972 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6973 {
6974     if (!ENABLE_ARCH_8) {
6975         return false;
6976     }
6977     return op_strex(s, a, MO_64, true);
6978 }
6979 
6980 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6981 {
6982     if (!ENABLE_ARCH_8) {
6983         return false;
6984     }
6985     return op_strex(s, a, MO_8, true);
6986 }
6987 
6988 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6989 {
6990     if (!ENABLE_ARCH_8) {
6991         return false;
6992     }
6993     return op_strex(s, a, MO_16, true);
6994 }
6995 
6996 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
6997 {
6998     TCGv_i32 addr, tmp;
6999 
7000     if (!ENABLE_ARCH_8) {
7001         return false;
7002     }
7003     /* We UNDEF for these UNPREDICTABLE cases.  */
7004     if (a->rn == 15 || a->rt == 15) {
7005         unallocated_encoding(s);
7006         return true;
7007     }
7008 
7009     addr = load_reg(s, a->rn);
7010     tmp = load_reg(s, a->rt);
7011     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7012     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7013     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7014 
7015     return true;
7016 }
7017 
7018 static bool trans_STL(DisasContext *s, arg_STL *a)
7019 {
7020     return op_stl(s, a, MO_UL);
7021 }
7022 
7023 static bool trans_STLB(DisasContext *s, arg_STL *a)
7024 {
7025     return op_stl(s, a, MO_UB);
7026 }
7027 
7028 static bool trans_STLH(DisasContext *s, arg_STL *a)
7029 {
7030     return op_stl(s, a, MO_UW);
7031 }
7032 
7033 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7034 {
7035     TCGv_i32 addr;
7036     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7037     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7038 
7039     /* We UNDEF for these UNPREDICTABLE cases.  */
7040     if (a->rn == 15 || a->rt == 15
7041         || (!v8a && s->thumb && a->rt == 13)
7042         || (mop == MO_64
7043             && (a->rt2 == 15 || a->rt == a->rt2
7044                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7045         unallocated_encoding(s);
7046         return true;
7047     }
7048 
7049     addr = tcg_temp_new_i32();
7050     load_reg_var(s, addr, a->rn);
7051     tcg_gen_addi_i32(addr, addr, a->imm);
7052 
7053     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7054 
7055     if (acq) {
7056         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7057     }
7058     return true;
7059 }
7060 
7061 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7062 {
7063     if (!ENABLE_ARCH_6) {
7064         return false;
7065     }
7066     return op_ldrex(s, a, MO_32, false);
7067 }
7068 
7069 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7070 {
7071     if (!ENABLE_ARCH_6K) {
7072         return false;
7073     }
7074     /* We UNDEF for these UNPREDICTABLE cases.  */
7075     if (a->rt & 1) {
7076         unallocated_encoding(s);
7077         return true;
7078     }
7079     a->rt2 = a->rt + 1;
7080     return op_ldrex(s, a, MO_64, false);
7081 }
7082 
7083 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7084 {
7085     return op_ldrex(s, a, MO_64, false);
7086 }
7087 
7088 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7089 {
7090     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7091         return false;
7092     }
7093     return op_ldrex(s, a, MO_8, false);
7094 }
7095 
7096 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7097 {
7098     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7099         return false;
7100     }
7101     return op_ldrex(s, a, MO_16, false);
7102 }
7103 
7104 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7105 {
7106     if (!ENABLE_ARCH_8) {
7107         return false;
7108     }
7109     return op_ldrex(s, a, MO_32, true);
7110 }
7111 
7112 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7113 {
7114     if (!ENABLE_ARCH_8) {
7115         return false;
7116     }
7117     /* We UNDEF for these UNPREDICTABLE cases.  */
7118     if (a->rt & 1) {
7119         unallocated_encoding(s);
7120         return true;
7121     }
7122     a->rt2 = a->rt + 1;
7123     return op_ldrex(s, a, MO_64, true);
7124 }
7125 
7126 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7127 {
7128     if (!ENABLE_ARCH_8) {
7129         return false;
7130     }
7131     return op_ldrex(s, a, MO_64, true);
7132 }
7133 
7134 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7135 {
7136     if (!ENABLE_ARCH_8) {
7137         return false;
7138     }
7139     return op_ldrex(s, a, MO_8, true);
7140 }
7141 
7142 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7143 {
7144     if (!ENABLE_ARCH_8) {
7145         return false;
7146     }
7147     return op_ldrex(s, a, MO_16, true);
7148 }
7149 
7150 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7151 {
7152     TCGv_i32 addr, tmp;
7153 
7154     if (!ENABLE_ARCH_8) {
7155         return false;
7156     }
7157     /* We UNDEF for these UNPREDICTABLE cases.  */
7158     if (a->rn == 15 || a->rt == 15) {
7159         unallocated_encoding(s);
7160         return true;
7161     }
7162 
7163     addr = load_reg(s, a->rn);
7164     tmp = tcg_temp_new_i32();
7165     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7166     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7167 
7168     store_reg(s, a->rt, tmp);
7169     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7170     return true;
7171 }
7172 
7173 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7174 {
7175     return op_lda(s, a, MO_UL);
7176 }
7177 
7178 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7179 {
7180     return op_lda(s, a, MO_UB);
7181 }
7182 
7183 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7184 {
7185     return op_lda(s, a, MO_UW);
7186 }
7187 
7188 /*
7189  * Media instructions
7190  */
7191 
7192 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7193 {
7194     TCGv_i32 t1, t2;
7195 
7196     if (!ENABLE_ARCH_6) {
7197         return false;
7198     }
7199 
7200     t1 = load_reg(s, a->rn);
7201     t2 = load_reg(s, a->rm);
7202     gen_helper_usad8(t1, t1, t2);
7203     if (a->ra != 15) {
7204         t2 = load_reg(s, a->ra);
7205         tcg_gen_add_i32(t1, t1, t2);
7206     }
7207     store_reg(s, a->rd, t1);
7208     return true;
7209 }
7210 
7211 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7212 {
7213     TCGv_i32 tmp;
7214     int width = a->widthm1 + 1;
7215     int shift = a->lsb;
7216 
7217     if (!ENABLE_ARCH_6T2) {
7218         return false;
7219     }
7220     if (shift + width > 32) {
7221         /* UNPREDICTABLE; we choose to UNDEF */
7222         unallocated_encoding(s);
7223         return true;
7224     }
7225 
7226     tmp = load_reg(s, a->rn);
7227     if (u) {
7228         tcg_gen_extract_i32(tmp, tmp, shift, width);
7229     } else {
7230         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7231     }
7232     store_reg(s, a->rd, tmp);
7233     return true;
7234 }
7235 
7236 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7237 {
7238     return op_bfx(s, a, false);
7239 }
7240 
7241 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7242 {
7243     return op_bfx(s, a, true);
7244 }
7245 
7246 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7247 {
7248     int msb = a->msb, lsb = a->lsb;
7249     TCGv_i32 t_in, t_rd;
7250     int width;
7251 
7252     if (!ENABLE_ARCH_6T2) {
7253         return false;
7254     }
7255     if (msb < lsb) {
7256         /* UNPREDICTABLE; we choose to UNDEF */
7257         unallocated_encoding(s);
7258         return true;
7259     }
7260 
7261     width = msb + 1 - lsb;
7262     if (a->rn == 15) {
7263         /* BFC */
7264         t_in = tcg_constant_i32(0);
7265     } else {
7266         /* BFI */
7267         t_in = load_reg(s, a->rn);
7268     }
7269     t_rd = load_reg(s, a->rd);
7270     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7271     store_reg(s, a->rd, t_rd);
7272     return true;
7273 }
7274 
7275 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7276 {
7277     unallocated_encoding(s);
7278     return true;
7279 }
7280 
7281 /*
7282  * Parallel addition and subtraction
7283  */
7284 
7285 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7286                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7287 {
7288     TCGv_i32 t0, t1;
7289 
7290     if (s->thumb
7291         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7292         : !ENABLE_ARCH_6) {
7293         return false;
7294     }
7295 
7296     t0 = load_reg(s, a->rn);
7297     t1 = load_reg(s, a->rm);
7298 
7299     gen(t0, t0, t1);
7300 
7301     store_reg(s, a->rd, t0);
7302     return true;
7303 }
7304 
7305 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7306                              void (*gen)(TCGv_i32, TCGv_i32,
7307                                          TCGv_i32, TCGv_ptr))
7308 {
7309     TCGv_i32 t0, t1;
7310     TCGv_ptr ge;
7311 
7312     if (s->thumb
7313         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7314         : !ENABLE_ARCH_6) {
7315         return false;
7316     }
7317 
7318     t0 = load_reg(s, a->rn);
7319     t1 = load_reg(s, a->rm);
7320 
7321     ge = tcg_temp_new_ptr();
7322     tcg_gen_addi_ptr(ge, tcg_env, offsetof(CPUARMState, GE));
7323     gen(t0, t0, t1, ge);
7324 
7325     store_reg(s, a->rd, t0);
7326     return true;
7327 }
7328 
7329 #define DO_PAR_ADDSUB(NAME, helper) \
7330 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7331 {                                                       \
7332     return op_par_addsub(s, a, helper);                 \
7333 }
7334 
7335 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7336 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7337 {                                                       \
7338     return op_par_addsub_ge(s, a, helper);              \
7339 }
7340 
7341 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7342 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7343 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7344 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7345 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7346 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7347 
7348 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7349 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7350 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7351 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7352 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7353 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7354 
7355 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7356 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7357 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7358 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7359 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7360 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7361 
7362 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7363 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7364 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7365 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7366 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7367 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7368 
7369 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7370 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7371 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7372 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7373 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7374 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7375 
7376 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7377 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7378 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7379 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7380 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7381 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7382 
7383 #undef DO_PAR_ADDSUB
7384 #undef DO_PAR_ADDSUB_GE
7385 
7386 /*
7387  * Packing, unpacking, saturation, and reversal
7388  */
7389 
7390 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7391 {
7392     TCGv_i32 tn, tm;
7393     int shift = a->imm;
7394 
7395     if (s->thumb
7396         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7397         : !ENABLE_ARCH_6) {
7398         return false;
7399     }
7400 
7401     tn = load_reg(s, a->rn);
7402     tm = load_reg(s, a->rm);
7403     if (a->tb) {
7404         /* PKHTB */
7405         if (shift == 0) {
7406             shift = 31;
7407         }
7408         tcg_gen_sari_i32(tm, tm, shift);
7409         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7410     } else {
7411         /* PKHBT */
7412         tcg_gen_shli_i32(tm, tm, shift);
7413         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7414     }
7415     store_reg(s, a->rd, tn);
7416     return true;
7417 }
7418 
7419 static bool op_sat(DisasContext *s, arg_sat *a,
7420                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7421 {
7422     TCGv_i32 tmp;
7423     int shift = a->imm;
7424 
7425     if (!ENABLE_ARCH_6) {
7426         return false;
7427     }
7428 
7429     tmp = load_reg(s, a->rn);
7430     if (a->sh) {
7431         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7432     } else {
7433         tcg_gen_shli_i32(tmp, tmp, shift);
7434     }
7435 
7436     gen(tmp, tcg_env, tmp, tcg_constant_i32(a->satimm));
7437 
7438     store_reg(s, a->rd, tmp);
7439     return true;
7440 }
7441 
/* SSAT: the common op_sat() translation using gen_helper_ssat. */
static bool trans_SSAT(DisasContext *s, arg_sat *a)
{
    return op_sat(s, a, gen_helper_ssat);
}
7446 
/* USAT: the common op_sat() translation using gen_helper_usat. */
static bool trans_USAT(DisasContext *s, arg_sat *a)
{
    return op_sat(s, a, gen_helper_usat);
}
7451 
7452 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7453 {
7454     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7455         return false;
7456     }
7457     return op_sat(s, a, gen_helper_ssat16);
7458 }
7459 
7460 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7461 {
7462     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7463         return false;
7464     }
7465     return op_sat(s, a, gen_helper_usat16);
7466 }
7467 
7468 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7469                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7470                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7471 {
7472     TCGv_i32 tmp;
7473 
7474     if (!ENABLE_ARCH_6) {
7475         return false;
7476     }
7477 
7478     tmp = load_reg(s, a->rm);
7479     /*
7480      * TODO: In many cases we could do a shift instead of a rotate.
7481      * Combined with a simple extend, that becomes an extract.
7482      */
7483     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7484     gen_extract(tmp, tmp);
7485 
7486     if (a->rn != 15) {
7487         TCGv_i32 tmp2 = load_reg(s, a->rn);
7488         gen_add(tmp, tmp, tmp2);
7489     }
7490     store_reg(s, a->rd, tmp);
7491     return true;
7492 }
7493 
/* SXTAB: op_xta() with an 8-bit sign extension and a 32-bit add. */
static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
{
    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
}
7498 
/* SXTAH: op_xta() with a 16-bit sign extension and a 32-bit add. */
static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
{
    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
}
7503 
7504 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7505 {
7506     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7507         return false;
7508     }
7509     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7510 }
7511 
/* UXTAB: op_xta() with an 8-bit zero extension and a 32-bit add. */
static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
{
    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
}
7516 
/* UXTAH: op_xta() with a 16-bit zero extension and a 32-bit add. */
static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
{
    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
}
7521 
7522 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7523 {
7524     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7525         return false;
7526     }
7527     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7528 }
7529 
7530 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7531 {
7532     TCGv_i32 t1, t2, t3;
7533 
7534     if (s->thumb
7535         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7536         : !ENABLE_ARCH_6) {
7537         return false;
7538     }
7539 
7540     t1 = load_reg(s, a->rn);
7541     t2 = load_reg(s, a->rm);
7542     t3 = tcg_temp_new_i32();
7543     tcg_gen_ld_i32(t3, tcg_env, offsetof(CPUARMState, GE));
7544     gen_helper_sel_flags(t1, t3, t1, t2);
7545     store_reg(s, a->rd, t1);
7546     return true;
7547 }
7548 
7549 static bool op_rr(DisasContext *s, arg_rr *a,
7550                   void (*gen)(TCGv_i32, TCGv_i32))
7551 {
7552     TCGv_i32 tmp;
7553 
7554     tmp = load_reg(s, a->rm);
7555     gen(tmp, tmp);
7556     store_reg(s, a->rd, tmp);
7557     return true;
7558 }
7559 
7560 static bool trans_REV(DisasContext *s, arg_rr *a)
7561 {
7562     if (!ENABLE_ARCH_6) {
7563         return false;
7564     }
7565     return op_rr(s, a, tcg_gen_bswap32_i32);
7566 }
7567 
7568 static bool trans_REV16(DisasContext *s, arg_rr *a)
7569 {
7570     if (!ENABLE_ARCH_6) {
7571         return false;
7572     }
7573     return op_rr(s, a, gen_rev16);
7574 }
7575 
7576 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7577 {
7578     if (!ENABLE_ARCH_6) {
7579         return false;
7580     }
7581     return op_rr(s, a, gen_revsh);
7582 }
7583 
7584 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7585 {
7586     if (!ENABLE_ARCH_6T2) {
7587         return false;
7588     }
7589     return op_rr(s, a, gen_helper_rbit);
7590 }
7591 
7592 /*
7593  * Signed multiply, signed and unsigned divide
7594  */
7595 
7596 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7597 {
7598     TCGv_i32 t1, t2;
7599 
7600     if (!ENABLE_ARCH_6) {
7601         return false;
7602     }
7603 
7604     t1 = load_reg(s, a->rn);
7605     t2 = load_reg(s, a->rm);
7606     if (m_swap) {
7607         gen_swap_half(t2, t2);
7608     }
7609     gen_smul_dual(t1, t2);
7610 
7611     if (sub) {
7612         /*
7613          * This subtraction cannot overflow, so we can do a simple
7614          * 32-bit subtraction and then a possible 32-bit saturating
7615          * addition of Ra.
7616          */
7617         tcg_gen_sub_i32(t1, t1, t2);
7618 
7619         if (a->ra != 15) {
7620             t2 = load_reg(s, a->ra);
7621             gen_helper_add_setq(t1, tcg_env, t1, t2);
7622         }
7623     } else if (a->ra == 15) {
7624         /* Single saturation-checking addition */
7625         gen_helper_add_setq(t1, tcg_env, t1, t2);
7626     } else {
7627         /*
7628          * We need to add the products and Ra together and then
7629          * determine whether the final result overflowed. Doing
7630          * this as two separate add-and-check-overflow steps incorrectly
7631          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7632          * Do all the arithmetic at 64-bits and then check for overflow.
7633          */
7634         TCGv_i64 p64, q64;
7635         TCGv_i32 t3, qf, one;
7636 
7637         p64 = tcg_temp_new_i64();
7638         q64 = tcg_temp_new_i64();
7639         tcg_gen_ext_i32_i64(p64, t1);
7640         tcg_gen_ext_i32_i64(q64, t2);
7641         tcg_gen_add_i64(p64, p64, q64);
7642         load_reg_var(s, t2, a->ra);
7643         tcg_gen_ext_i32_i64(q64, t2);
7644         tcg_gen_add_i64(p64, p64, q64);
7645 
7646         tcg_gen_extr_i64_i32(t1, t2, p64);
7647         /*
7648          * t1 is the low half of the result which goes into Rd.
7649          * We have overflow and must set Q if the high half (t2)
7650          * is different from the sign-extension of t1.
7651          */
7652         t3 = tcg_temp_new_i32();
7653         tcg_gen_sari_i32(t3, t1, 31);
7654         qf = load_cpu_field(QF);
7655         one = tcg_constant_i32(1);
7656         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7657         store_cpu_field(qf, QF);
7658     }
7659     store_reg(s, a->rd, t1);
7660     return true;
7661 }
7662 
/* SMLAD: op_smlad() with m_swap = false, sub = false. */
static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
{
    return op_smlad(s, a, false, false);
}
7667 
/* SMLADX: op_smlad() with m_swap = true, sub = false. */
static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
{
    return op_smlad(s, a, true, false);
}
7672 
/* SMLSD: op_smlad() with m_swap = false, sub = true. */
static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
{
    return op_smlad(s, a, false, true);
}
7677 
/* SMLSDX: op_smlad() with m_swap = true, sub = true. */
static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
{
    return op_smlad(s, a, true, true);
}
7682 
7683 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7684 {
7685     TCGv_i32 t1, t2;
7686     TCGv_i64 l1, l2;
7687 
7688     if (!ENABLE_ARCH_6) {
7689         return false;
7690     }
7691 
7692     t1 = load_reg(s, a->rn);
7693     t2 = load_reg(s, a->rm);
7694     if (m_swap) {
7695         gen_swap_half(t2, t2);
7696     }
7697     gen_smul_dual(t1, t2);
7698 
7699     l1 = tcg_temp_new_i64();
7700     l2 = tcg_temp_new_i64();
7701     tcg_gen_ext_i32_i64(l1, t1);
7702     tcg_gen_ext_i32_i64(l2, t2);
7703 
7704     if (sub) {
7705         tcg_gen_sub_i64(l1, l1, l2);
7706     } else {
7707         tcg_gen_add_i64(l1, l1, l2);
7708     }
7709 
7710     gen_addq(s, l1, a->ra, a->rd);
7711     gen_storeq_reg(s, a->ra, a->rd, l1);
7712     return true;
7713 }
7714 
/* SMLALD: op_smlald() with m_swap = false, sub = false. */
static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
{
    return op_smlald(s, a, false, false);
}
7719 
/* SMLALDX: op_smlald() with m_swap = true, sub = false. */
static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
{
    return op_smlald(s, a, true, false);
}
7724 
/* SMLSLD: op_smlald() with m_swap = false, sub = true. */
static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
{
    return op_smlald(s, a, false, true);
}
7729 
/* SMLSLDX: op_smlald() with m_swap = true, sub = true. */
static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
{
    return op_smlald(s, a, true, true);
}
7734 
7735 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7736 {
7737     TCGv_i32 t1, t2;
7738 
7739     if (s->thumb
7740         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7741         : !ENABLE_ARCH_6) {
7742         return false;
7743     }
7744 
7745     t1 = load_reg(s, a->rn);
7746     t2 = load_reg(s, a->rm);
7747     tcg_gen_muls2_i32(t2, t1, t1, t2);
7748 
7749     if (a->ra != 15) {
7750         TCGv_i32 t3 = load_reg(s, a->ra);
7751         if (sub) {
7752             /*
7753              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
7754              * a non-zero multiplicand lowpart, and the correct result
7755              * lowpart for rounding.
7756              */
7757             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7758         } else {
7759             tcg_gen_add_i32(t1, t1, t3);
7760         }
7761     }
7762     if (round) {
7763         /*
7764          * Adding 0x80000000 to the 64-bit quantity means that we have
7765          * carry in to the high word when the low word has the msb set.
7766          */
7767         tcg_gen_shri_i32(t2, t2, 31);
7768         tcg_gen_add_i32(t1, t1, t2);
7769     }
7770     store_reg(s, a->rd, t1);
7771     return true;
7772 }
7773 
/* SMMLA: op_smmla() with round = false, sub = false. */
static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
{
    return op_smmla(s, a, false, false);
}
7778 
/* SMMLAR: op_smmla() with round = true, sub = false. */
static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
{
    return op_smmla(s, a, true, false);
}
7783 
/* SMMLS: op_smmla() with round = false, sub = true. */
static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
{
    return op_smmla(s, a, false, true);
}
7788 
/* SMMLSR: op_smmla() with round = true, sub = true. */
static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
{
    return op_smmla(s, a, true, true);
}
7793 
7794 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7795 {
7796     TCGv_i32 t1, t2;
7797 
7798     if (s->thumb
7799         ? !dc_isar_feature(aa32_thumb_div, s)
7800         : !dc_isar_feature(aa32_arm_div, s)) {
7801         return false;
7802     }
7803 
7804     t1 = load_reg(s, a->rn);
7805     t2 = load_reg(s, a->rm);
7806     if (u) {
7807         gen_helper_udiv(t1, tcg_env, t1, t2);
7808     } else {
7809         gen_helper_sdiv(t1, tcg_env, t1, t2);
7810     }
7811     store_reg(s, a->rd, t1);
7812     return true;
7813 }
7814 
/* SDIV: op_div() with u = false, i.e. using the sdiv helper. */
static bool trans_SDIV(DisasContext *s, arg_rrr *a)
{
    return op_div(s, a, false);
}
7819 
/* UDIV: op_div() with u = true, i.e. using the udiv helper. */
static bool trans_UDIV(DisasContext *s, arg_rrr *a)
{
    return op_div(s, a, true);
}
7824 
7825 /*
7826  * Block data transfer
7827  */
7828 
7829 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7830 {
7831     TCGv_i32 addr = load_reg(s, a->rn);
7832 
7833     if (a->b) {
7834         if (a->i) {
7835             /* pre increment */
7836             tcg_gen_addi_i32(addr, addr, 4);
7837         } else {
7838             /* pre decrement */
7839             tcg_gen_addi_i32(addr, addr, -(n * 4));
7840         }
7841     } else if (!a->i && n != 1) {
7842         /* post decrement */
7843         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7844     }
7845 
7846     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7847         /*
7848          * If the writeback is incrementing SP rather than
7849          * decrementing it, and the initial SP is below the
7850          * stack limit but the final written-back SP would
7851          * be above, then we must not perform any memory
7852          * accesses, but it is IMPDEF whether we generate
7853          * an exception. We choose to do so in this case.
7854          * At this point 'addr' is the lowest address, so
7855          * either the original SP (if incrementing) or our
7856          * final SP (if decrementing), so that's what we check.
7857          */
7858         gen_helper_v8m_stackcheck(tcg_env, addr);
7859     }
7860 
7861     return addr;
7862 }
7863 
7864 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7865                                TCGv_i32 addr, int n)
7866 {
7867     if (a->w) {
7868         /* write back */
7869         if (!a->b) {
7870             if (a->i) {
7871                 /* post increment */
7872                 tcg_gen_addi_i32(addr, addr, 4);
7873             } else {
7874                 /* post decrement */
7875                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7876             }
7877         } else if (!a->i && n != 1) {
7878             /* pre decrement */
7879             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7880         }
7881         store_reg(s, a->rn, addr);
7882     }
7883 }
7884 
7885 static bool op_stm(DisasContext *s, arg_ldst_block *a)
7886 {
7887     int i, j, n, list, mem_idx;
7888     bool user = a->u;
7889     TCGv_i32 addr, tmp;
7890 
7891     if (user) {
7892         /* STM (user) */
7893         if (IS_USER(s)) {
7894             /* Only usable in supervisor mode.  */
7895             unallocated_encoding(s);
7896             return true;
7897         }
7898     }
7899 
7900     list = a->list;
7901     n = ctpop16(list);
7902     /*
7903      * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
7904      * to UNDEF. In the T32 STM encoding n == 1 is also UNPREDICTABLE,
7905      * but hardware treats it like the A32 version and implements the
7906      * single-register-store, and some in-the-wild (buggy) software
7907      * assumes that, so we don't UNDEF on that case.
7908      */
7909     if (n < 1 || a->rn == 15) {
7910         unallocated_encoding(s);
7911         return true;
7912     }
7913 
7914     s->eci_handled = true;
7915 
7916     addr = op_addr_block_pre(s, a, n);
7917     mem_idx = get_mem_index(s);
7918 
7919     for (i = j = 0; i < 16; i++) {
7920         if (!(list & (1 << i))) {
7921             continue;
7922         }
7923 
7924         if (user && i != 15) {
7925             tmp = tcg_temp_new_i32();
7926             gen_helper_get_user_reg(tmp, tcg_env, tcg_constant_i32(i));
7927         } else {
7928             tmp = load_reg(s, i);
7929         }
7930         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7931 
7932         /* No need to add after the last transfer.  */
7933         if (++j != n) {
7934             tcg_gen_addi_i32(addr, addr, 4);
7935         }
7936     }
7937 
7938     op_addr_block_post(s, a, addr, n);
7939     clear_eci_state(s);
7940     return true;
7941 }
7942 
/* STM: no encoding-specific checks, so go straight to op_stm(). */
static bool trans_STM(DisasContext *s, arg_ldst_block *a)
{
    return op_stm(s, a);
}
7947 
7948 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7949 {
7950     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7951     if (a->w && (a->list & (1 << a->rn))) {
7952         unallocated_encoding(s);
7953         return true;
7954     }
7955     return op_stm(s, a);
7956 }
7957 
7958 static bool do_ldm(DisasContext *s, arg_ldst_block *a)
7959 {
7960     int i, j, n, list, mem_idx;
7961     bool loaded_base;
7962     bool user = a->u;
7963     bool exc_return = false;
7964     TCGv_i32 addr, tmp, loaded_var;
7965 
7966     if (user) {
7967         /* LDM (user), LDM (exception return) */
7968         if (IS_USER(s)) {
7969             /* Only usable in supervisor mode.  */
7970             unallocated_encoding(s);
7971             return true;
7972         }
7973         if (extract32(a->list, 15, 1)) {
7974             exc_return = true;
7975             user = false;
7976         } else {
7977             /* LDM (user) does not allow writeback.  */
7978             if (a->w) {
7979                 unallocated_encoding(s);
7980                 return true;
7981             }
7982         }
7983     }
7984 
7985     list = a->list;
7986     n = ctpop16(list);
7987     /*
7988      * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
7989      * to UNDEF. In the T32 LDM encoding n == 1 is also UNPREDICTABLE,
7990      * but hardware treats it like the A32 version and implements the
7991      * single-register-load, and some in-the-wild (buggy) software
7992      * assumes that, so we don't UNDEF on that case.
7993      */
7994     if (n < 1 || a->rn == 15) {
7995         unallocated_encoding(s);
7996         return true;
7997     }
7998 
7999     s->eci_handled = true;
8000 
8001     addr = op_addr_block_pre(s, a, n);
8002     mem_idx = get_mem_index(s);
8003     loaded_base = false;
8004     loaded_var = NULL;
8005 
8006     for (i = j = 0; i < 16; i++) {
8007         if (!(list & (1 << i))) {
8008             continue;
8009         }
8010 
8011         tmp = tcg_temp_new_i32();
8012         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8013         if (user) {
8014             gen_helper_set_user_reg(tcg_env, tcg_constant_i32(i), tmp);
8015         } else if (i == a->rn) {
8016             loaded_var = tmp;
8017             loaded_base = true;
8018         } else if (i == 15 && exc_return) {
8019             store_pc_exc_ret(s, tmp);
8020         } else {
8021             store_reg_from_load(s, i, tmp);
8022         }
8023 
8024         /* No need to add after the last transfer.  */
8025         if (++j != n) {
8026             tcg_gen_addi_i32(addr, addr, 4);
8027         }
8028     }
8029 
8030     op_addr_block_post(s, a, addr, n);
8031 
8032     if (loaded_base) {
8033         /* Note that we reject base == pc above.  */
8034         store_reg(s, a->rn, loaded_var);
8035     }
8036 
8037     if (exc_return) {
8038         /* Restore CPSR from SPSR.  */
8039         tmp = load_cpu_field(spsr);
8040         translator_io_start(&s->base);
8041         gen_helper_cpsr_write_eret(tcg_env, tmp);
8042         /* Must exit loop to check un-masked IRQs */
8043         s->base.is_jmp = DISAS_EXIT;
8044     }
8045     clear_eci_state(s);
8046     return true;
8047 }
8048 
8049 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8050 {
8051     /*
8052      * Writeback register in register list is UNPREDICTABLE
8053      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8054      * an UNKNOWN value to the base register.
8055      */
8056     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8057         unallocated_encoding(s);
8058         return true;
8059     }
8060     return do_ldm(s, a);
8061 }
8062 
8063 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8064 {
8065     /* Writeback register in register list is UNPREDICTABLE for T32. */
8066     if (a->w && (a->list & (1 << a->rn))) {
8067         unallocated_encoding(s);
8068         return true;
8069     }
8070     return do_ldm(s, a);
8071 }
8072 
8073 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8074 {
8075     /* Writeback is conditional on the base register not being loaded.  */
8076     a->w = !(a->list & (1 << a->rn));
8077     return do_ldm(s, a);
8078 }
8079 
8080 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8081 {
8082     int i;
8083     TCGv_i32 zero;
8084 
8085     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8086         return false;
8087     }
8088 
8089     if (extract32(a->list, 13, 1)) {
8090         return false;
8091     }
8092 
8093     if (!a->list) {
8094         /* UNPREDICTABLE; we choose to UNDEF */
8095         return false;
8096     }
8097 
8098     s->eci_handled = true;
8099 
8100     zero = tcg_constant_i32(0);
8101     for (i = 0; i < 15; i++) {
8102         if (extract32(a->list, i, 1)) {
8103             /* Clear R[i] */
8104             tcg_gen_mov_i32(cpu_R[i], zero);
8105         }
8106     }
8107     if (extract32(a->list, 15, 1)) {
8108         /*
8109          * Clear APSR (by calling the MSR helper with the same argument
8110          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8111          */
8112         gen_helper_v7m_msr(tcg_env, tcg_constant_i32(0xc00), zero);
8113     }
8114     clear_eci_state(s);
8115     return true;
8116 }
8117 
8118 /*
8119  * Branch, branch with link
8120  */
8121 
/* B: jump to the target given by the immediate offset.  Always handled. */
static bool trans_B(DisasContext *s, arg_i *a)
{
    gen_jmp(s, jmp_diff(s, a->imm));
    return true;
}
8127 
8128 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8129 {
8130     /* This has cond from encoding, required to be outside IT block.  */
8131     if (a->cond >= 0xe) {
8132         return false;
8133     }
8134     if (s->condexec_mask) {
8135         unallocated_encoding(s);
8136         return true;
8137     }
8138     arm_skip_unless(s, a->cond);
8139     gen_jmp(s, jmp_diff(s, a->imm));
8140     return true;
8141 }
8142 
8143 static bool trans_BL(DisasContext *s, arg_i *a)
8144 {
8145     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8146     gen_jmp(s, jmp_diff(s, a->imm));
8147     return true;
8148 }
8149 
8150 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8151 {
8152     /*
8153      * BLX <imm> would be useless on M-profile; the encoding space
8154      * is used for other insns from v8.1M onward, and UNDEFs before that.
8155      */
8156     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8157         return false;
8158     }
8159 
8160     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8161     if (s->thumb && (a->imm & 2)) {
8162         return false;
8163     }
8164     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8165     store_cpu_field_constant(!s->thumb, thumb);
8166     /* This jump is computed from an aligned PC: subtract off the low bits. */
8167     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8168     return true;
8169 }
8170 
/*
 * First half of the two-part BL/BLX sequence used when Thumb-2 is
 * absent (asserted): r14 is loaded with the PC-relative value for the
 * immediate shifted left by 12.  The matching suffix insn adds its
 * own immediate to r14 and performs the branch.
 */
static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
{
    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
    gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
    return true;
}
8177 
8178 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8179 {
8180     TCGv_i32 tmp = tcg_temp_new_i32();
8181 
8182     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8183     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8184     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8185     gen_bx(s, tmp);
8186     return true;
8187 }
8188 
8189 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8190 {
8191     TCGv_i32 tmp;
8192 
8193     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8194     if (!ENABLE_ARCH_5) {
8195         return false;
8196     }
8197     tmp = tcg_temp_new_i32();
8198     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8199     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8200     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8201     gen_bx(s, tmp);
8202     return true;
8203 }
8204 
8205 static bool trans_BF(DisasContext *s, arg_BF *a)
8206 {
8207     /*
8208      * M-profile branch future insns. The architecture permits an
8209      * implementation to implement these as NOPs (equivalent to
8210      * discarding the LO_BRANCH_INFO cache immediately), and we
8211      * take that IMPDEF option because for QEMU a "real" implementation
8212      * would be complicated and wouldn't execute any faster.
8213      */
8214     if (!dc_isar_feature(aa32_lob, s)) {
8215         return false;
8216     }
8217     if (a->boff == 0) {
8218         /* SEE "Related encodings" (loop insns) */
8219         return false;
8220     }
8221     /* Handle as NOP */
8222     return true;
8223 }
8224 
8225 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8226 {
8227     /* M-profile low-overhead loop start */
8228     TCGv_i32 tmp;
8229 
8230     if (!dc_isar_feature(aa32_lob, s)) {
8231         return false;
8232     }
8233     if (a->rn == 13 || a->rn == 15) {
8234         /*
8235          * For DLSTP rn == 15 is a related encoding (LCTP); the
8236          * other cases caught by this condition are all
8237          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8238          */
8239         return false;
8240     }
8241 
8242     if (a->size != 4) {
8243         /* DLSTP */
8244         if (!dc_isar_feature(aa32_mve, s)) {
8245             return false;
8246         }
8247         if (!vfp_access_check(s)) {
8248             return true;
8249         }
8250     }
8251 
8252     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8253     tmp = load_reg(s, a->rn);
8254     store_reg(s, 14, tmp);
8255     if (a->size != 4) {
8256         /* DLSTP: set FPSCR.LTPSIZE */
8257         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8258         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8259     }
8260     return true;
8261 }
8262 
8263 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8264 {
8265     /* M-profile low-overhead while-loop start */
8266     TCGv_i32 tmp;
8267     DisasLabel nextlabel;
8268 
8269     if (!dc_isar_feature(aa32_lob, s)) {
8270         return false;
8271     }
8272     if (a->rn == 13 || a->rn == 15) {
8273         /*
8274          * For WLSTP rn == 15 is a related encoding (LE); the
8275          * other cases caught by this condition are all
8276          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8277          */
8278         return false;
8279     }
8280     if (s->condexec_mask) {
8281         /*
8282          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8283          * we choose to UNDEF, because otherwise our use of
8284          * gen_goto_tb(1) would clash with the use of TB exit 1
8285          * in the dc->condjmp condition-failed codepath in
8286          * arm_tr_tb_stop() and we'd get an assertion.
8287          */
8288         return false;
8289     }
8290     if (a->size != 4) {
8291         /* WLSTP */
8292         if (!dc_isar_feature(aa32_mve, s)) {
8293             return false;
8294         }
8295         /*
8296          * We need to check that the FPU is enabled here, but mustn't
8297          * call vfp_access_check() to do that because we don't want to
8298          * do the lazy state preservation in the "loop count is zero" case.
8299          * Do the check-and-raise-exception by hand.
8300          */
8301         if (s->fp_excp_el) {
8302             gen_exception_insn_el(s, 0, EXCP_NOCP,
8303                                   syn_uncategorized(), s->fp_excp_el);
8304             return true;
8305         }
8306     }
8307 
8308     nextlabel = gen_disas_label(s);
8309     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8310     tmp = load_reg(s, a->rn);
8311     store_reg(s, 14, tmp);
8312     if (a->size != 4) {
8313         /*
8314          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8315          * lazy state preservation, new FP context creation, etc,
8316          * that vfp_access_check() does. We know that the actual
8317          * access check will succeed (ie it won't generate code that
8318          * throws an exception) because we did that check by hand earlier.
8319          */
8320         bool ok = vfp_access_check(s);
8321         assert(ok);
8322         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8323         /*
8324          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8325          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8326          */
8327     }
8328     gen_jmp_tb(s, curr_insn_len(s), 1);
8329 
8330     set_disas_label(s, nextlabel);
8331     gen_jmp(s, jmp_diff(s, a->imm));
8332     return true;
8333 }
8334 
8335 static bool trans_LE(DisasContext *s, arg_LE *a)
8336 {
8337     /*
8338      * M-profile low-overhead loop end. The architecture permits an
8339      * implementation to discard the LO_BRANCH_INFO cache at any time,
8340      * and we take the IMPDEF option to never set it in the first place
8341      * (equivalent to always discarding it immediately), because for QEMU
8342      * a "real" implementation would be complicated and wouldn't execute
8343      * any faster.
8344      */
8345     TCGv_i32 tmp;
8346     DisasLabel loopend;
8347     bool fpu_active;
8348 
8349     if (!dc_isar_feature(aa32_lob, s)) {
8350         return false;
8351     }
8352     if (a->f && a->tp) {
8353         return false;
8354     }
8355     if (s->condexec_mask) {
8356         /*
8357          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8358          * we choose to UNDEF, because otherwise our use of
8359          * gen_goto_tb(1) would clash with the use of TB exit 1
8360          * in the dc->condjmp condition-failed codepath in
8361          * arm_tr_tb_stop() and we'd get an assertion.
8362          */
8363         return false;
8364     }
8365     if (a->tp) {
8366         /* LETP */
8367         if (!dc_isar_feature(aa32_mve, s)) {
8368             return false;
8369         }
8370         if (!vfp_access_check(s)) {
8371             s->eci_handled = true;
8372             return true;
8373         }
8374     }
8375 
8376     /* LE/LETP is OK with ECI set and leaves it untouched */
8377     s->eci_handled = true;
8378 
8379     /*
8380      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8381      * UsageFault exception for the LE insn in that case. Note that we
8382      * are not directly checking FPSCR.LTPSIZE but instead check the
8383      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8384      * not currently active (ie ActiveFPState() returns false). We
8385      * can identify not-active purely from our TB state flags, as the
8386      * FPU is active only if:
8387      *  the FPU is enabled
8388      *  AND lazy state preservation is not active
8389      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8390      *
8391      * Usually we don't need to care about this distinction between
8392      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8393      * will either take an exception or clear the conditions that make
8394      * the FPU not active. But LE is an unusual case of a non-FP insn
8395      * that looks at LTPSIZE.
8396      */
8397     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8398 
8399     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8400         /* Need to do a runtime check for LTPSIZE != 4 */
8401         DisasLabel skipexc = gen_disas_label(s);
8402         tmp = load_cpu_field(v7m.ltpsize);
8403         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8404         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8405         set_disas_label(s, skipexc);
8406     }
8407 
8408     if (a->f) {
8409         /* Loop-forever: just jump back to the loop start */
8410         gen_jmp(s, jmp_diff(s, -a->imm));
8411         return true;
8412     }
8413 
8414     /*
8415      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8416      * For LE, we know at this point that LTPSIZE must be 4 and the
8417      * loop decrement value is 1. For LETP we need to calculate the decrement
8418      * value from LTPSIZE.
8419      */
8420     loopend = gen_disas_label(s);
8421     if (!a->tp) {
8422         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8423         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8424     } else {
8425         /*
8426          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8427          * so that decr stays live after the brcondi.
8428          */
8429         TCGv_i32 decr = tcg_temp_new_i32();
8430         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8431         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8432         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8433 
8434         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8435 
8436         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8437     }
8438     /* Jump back to the loop start */
8439     gen_jmp(s, jmp_diff(s, -a->imm));
8440 
8441     set_disas_label(s, loopend);
8442     if (a->tp) {
8443         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8444         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8445     }
8446     /* End TB, continuing to following insn */
8447     gen_jmp_tb(s, curr_insn_len(s), 1);
8448     return true;
8449 }
8450 
8451 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8452 {
8453     /*
8454      * M-profile Loop Clear with Tail Predication. Since our implementation
8455      * doesn't cache branch information, all we need to do is reset
8456      * FPSCR.LTPSIZE to 4.
8457      */
8458 
8459     if (!dc_isar_feature(aa32_lob, s) ||
8460         !dc_isar_feature(aa32_mve, s)) {
8461         return false;
8462     }
8463 
8464     if (!vfp_access_check(s)) {
8465         return true;
8466     }
8467 
8468     store_cpu_field_constant(4, v7m.ltpsize);
8469     return true;
8470 }
8471 
8472 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8473 {
8474     /*
8475      * M-profile Create Vector Tail Predicate. This insn is itself
8476      * predicated and is subject to beatwise execution.
8477      */
8478     TCGv_i32 rn_shifted, masklen;
8479 
8480     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8481         return false;
8482     }
8483 
8484     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8485         return true;
8486     }
8487 
8488     /*
8489      * We pre-calculate the mask length here to avoid having
8490      * to have multiple helpers specialized for size.
8491      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8492      */
8493     rn_shifted = tcg_temp_new_i32();
8494     masklen = load_reg(s, a->rn);
8495     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8496     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8497                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8498                         rn_shifted, tcg_constant_i32(16));
8499     gen_helper_mve_vctp(tcg_env, masklen);
8500     /* This insn updates predication bits */
8501     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8502     mve_update_eci(s);
8503     return true;
8504 }
8505 
8506 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8507 {
8508     TCGv_i32 addr, tmp;
8509 
8510     tmp = load_reg(s, a->rm);
8511     if (half) {
8512         tcg_gen_add_i32(tmp, tmp, tmp);
8513     }
8514     addr = load_reg(s, a->rn);
8515     tcg_gen_add_i32(addr, addr, tmp);
8516 
8517     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8518 
8519     tcg_gen_add_i32(tmp, tmp, tmp);
8520     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8521     tcg_gen_add_i32(tmp, tmp, addr);
8522     store_reg(s, 15, tmp);
8523     return true;
8524 }
8525 
8526 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8527 {
8528     return op_tbranch(s, a, false);
8529 }
8530 
8531 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8532 {
8533     return op_tbranch(s, a, true);
8534 }
8535 
8536 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8537 {
8538     TCGv_i32 tmp = load_reg(s, a->rn);
8539 
8540     arm_gen_condlabel(s);
8541     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8542                         tmp, 0, s->condlabel.label);
8543     gen_jmp(s, jmp_diff(s, a->imm));
8544     return true;
8545 }
8546 
8547 /*
8548  * Supervisor call - both T32 & A32 come here so we need to check
8549  * which mode we are in when checking for semihosting.
8550  */
8551 
8552 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8553 {
8554     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8555 
8556     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8557         semihosting_enabled(s->current_el == 0) &&
8558         (a->imm == semihost_imm)) {
8559         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8560     } else {
8561         if (s->fgt_svc) {
8562             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8563             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8564         } else {
8565             gen_update_pc(s, curr_insn_len(s));
8566             s->svc_imm = a->imm;
8567             s->base.is_jmp = DISAS_SWI;
8568         }
8569     }
8570     return true;
8571 }
8572 
8573 /*
8574  * Unconditional system instructions
8575  */
8576 
8577 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8578 {
8579     static const int8_t pre_offset[4] = {
8580         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8581     };
8582     static const int8_t post_offset[4] = {
8583         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8584     };
8585     TCGv_i32 addr, t1, t2;
8586 
8587     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8588         return false;
8589     }
8590     if (IS_USER(s)) {
8591         unallocated_encoding(s);
8592         return true;
8593     }
8594 
8595     addr = load_reg(s, a->rn);
8596     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8597 
8598     /* Load PC into tmp and CPSR into tmp2.  */
8599     t1 = tcg_temp_new_i32();
8600     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8601     tcg_gen_addi_i32(addr, addr, 4);
8602     t2 = tcg_temp_new_i32();
8603     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8604 
8605     if (a->w) {
8606         /* Base writeback.  */
8607         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8608         store_reg(s, a->rn, addr);
8609     }
8610     gen_rfe(s, t1, t2);
8611     return true;
8612 }
8613 
8614 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8615 {
8616     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8617         return false;
8618     }
8619     gen_srs(s, a->mode, a->pu, a->w);
8620     return true;
8621 }
8622 
8623 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8624 {
8625     uint32_t mask, val;
8626 
8627     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8628         return false;
8629     }
8630     if (IS_USER(s)) {
8631         /* Implemented as NOP in user mode.  */
8632         return true;
8633     }
8634     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8635 
8636     mask = val = 0;
8637     if (a->imod & 2) {
8638         if (a->A) {
8639             mask |= CPSR_A;
8640         }
8641         if (a->I) {
8642             mask |= CPSR_I;
8643         }
8644         if (a->F) {
8645             mask |= CPSR_F;
8646         }
8647         if (a->imod & 1) {
8648             val |= mask;
8649         }
8650     }
8651     if (a->M) {
8652         mask |= CPSR_M;
8653         val |= a->mode;
8654     }
8655     if (mask) {
8656         gen_set_psr_im(s, mask, 0, val);
8657     }
8658     return true;
8659 }
8660 
8661 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8662 {
8663     TCGv_i32 tmp, addr;
8664 
8665     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8666         return false;
8667     }
8668     if (IS_USER(s)) {
8669         /* Implemented as NOP in user mode.  */
8670         return true;
8671     }
8672 
8673     tmp = tcg_constant_i32(a->im);
8674     /* FAULTMASK */
8675     if (a->F) {
8676         addr = tcg_constant_i32(19);
8677         gen_helper_v7m_msr(tcg_env, addr, tmp);
8678     }
8679     /* PRIMASK */
8680     if (a->I) {
8681         addr = tcg_constant_i32(16);
8682         gen_helper_v7m_msr(tcg_env, addr, tmp);
8683     }
8684     gen_rebuild_hflags(s, false);
8685     gen_lookup_tb(s);
8686     return true;
8687 }
8688 
8689 /*
8690  * Clear-Exclusive, Barriers
8691  */
8692 
8693 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8694 {
8695     if (s->thumb
8696         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8697         : !ENABLE_ARCH_6K) {
8698         return false;
8699     }
8700     gen_clrex(s);
8701     return true;
8702 }
8703 
8704 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8705 {
8706     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8707         return false;
8708     }
8709     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8710     return true;
8711 }
8712 
8713 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8714 {
8715     return trans_DSB(s, NULL);
8716 }
8717 
8718 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8719 {
8720     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8721         return false;
8722     }
8723     /*
8724      * We need to break the TB after this insn to execute
8725      * self-modifying code correctly and also to take
8726      * any pending interrupts immediately.
8727      */
8728     s->base.is_jmp = DISAS_TOO_MANY;
8729     return true;
8730 }
8731 
8732 static bool trans_SB(DisasContext *s, arg_SB *a)
8733 {
8734     if (!dc_isar_feature(aa32_sb, s)) {
8735         return false;
8736     }
8737     /*
8738      * TODO: There is no speculation barrier opcode
8739      * for TCG; MB and end the TB instead.
8740      */
8741     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8742     s->base.is_jmp = DISAS_TOO_MANY;
8743     return true;
8744 }
8745 
8746 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8747 {
8748     if (!ENABLE_ARCH_6) {
8749         return false;
8750     }
8751     if (a->E != (s->be_data == MO_BE)) {
8752         gen_helper_setend(tcg_env);
8753         s->base.is_jmp = DISAS_UPDATE_EXIT;
8754     }
8755     return true;
8756 }
8757 
8758 /*
8759  * Preload instructions
8760  * All are nops, contingent on the appropriate arch level.
8761  */
8762 
8763 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8764 {
8765     return ENABLE_ARCH_5TE;
8766 }
8767 
8768 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8769 {
8770     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8771 }
8772 
8773 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8774 {
8775     return ENABLE_ARCH_7;
8776 }
8777 
8778 /*
8779  * If-then
8780  */
8781 
8782 static bool trans_IT(DisasContext *s, arg_IT *a)
8783 {
8784     int cond_mask = a->cond_mask;
8785 
8786     /*
8787      * No actual code generated for this insn, just setup state.
8788      *
8789      * Combinations of firstcond and mask which set up an 0b1111
8790      * condition are UNPREDICTABLE; we take the CONSTRAINED
8791      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8792      * i.e. both meaning "execute always".
8793      */
8794     s->condexec_cond = (cond_mask >> 4) & 0xe;
8795     s->condexec_mask = cond_mask & 0x1f;
8796     return true;
8797 }
8798 
8799 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8800 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8801 {
8802     TCGv_i32 rn, rm;
8803     DisasCompare c;
8804 
8805     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8806         return false;
8807     }
8808 
8809     if (a->rm == 13) {
8810         /* SEE "Related encodings" (MVE shifts) */
8811         return false;
8812     }
8813 
8814     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8815         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8816         return false;
8817     }
8818 
8819     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8820     rn = tcg_temp_new_i32();
8821     rm = tcg_temp_new_i32();
8822     if (a->rn == 15) {
8823         tcg_gen_movi_i32(rn, 0);
8824     } else {
8825         load_reg_var(s, rn, a->rn);
8826     }
8827     if (a->rm == 15) {
8828         tcg_gen_movi_i32(rm, 0);
8829     } else {
8830         load_reg_var(s, rm, a->rm);
8831     }
8832 
8833     switch (a->op) {
8834     case 0: /* CSEL */
8835         break;
8836     case 1: /* CSINC */
8837         tcg_gen_addi_i32(rm, rm, 1);
8838         break;
8839     case 2: /* CSINV */
8840         tcg_gen_not_i32(rm, rm);
8841         break;
8842     case 3: /* CSNEG */
8843         tcg_gen_neg_i32(rm, rm);
8844         break;
8845     default:
8846         g_assert_not_reached();
8847     }
8848 
8849     arm_test_cc(&c, a->fcond);
8850     tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8851 
8852     store_reg(s, a->rd, rn);
8853     return true;
8854 }
8855 
8856 /*
8857  * Legacy decoder.
8858  */
8859 
8860 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8861 {
8862     unsigned int cond = insn >> 28;
8863 
8864     /* M variants do not implement ARM mode; this must raise the INVSTATE
8865      * UsageFault exception.
8866      */
8867     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8868         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8869         return;
8870     }
8871 
8872     if (s->pstate_il) {
8873         /*
8874          * Illegal execution state. This has priority over BTI
8875          * exceptions, but comes after instruction abort exceptions.
8876          */
8877         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8878         return;
8879     }
8880 
8881     if (cond == 0xf) {
8882         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8883          * choose to UNDEF. In ARMv5 and above the space is used
8884          * for miscellaneous unconditional instructions.
8885          */
8886         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8887             unallocated_encoding(s);
8888             return;
8889         }
8890 
8891         /* Unconditional instructions.  */
8892         /* TODO: Perhaps merge these into one decodetree output file.  */
8893         if (disas_a32_uncond(s, insn) ||
8894             disas_vfp_uncond(s, insn) ||
8895             disas_neon_dp(s, insn) ||
8896             disas_neon_ls(s, insn) ||
8897             disas_neon_shared(s, insn)) {
8898             return;
8899         }
8900         /* fall back to legacy decoder */
8901 
8902         if ((insn & 0x0e000f00) == 0x0c000100) {
8903             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8904                 /* iWMMXt register transfer.  */
8905                 if (extract32(s->c15_cpar, 1, 1)) {
8906                     if (!disas_iwmmxt_insn(s, insn)) {
8907                         return;
8908                     }
8909                 }
8910             }
8911         }
8912         goto illegal_op;
8913     }
8914     if (cond != 0xe) {
8915         /* if not always execute, we generate a conditional jump to
8916            next instruction */
8917         arm_skip_unless(s, cond);
8918     }
8919 
8920     /* TODO: Perhaps merge these into one decodetree output file.  */
8921     if (disas_a32(s, insn) ||
8922         disas_vfp(s, insn)) {
8923         return;
8924     }
8925     /* fall back to legacy decoder */
8926     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8927     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8928         if (((insn & 0x0c000e00) == 0x0c000000)
8929             && ((insn & 0x03000000) != 0x03000000)) {
8930             /* Coprocessor insn, coprocessor 0 or 1 */
8931             disas_xscale_insn(s, insn);
8932             return;
8933         }
8934     }
8935 
8936 illegal_op:
8937     unallocated_encoding(s);
8938 }
8939 
8940 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8941 {
8942     /*
8943      * Return true if this is a 16 bit instruction. We must be precise
8944      * about this (matching the decode).
8945      */
8946     if ((insn >> 11) < 0x1d) {
8947         /* Definitely a 16-bit instruction */
8948         return true;
8949     }
8950 
8951     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8952      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8953      * end up actually treating this as two 16-bit insns, though,
8954      * if it's half of a bl/blx pair that might span a page boundary.
8955      */
8956     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8957         arm_dc_feature(s, ARM_FEATURE_M)) {
8958         /* Thumb2 cores (including all M profile ones) always treat
8959          * 32-bit insns as 32-bit.
8960          */
8961         return false;
8962     }
8963 
8964     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8965         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8966          * is not on the next page; we merge this into a 32-bit
8967          * insn.
8968          */
8969         return false;
8970     }
8971     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8972      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8973      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8974      *  -- handle as single 16 bit insn
8975      */
8976     return true;
8977 }
8978 
8979 /* Translate a 32-bit thumb instruction. */
8980 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8981 {
8982     /*
8983      * ARMv6-M supports a limited subset of Thumb2 instructions.
8984      * Other Thumb1 architectures allow only 32-bit
8985      * combined BL/BLX prefix and suffix.
8986      */
8987     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8988         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8989         int i;
8990         bool found = false;
8991         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8992                                                0xf3b08040 /* dsb */,
8993                                                0xf3b08050 /* dmb */,
8994                                                0xf3b08060 /* isb */,
8995                                                0xf3e08000 /* mrs */,
8996                                                0xf000d000 /* bl */};
8997         static const uint32_t armv6m_mask[] = {0xffe0d000,
8998                                                0xfff0d0f0,
8999                                                0xfff0d0f0,
9000                                                0xfff0d0f0,
9001                                                0xffe0d000,
9002                                                0xf800d000};
9003 
9004         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9005             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9006                 found = true;
9007                 break;
9008             }
9009         }
9010         if (!found) {
9011             goto illegal_op;
9012         }
9013     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9014         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9015             unallocated_encoding(s);
9016             return;
9017         }
9018     }
9019 
9020     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9021         /*
9022          * NOCP takes precedence over any UNDEF for (almost) the
9023          * entire wide range of coprocessor-space encodings, so check
9024          * for it first before proceeding to actually decode eg VFP
9025          * insns. This decode also handles the few insns which are
9026          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9027          */
9028         if (disas_m_nocp(s, insn)) {
9029             return;
9030         }
9031     }
9032 
9033     if ((insn & 0xef000000) == 0xef000000) {
9034         /*
9035          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9036          * transform into
9037          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9038          */
9039         uint32_t a32_insn = (insn & 0xe2ffffff) |
9040             ((insn & (1 << 28)) >> 4) | (1 << 28);
9041 
9042         if (disas_neon_dp(s, a32_insn)) {
9043             return;
9044         }
9045     }
9046 
9047     if ((insn & 0xff100000) == 0xf9000000) {
9048         /*
9049          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9050          * transform into
9051          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9052          */
9053         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9054 
9055         if (disas_neon_ls(s, a32_insn)) {
9056             return;
9057         }
9058     }
9059 
9060     /*
9061      * TODO: Perhaps merge these into one decodetree output file.
9062      * Note disas_vfp is written for a32 with cond field in the
9063      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9064      */
9065     if (disas_t32(s, insn) ||
9066         disas_vfp_uncond(s, insn) ||
9067         disas_neon_shared(s, insn) ||
9068         disas_mve(s, insn) ||
9069         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9070         return;
9071     }
9072 
9073 illegal_op:
9074     unallocated_encoding(s);
9075 }
9076 
9077 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9078 {
9079     if (!disas_t16(s, insn)) {
9080         unallocated_encoding(s);
9081     }
9082 }
9083 
9084 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9085 {
9086     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9087      * (False positives are OK, false negatives are not.)
9088      * We know this is a Thumb insn, and our caller ensures we are
9089      * only called if dc->base.pc_next is less than 4 bytes from the page
9090      * boundary, so we cross the page if the first 16 bits indicate
9091      * that this is a 32 bit insn.
9092      */
9093     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9094 
9095     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9096 }
9097 
9098 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9099 {
9100     DisasContext *dc = container_of(dcbase, DisasContext, base);
9101     CPUARMState *env = cpu_env(cs);
9102     ARMCPU *cpu = env_archcpu(env);
9103     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9104     uint32_t condexec, core_mmu_idx;
9105 
9106     dc->isar = &cpu->isar;
9107     dc->condjmp = 0;
9108     dc->pc_save = dc->base.pc_first;
9109     dc->aarch64 = false;
9110     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9111     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9112     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9113     /*
9114      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9115      * is always the IT bits. On M-profile, some of the reserved encodings
9116      * of IT are used instead to indicate either ICI or ECI, which
9117      * indicate partial progress of a restartable insn that was interrupted
9118      * partway through by an exception:
9119      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9120      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9121      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9122      * insn, behave normally".
9123      */
9124     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9125     dc->eci_handled = false;
9126     if (condexec & 0xf) {
9127         dc->condexec_mask = (condexec & 0xf) << 1;
9128         dc->condexec_cond = condexec >> 4;
9129     } else {
9130         if (arm_feature(env, ARM_FEATURE_M)) {
9131             dc->eci = condexec >> 4;
9132         }
9133     }
9134 
9135     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9136     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9137     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9138 #if !defined(CONFIG_USER_ONLY)
9139     dc->user = (dc->current_el == 0);
9140 #endif
9141     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9142     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9143     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9144     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9145     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9146 
9147     if (arm_feature(env, ARM_FEATURE_M)) {
9148         dc->vfp_enabled = 1;
9149         dc->be_data = MO_TE;
9150         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9151         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9152         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9153         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9154         dc->v7m_new_fp_ctxt_needed =
9155             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9156         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9157         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9158     } else {
9159         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9160         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9161         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9162         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9163         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9164             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9165         } else {
9166             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9167             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9168         }
9169         dc->sme_trap_nonstreaming =
9170             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9171     }
9172     dc->lse2 = false; /* applies only to aarch64 */
9173     dc->cp_regs = cpu->cp_regs;
9174     dc->features = env->features;
9175 
9176     /* Single step state. The code-generation logic here is:
9177      *  SS_ACTIVE == 0:
9178      *   generate code with no special handling for single-stepping (except
9179      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9180      *   this happens anyway because those changes are all system register or
9181      *   PSTATE writes).
9182      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9183      *   emit code for one insn
9184      *   emit code to clear PSTATE.SS
9185      *   emit code to generate software step exception for completed step
9186      *   end TB (as usual for having generated an exception)
9187      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9188      *   emit code to generate a software step exception
9189      *   end the TB
9190      */
9191     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9192     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9193     dc->is_ldex = false;
9194 
9195     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9196 
9197     /* If architectural single step active, limit to 1.  */
9198     if (dc->ss_active) {
9199         dc->base.max_insns = 1;
9200     }
9201 
9202     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9203        to those left on the page.  */
9204     if (!dc->thumb) {
9205         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9206         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9207     }
9208 
9209     cpu_V0 = tcg_temp_new_i64();
9210     cpu_V1 = tcg_temp_new_i64();
9211     cpu_M0 = tcg_temp_new_i64();
9212 }
9213 
9214 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9215 {
9216     DisasContext *dc = container_of(dcbase, DisasContext, base);
9217 
9218     /* A note on handling of the condexec (IT) bits:
9219      *
9220      * We want to avoid the overhead of having to write the updated condexec
9221      * bits back to the CPUARMState for every instruction in an IT block. So:
9222      * (1) if the condexec bits are not already zero then we write
9223      * zero back into the CPUARMState now. This avoids complications trying
9224      * to do it at the end of the block. (For example if we don't do this
9225      * it's hard to identify whether we can safely skip writing condexec
9226      * at the end of the TB, which we definitely want to do for the case
9227      * where a TB doesn't do anything with the IT state at all.)
9228      * (2) if we are going to leave the TB then we call gen_set_condexec()
9229      * which will write the correct value into CPUARMState if zero is wrong.
9230      * This is done both for leaving the TB at the end, and for leaving
9231      * it because of an exception we know will happen, which is done in
9232      * gen_exception_insn(). The latter is necessary because we need to
9233      * leave the TB with the PC/IT state just prior to execution of the
9234      * instruction which caused the exception.
9235      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9236      * then the CPUARMState will be wrong and we need to reset it.
9237      * This is handled in the same way as restoration of the
9238      * PC in these situations; we save the value of the condexec bits
9239      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9240      * then uses this to restore them after an exception.
9241      *
9242      * Note that there are no instructions which can read the condexec
9243      * bits, and none which can write non-static values to them, so
9244      * we don't need to care about whether CPUARMState is correct in the
9245      * middle of a TB.
9246      */
9247 
9248     /* Reset the conditional execution bits immediately. This avoids
9249        complications trying to do it at the end of the block.  */
9250     if (dc->condexec_mask || dc->condexec_cond) {
9251         store_cpu_field_constant(0, condexec_bits);
9252     }
9253 }
9254 
9255 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9256 {
9257     DisasContext *dc = container_of(dcbase, DisasContext, base);
9258     /*
9259      * The ECI/ICI bits share PSR bits with the IT bits, so we
9260      * need to reconstitute the bits from the split-out DisasContext
9261      * fields here.
9262      */
9263     uint32_t condexec_bits;
9264     target_ulong pc_arg = dc->base.pc_next;
9265 
9266     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9267         pc_arg &= ~TARGET_PAGE_MASK;
9268     }
9269     if (dc->eci) {
9270         condexec_bits = dc->eci << 4;
9271     } else {
9272         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9273     }
9274     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9275     dc->insn_start = tcg_last_op();
9276 }
9277 
9278 static bool arm_check_kernelpage(DisasContext *dc)
9279 {
9280 #ifdef CONFIG_USER_ONLY
9281     /* Intercept jump to the magic kernel page.  */
9282     if (dc->base.pc_next >= 0xffff0000) {
9283         /* We always get here via a jump, so know we are not in a
9284            conditional execution block.  */
9285         gen_exception_internal(EXCP_KERNEL_TRAP);
9286         dc->base.is_jmp = DISAS_NORETURN;
9287         return true;
9288     }
9289 #endif
9290     return false;
9291 }
9292 
9293 static bool arm_check_ss_active(DisasContext *dc)
9294 {
9295     if (dc->ss_active && !dc->pstate_ss) {
9296         /* Singlestep state is Active-pending.
9297          * If we're in this state at the start of a TB then either
9298          *  a) we just took an exception to an EL which is being debugged
9299          *     and this is the first insn in the exception handler
9300          *  b) debug exceptions were masked and we just unmasked them
9301          *     without changing EL (eg by clearing PSTATE.D)
9302          * In either case we're going to take a swstep exception in the
9303          * "did not step an insn" case, and so the syndrome ISV and EX
9304          * bits should be zero.
9305          */
9306         assert(dc->base.num_insns == 1);
9307         gen_swstep_exception(dc, 0, 0);
9308         dc->base.is_jmp = DISAS_NORETURN;
9309         return true;
9310     }
9311 
9312     return false;
9313 }
9314 
9315 static void arm_post_translate_insn(DisasContext *dc)
9316 {
9317     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9318         if (dc->pc_save != dc->condlabel.pc_save) {
9319             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9320         }
9321         gen_set_label(dc->condlabel.label);
9322         dc->condjmp = 0;
9323     }
9324 }
9325 
9326 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9327 {
9328     DisasContext *dc = container_of(dcbase, DisasContext, base);
9329     CPUARMState *env = cpu_env(cpu);
9330     uint32_t pc = dc->base.pc_next;
9331     unsigned int insn;
9332 
9333     /* Singlestep exceptions have the highest priority. */
9334     if (arm_check_ss_active(dc)) {
9335         dc->base.pc_next = pc + 4;
9336         return;
9337     }
9338 
9339     if (pc & 3) {
9340         /*
9341          * PC alignment fault.  This has priority over the instruction abort
9342          * that we would receive from a translation fault via arm_ldl_code
9343          * (or the execution of the kernelpage entrypoint). This should only
9344          * be possible after an indirect branch, at the start of the TB.
9345          */
9346         assert(dc->base.num_insns == 1);
9347         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
9348         dc->base.is_jmp = DISAS_NORETURN;
9349         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9350         return;
9351     }
9352 
9353     if (arm_check_kernelpage(dc)) {
9354         dc->base.pc_next = pc + 4;
9355         return;
9356     }
9357 
9358     dc->pc_curr = pc;
9359     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9360     dc->insn = insn;
9361     dc->base.pc_next = pc + 4;
9362     disas_arm_insn(dc, insn);
9363 
9364     arm_post_translate_insn(dc);
9365 
9366     /* ARM is a fixed-length ISA.  We performed the cross-page check
9367        in init_disas_context by adjusting max_insns.  */
9368 }
9369 
9370 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9371 {
9372     /* Return true if this Thumb insn is always unconditional,
9373      * even inside an IT block. This is true of only a very few
9374      * instructions: BKPT, HLT, and SG.
9375      *
9376      * A larger class of instructions are UNPREDICTABLE if used
9377      * inside an IT block; we do not need to detect those here, because
9378      * what we do by default (perform the cc check and update the IT
9379      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9380      * choice for those situations.
9381      *
9382      * insn is either a 16-bit or a 32-bit instruction; the two are
9383      * distinguishable because for the 16-bit case the top 16 bits
9384      * are zeroes, and that isn't a valid 32-bit encoding.
9385      */
9386     if ((insn & 0xffffff00) == 0xbe00) {
9387         /* BKPT */
9388         return true;
9389     }
9390 
9391     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9392         !arm_dc_feature(s, ARM_FEATURE_M)) {
9393         /* HLT: v8A only. This is unconditional even when it is going to
9394          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9395          * For v7 cores this was a plain old undefined encoding and so
9396          * honours its cc check. (We might be using the encoding as
9397          * a semihosting trap, but we don't change the cc check behaviour
9398          * on that account, because a debugger connected to a real v7A
9399          * core and emulating semihosting traps by catching the UNDEF
9400          * exception would also only see cases where the cc check passed.
9401          * No guest code should be trying to do a HLT semihosting trap
9402          * in an IT block anyway.
9403          */
9404         return true;
9405     }
9406 
9407     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9408         arm_dc_feature(s, ARM_FEATURE_M)) {
9409         /* SG: v8M only */
9410         return true;
9411     }
9412 
9413     return false;
9414 }
9415 
9416 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9417 {
9418     DisasContext *dc = container_of(dcbase, DisasContext, base);
9419     CPUARMState *env = cpu_env(cpu);
9420     uint32_t pc = dc->base.pc_next;
9421     uint32_t insn;
9422     bool is_16bit;
9423     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9424     TCGOp *insn_eci_rewind = NULL;
9425     target_ulong insn_eci_pc_save = -1;
9426 
9427     /* Misaligned thumb PC is architecturally impossible. */
9428     assert((dc->base.pc_next & 1) == 0);
9429 
9430     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9431         dc->base.pc_next = pc + 2;
9432         return;
9433     }
9434 
9435     dc->pc_curr = pc;
9436     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9437     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9438     pc += 2;
9439     if (!is_16bit) {
9440         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9441         insn = insn << 16 | insn2;
9442         pc += 2;
9443     }
9444     dc->base.pc_next = pc;
9445     dc->insn = insn;
9446 
9447     if (dc->pstate_il) {
9448         /*
9449          * Illegal execution state. This has priority over BTI
9450          * exceptions, but comes after instruction abort exceptions.
9451          */
9452         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9453         return;
9454     }
9455 
9456     if (dc->eci) {
9457         /*
9458          * For M-profile continuable instructions, ECI/ICI handling
9459          * falls into these cases:
9460          *  - interrupt-continuable instructions
9461          *     These are the various load/store multiple insns (both
9462          *     integer and fp). The ICI bits indicate the register
9463          *     where the load/store can resume. We make the IMPDEF
9464          *     choice to always do "instruction restart", ie ignore
9465          *     the ICI value and always execute the ldm/stm from the
9466          *     start. So all we need to do is zero PSR.ICI if the
9467          *     insn executes.
9468          *  - MVE instructions subject to beat-wise execution
9469          *     Here the ECI bits indicate which beats have already been
9470          *     executed, and we must honour this. Each insn of this
9471          *     type will handle it correctly. We will update PSR.ECI
9472          *     in the helper function for the insn (some ECI values
9473          *     mean that the following insn also has been partially
9474          *     executed).
9475          *  - Special cases which don't advance ECI
9476          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9477          *     bits untouched.
9478          *  - all other insns (the common case)
9479          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9480          *     We place a rewind-marker here. Insns in the previous
9481          *     three categories will set a flag in the DisasContext.
9482          *     If the flag isn't set after we call disas_thumb_insn()
9483          *     or disas_thumb2_insn() then we know we have a "some other
9484          *     insn" case. We will rewind to the marker (ie throwing away
9485          *     all the generated code) and instead emit "take exception".
9486          */
9487         insn_eci_rewind = tcg_last_op();
9488         insn_eci_pc_save = dc->pc_save;
9489     }
9490 
9491     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9492         uint32_t cond = dc->condexec_cond;
9493 
9494         /*
9495          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9496          * "always"; 0xf is not "never".
9497          */
9498         if (cond < 0x0e) {
9499             arm_skip_unless(dc, cond);
9500         }
9501     }
9502 
9503     if (is_16bit) {
9504         disas_thumb_insn(dc, insn);
9505     } else {
9506         disas_thumb2_insn(dc, insn);
9507     }
9508 
9509     /* Advance the Thumb condexec condition.  */
9510     if (dc->condexec_mask) {
9511         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9512                              ((dc->condexec_mask >> 4) & 1));
9513         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9514         if (dc->condexec_mask == 0) {
9515             dc->condexec_cond = 0;
9516         }
9517     }
9518 
9519     if (dc->eci && !dc->eci_handled) {
9520         /*
9521          * Insn wasn't valid for ECI/ICI at all: undo what we
9522          * just generated and instead emit an exception
9523          */
9524         tcg_remove_ops_after(insn_eci_rewind);
9525         dc->pc_save = insn_eci_pc_save;
9526         dc->condjmp = 0;
9527         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9528     }
9529 
9530     arm_post_translate_insn(dc);
9531 
9532     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9533      * will touch a new page.  This ensures that prefetch aborts occur at
9534      * the right place.
9535      *
9536      * We want to stop the TB if the next insn starts in a new page,
9537      * or if it spans between this page and the next. This means that
9538      * if we're looking at the last halfword in the page we need to
9539      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9540      * or a 32-bit Thumb insn (which won't).
9541      * This is to avoid generating a silly TB with a single 16-bit insn
9542      * in it at the end of this page (which would execute correctly
9543      * but isn't very efficient).
9544      */
9545     if (dc->base.is_jmp == DISAS_NEXT
9546         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9547             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9548                 && insn_crosses_page(env, dc)))) {
9549         dc->base.is_jmp = DISAS_TOO_MANY;
9550     }
9551 }
9552 
9553 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9554 {
9555     DisasContext *dc = container_of(dcbase, DisasContext, base);
9556 
9557     /* At this stage dc->condjmp will only be set when the skipped
9558        instruction was a conditional branch or trap, and the PC has
9559        already been written.  */
9560     gen_set_condexec(dc);
9561     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9562         /* Exception return branches need some special case code at the
9563          * end of the TB, which is complex enough that it has to
9564          * handle the single-step vs not and the condition-failed
9565          * insn codepath itself.
9566          */
9567         gen_bx_excret_final_code(dc);
9568     } else if (unlikely(dc->ss_active)) {
9569         /* Unconditional and "condition passed" instruction codepath. */
9570         switch (dc->base.is_jmp) {
9571         case DISAS_SWI:
9572             gen_ss_advance(dc);
9573             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9574             break;
9575         case DISAS_HVC:
9576             gen_ss_advance(dc);
9577             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9578             break;
9579         case DISAS_SMC:
9580             gen_ss_advance(dc);
9581             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9582             break;
9583         case DISAS_NEXT:
9584         case DISAS_TOO_MANY:
9585         case DISAS_UPDATE_EXIT:
9586         case DISAS_UPDATE_NOCHAIN:
9587             gen_update_pc(dc, curr_insn_len(dc));
9588             /* fall through */
9589         default:
9590             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9591             gen_singlestep_exception(dc);
9592             break;
9593         case DISAS_NORETURN:
9594             break;
9595         }
9596     } else {
9597         /* While branches must always occur at the end of an IT block,
9598            there are a few other things that can cause us to terminate
9599            the TB in the middle of an IT block:
9600             - Exception generating instructions (bkpt, swi, undefined).
9601             - Page boundaries.
9602             - Hardware watchpoints.
9603            Hardware breakpoints have already been handled and skip this code.
9604          */
9605         switch (dc->base.is_jmp) {
9606         case DISAS_NEXT:
9607         case DISAS_TOO_MANY:
9608             gen_goto_tb(dc, 1, curr_insn_len(dc));
9609             break;
9610         case DISAS_UPDATE_NOCHAIN:
9611             gen_update_pc(dc, curr_insn_len(dc));
9612             /* fall through */
9613         case DISAS_JUMP:
9614             gen_goto_ptr();
9615             break;
9616         case DISAS_UPDATE_EXIT:
9617             gen_update_pc(dc, curr_insn_len(dc));
9618             /* fall through */
9619         default:
9620             /* indicate that the hash table must be used to find the next TB */
9621             tcg_gen_exit_tb(NULL, 0);
9622             break;
9623         case DISAS_NORETURN:
9624             /* nothing more to generate */
9625             break;
9626         case DISAS_WFI:
9627             gen_helper_wfi(tcg_env, tcg_constant_i32(curr_insn_len(dc)));
9628             /*
9629              * The helper doesn't necessarily throw an exception, but we
9630              * must go back to the main loop to check for interrupts anyway.
9631              */
9632             tcg_gen_exit_tb(NULL, 0);
9633             break;
9634         case DISAS_WFE:
9635             gen_helper_wfe(tcg_env);
9636             break;
9637         case DISAS_YIELD:
9638             gen_helper_yield(tcg_env);
9639             break;
9640         case DISAS_SWI:
9641             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9642             break;
9643         case DISAS_HVC:
9644             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9645             break;
9646         case DISAS_SMC:
9647             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9648             break;
9649         }
9650     }
9651 
9652     if (dc->condjmp) {
9653         /* "Condition failed" instruction codepath for the branch/trap insn */
9654         set_disas_label(dc, dc->condlabel);
9655         gen_set_condexec(dc);
9656         if (unlikely(dc->ss_active)) {
9657             gen_update_pc(dc, curr_insn_len(dc));
9658             gen_singlestep_exception(dc);
9659         } else {
9660             gen_goto_tb(dc, 1, curr_insn_len(dc));
9661         }
9662     }
9663 }
9664 
9665 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9666                              CPUState *cpu, FILE *logfile)
9667 {
9668     DisasContext *dc = container_of(dcbase, DisasContext, base);
9669 
9670     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9671     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9672 }
9673 
9674 static const TranslatorOps arm_translator_ops = {
9675     .init_disas_context = arm_tr_init_disas_context,
9676     .tb_start           = arm_tr_tb_start,
9677     .insn_start         = arm_tr_insn_start,
9678     .translate_insn     = arm_tr_translate_insn,
9679     .tb_stop            = arm_tr_tb_stop,
9680     .disas_log          = arm_tr_disas_log,
9681 };
9682 
9683 static const TranslatorOps thumb_translator_ops = {
9684     .init_disas_context = arm_tr_init_disas_context,
9685     .tb_start           = arm_tr_tb_start,
9686     .insn_start         = arm_tr_insn_start,
9687     .translate_insn     = thumb_tr_translate_insn,
9688     .tb_stop            = arm_tr_tb_stop,
9689     .disas_log          = arm_tr_disas_log,
9690 };
9691 
9692 /* generate intermediate code for basic block 'tb'.  */
9693 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9694                            vaddr pc, void *host_pc)
9695 {
9696     DisasContext dc = { };
9697     const TranslatorOps *ops = &arm_translator_ops;
9698     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9699 
9700     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9701         ops = &thumb_translator_ops;
9702     }
9703 #ifdef TARGET_AARCH64
9704     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9705         ops = &aarch64_translator_ops;
9706     }
9707 #endif
9708 
9709     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9710 }
9711