xref: /openbmc/qemu/target/arm/tcg/translate.c (revision 800af0aae1cfa456701c5fa1ef273ce47585179c)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "translate.h"
24 #include "translate-a32.h"
25 #include "qemu/log.h"
26 #include "disas/disas.h"
27 #include "arm_ldst.h"
28 #include "semihosting/semihost.h"
29 #include "cpregs.h"
30 #include "exec/helper-proto.h"
31 
32 #define HELPER_H "helper.h"
33 #include "exec/helper-info.c.inc"
34 #undef  HELPER_H
35 
36 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
37 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
38 /* currently all emulated v5 cores are also v5TE, so don't bother */
39 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
40 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
41 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
42 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
43 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
44 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
45 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
46 
47 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
48 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
49 /* These are TCG globals which alias CPUARMState fields */
50 static TCGv_i32 cpu_R[16];
51 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
52 TCGv_i64 cpu_exclusive_addr;
53 TCGv_i64 cpu_exclusive_val;
54 
55 static const char * const regnames[] =
56     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
57       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
58 
59 
60 /* initialize TCG globals.  */
61 void arm_translate_init(void)
62 {
63     int i;
64 
65     for (i = 0; i < 16; i++) {
66         cpu_R[i] = tcg_global_mem_new_i32(tcg_env,
67                                           offsetof(CPUARMState, regs[i]),
68                                           regnames[i]);
69     }
70     cpu_CF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, CF), "CF");
71     cpu_NF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, NF), "NF");
72     cpu_VF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, VF), "VF");
73     cpu_ZF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, ZF), "ZF");
74 
75     cpu_exclusive_addr = tcg_global_mem_new_i64(tcg_env,
76         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
77     cpu_exclusive_val = tcg_global_mem_new_i64(tcg_env,
78         offsetof(CPUARMState, exclusive_val), "exclusive_val");
79 
80     a64_translate_init();
81 }
82 
83 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
84 {
85     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
86     switch (cmode) {
87     case 0: case 1:
88         /* no-op */
89         break;
90     case 2: case 3:
91         imm <<= 8;
92         break;
93     case 4: case 5:
94         imm <<= 16;
95         break;
96     case 6: case 7:
97         imm <<= 24;
98         break;
99     case 8: case 9:
100         imm |= imm << 16;
101         break;
102     case 10: case 11:
103         imm = (imm << 8) | (imm << 24);
104         break;
105     case 12:
106         imm = (imm << 8) | 0xff;
107         break;
108     case 13:
109         imm = (imm << 16) | 0xffff;
110         break;
111     case 14:
112         if (op) {
113             /*
114              * This and cmode == 15 op == 1 are the only cases where
115              * the top and bottom 32 bits of the encoded constant differ.
116              */
117             uint64_t imm64 = 0;
118             int n;
119 
120             for (n = 0; n < 8; n++) {
121                 if (imm & (1 << n)) {
122                     imm64 |= (0xffULL << (n * 8));
123                 }
124             }
125             return imm64;
126         }
127         imm |= (imm << 8) | (imm << 16) | (imm << 24);
128         break;
129     case 15:
130         if (op) {
131             /* Reserved encoding for AArch32; valid for AArch64 */
132             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
133             if (imm & 0x80) {
134                 imm64 |= 0x8000000000000000ULL;
135             }
136             if (imm & 0x40) {
137                 imm64 |= 0x3fc0000000000000ULL;
138             } else {
139                 imm64 |= 0x4000000000000000ULL;
140             }
141             return imm64;
142         }
143         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
144             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
145         break;
146     }
147     if (op) {
148         imm = ~imm;
149     }
150     return dup_const(MO_32, imm);
151 }
152 
153 /* Generate a label used for skipping this instruction */
154 void arm_gen_condlabel(DisasContext *s)
155 {
156     if (!s->condjmp) {
157         s->condlabel = gen_disas_label(s);
158         s->condjmp = 1;
159     }
160 }
161 
162 /* Flags for the disas_set_da_iss info argument:
163  * lower bits hold the Rt register number, higher bits are flags.
164  */
165 typedef enum ISSInfo {
166     ISSNone = 0,
167     ISSRegMask = 0x1f,
168     ISSInvalid = (1 << 5),
169     ISSIsAcqRel = (1 << 6),
170     ISSIsWrite = (1 << 7),
171     ISSIs16Bit = (1 << 8),
172 } ISSInfo;
173 
174 /*
175  * Store var into env + offset to a member with size bytes.
176  * Free var after use.
177  */
178 void store_cpu_offset(TCGv_i32 var, int offset, int size)
179 {
180     switch (size) {
181     case 1:
182         tcg_gen_st8_i32(var, tcg_env, offset);
183         break;
184     case 4:
185         tcg_gen_st_i32(var, tcg_env, offset);
186         break;
187     default:
188         g_assert_not_reached();
189     }
190 }
191 
192 /* Save the syndrome information for a Data Abort */
193 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
194 {
195     uint32_t syn;
196     int sas = memop & MO_SIZE;
197     bool sse = memop & MO_SIGN;
198     bool is_acqrel = issinfo & ISSIsAcqRel;
199     bool is_write = issinfo & ISSIsWrite;
200     bool is_16bit = issinfo & ISSIs16Bit;
201     int srt = issinfo & ISSRegMask;
202 
203     if (issinfo & ISSInvalid) {
204         /* Some callsites want to conditionally provide ISS info,
205          * eg "only if this was not a writeback"
206          */
207         return;
208     }
209 
210     if (srt == 15) {
211         /* For AArch32, insns where the src/dest is R15 never generate
212          * ISS information. Catching that here saves checking at all
213          * the call sites.
214          */
215         return;
216     }
217 
218     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
219                                   0, 0, 0, is_write, 0, is_16bit);
220     disas_set_insn_syndrome(s, syn);
221 }
222 
223 static inline int get_a32_user_mem_index(DisasContext *s)
224 {
225     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
226      * insns:
227      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
228      *  otherwise, access as if at PL0.
229      */
230     switch (s->mmu_idx) {
231     case ARMMMUIdx_E3:
232     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
233     case ARMMMUIdx_E10_0:
234     case ARMMMUIdx_E10_1:
235     case ARMMMUIdx_E10_1_PAN:
236         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
237     case ARMMMUIdx_MUser:
238     case ARMMMUIdx_MPriv:
239         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
240     case ARMMMUIdx_MUserNegPri:
241     case ARMMMUIdx_MPrivNegPri:
242         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
243     case ARMMMUIdx_MSUser:
244     case ARMMMUIdx_MSPriv:
245         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
246     case ARMMMUIdx_MSUserNegPri:
247     case ARMMMUIdx_MSPrivNegPri:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
249     default:
250         g_assert_not_reached();
251     }
252 }
253 
254 /* The pc_curr difference for an architectural jump. */
255 static target_long jmp_diff(DisasContext *s, target_long diff)
256 {
257     return diff + (s->thumb ? 4 : 8);
258 }
259 
260 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
261 {
262     assert(s->pc_save != -1);
263     if (tb_cflags(s->base.tb) & CF_PCREL) {
264         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
265     } else {
266         tcg_gen_movi_i32(var, s->pc_curr + diff);
267     }
268 }
269 
270 /* Set a variable to the value of a CPU register.  */
271 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
272 {
273     if (reg == 15) {
274         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
275     } else {
276         tcg_gen_mov_i32(var, cpu_R[reg]);
277     }
278 }
279 
280 /*
281  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
282  * This is used for load/store for which use of PC implies (literal),
283  * or ADD that implies ADR.
284  */
285 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
286 {
287     TCGv_i32 tmp = tcg_temp_new_i32();
288 
289     if (reg == 15) {
290         /*
291          * This address is computed from an aligned PC:
292          * subtract off the low bits.
293          */
294         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
295     } else {
296         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
297     }
298     return tmp;
299 }
300 
301 /* Set a CPU register.  The source must be a temporary and will be
302    marked as dead.  */
303 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
304 {
305     if (reg == 15) {
306         /* In Thumb mode, we must ignore bit 0.
307          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
308          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
309          * We choose to ignore [1:0] in ARM mode for all architecture versions.
310          */
311         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
312         s->base.is_jmp = DISAS_JUMP;
313         s->pc_save = -1;
314     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
315         /* For M-profile SP bits [1:0] are always zero */
316         tcg_gen_andi_i32(var, var, ~3);
317     }
318     tcg_gen_mov_i32(cpu_R[reg], var);
319 }
320 
321 /*
322  * Variant of store_reg which applies v8M stack-limit checks before updating
323  * SP. If the check fails this will result in an exception being taken.
324  * We disable the stack checks for CONFIG_USER_ONLY because we have
325  * no idea what the stack limits should be in that case.
326  * If stack checking is not being done this just acts like store_reg().
327  */
328 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
329 {
330 #ifndef CONFIG_USER_ONLY
331     if (s->v8m_stackcheck) {
332         gen_helper_v8m_stackcheck(tcg_env, var);
333     }
334 #endif
335     store_reg(s, 13, var);
336 }
337 
338 /* Value extensions.  */
339 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
340 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
341 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
342 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
343 
344 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
345 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
346 
347 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
348 {
349     gen_helper_cpsr_write(tcg_env, var, tcg_constant_i32(mask));
350 }
351 
352 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
353 {
354     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
355 
356     if (new_el) {
357         if (m_profile) {
358             gen_helper_rebuild_hflags_m32_newel(tcg_env);
359         } else {
360             gen_helper_rebuild_hflags_a32_newel(tcg_env);
361         }
362     } else {
363         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
364         if (m_profile) {
365             gen_helper_rebuild_hflags_m32(tcg_env, tcg_el);
366         } else {
367             gen_helper_rebuild_hflags_a32(tcg_env, tcg_el);
368         }
369     }
370 }
371 
372 static void gen_exception_internal(int excp)
373 {
374     assert(excp_is_internal(excp));
375     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
376 }
377 
378 static void gen_singlestep_exception(DisasContext *s)
379 {
380     /* We just completed step of an insn. Move from Active-not-pending
381      * to Active-pending, and then also take the swstep exception.
382      * This corresponds to making the (IMPDEF) choice to prioritize
383      * swstep exceptions over asynchronous exceptions taken to an exception
384      * level where debug is disabled. This choice has the advantage that
385      * we do not need to maintain internal state corresponding to the
386      * ISV/EX syndrome bits between completion of the step and generation
387      * of the exception, and our syndrome information is always correct.
388      */
389     gen_ss_advance(s);
390     gen_swstep_exception(s, 1, s->is_ldex);
391     s->base.is_jmp = DISAS_NORETURN;
392 }
393 
394 void clear_eci_state(DisasContext *s)
395 {
396     /*
397      * Clear any ECI/ICI state: used when a load multiple/store
398      * multiple insn executes.
399      */
400     if (s->eci) {
401         store_cpu_field_constant(0, condexec_bits);
402         s->eci = 0;
403     }
404 }
405 
406 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
407 {
408     TCGv_i32 tmp1 = tcg_temp_new_i32();
409     TCGv_i32 tmp2 = tcg_temp_new_i32();
410     tcg_gen_ext16s_i32(tmp1, a);
411     tcg_gen_ext16s_i32(tmp2, b);
412     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
413     tcg_gen_sari_i32(a, a, 16);
414     tcg_gen_sari_i32(b, b, 16);
415     tcg_gen_mul_i32(b, b, a);
416     tcg_gen_mov_i32(a, tmp1);
417 }
418 
419 /* Byteswap each halfword.  */
420 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
421 {
422     TCGv_i32 tmp = tcg_temp_new_i32();
423     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
424     tcg_gen_shri_i32(tmp, var, 8);
425     tcg_gen_and_i32(tmp, tmp, mask);
426     tcg_gen_and_i32(var, var, mask);
427     tcg_gen_shli_i32(var, var, 8);
428     tcg_gen_or_i32(dest, var, tmp);
429 }
430 
431 /* Byteswap low halfword and sign extend.  */
432 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
433 {
434     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
435 }
436 
437 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
438     tmp = (t0 ^ t1) & 0x8000;
439     t0 &= ~0x8000;
440     t1 &= ~0x8000;
441     t0 = (t0 + t1) ^ tmp;
442  */
443 
444 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
445 {
446     TCGv_i32 tmp = tcg_temp_new_i32();
447     tcg_gen_xor_i32(tmp, t0, t1);
448     tcg_gen_andi_i32(tmp, tmp, 0x8000);
449     tcg_gen_andi_i32(t0, t0, ~0x8000);
450     tcg_gen_andi_i32(t1, t1, ~0x8000);
451     tcg_gen_add_i32(t0, t0, t1);
452     tcg_gen_xor_i32(dest, t0, tmp);
453 }
454 
455 /* Set N and Z flags from var.  */
456 static inline void gen_logic_CC(TCGv_i32 var)
457 {
458     tcg_gen_mov_i32(cpu_NF, var);
459     tcg_gen_mov_i32(cpu_ZF, var);
460 }
461 
462 /* dest = T0 + T1 + CF. */
463 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
464 {
465     tcg_gen_add_i32(dest, t0, t1);
466     tcg_gen_add_i32(dest, dest, cpu_CF);
467 }
468 
469 /* dest = T0 - T1 + CF - 1.  */
470 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
471 {
472     tcg_gen_sub_i32(dest, t0, t1);
473     tcg_gen_add_i32(dest, dest, cpu_CF);
474     tcg_gen_subi_i32(dest, dest, 1);
475 }
476 
477 /* dest = T0 + T1. Compute C, N, V and Z flags */
478 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
479 {
480     TCGv_i32 tmp = tcg_temp_new_i32();
481     tcg_gen_movi_i32(tmp, 0);
482     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
483     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
484     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
485     tcg_gen_xor_i32(tmp, t0, t1);
486     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
487     tcg_gen_mov_i32(dest, cpu_NF);
488 }
489 
490 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
491 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
492 {
493     TCGv_i32 tmp = tcg_temp_new_i32();
494     if (TCG_TARGET_HAS_add2_i32) {
495         tcg_gen_movi_i32(tmp, 0);
496         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
497         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
498     } else {
499         TCGv_i64 q0 = tcg_temp_new_i64();
500         TCGv_i64 q1 = tcg_temp_new_i64();
501         tcg_gen_extu_i32_i64(q0, t0);
502         tcg_gen_extu_i32_i64(q1, t1);
503         tcg_gen_add_i64(q0, q0, q1);
504         tcg_gen_extu_i32_i64(q1, cpu_CF);
505         tcg_gen_add_i64(q0, q0, q1);
506         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
507     }
508     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
509     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
510     tcg_gen_xor_i32(tmp, t0, t1);
511     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
512     tcg_gen_mov_i32(dest, cpu_NF);
513 }
514 
515 /* dest = T0 - T1. Compute C, N, V and Z flags */
516 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
517 {
518     TCGv_i32 tmp;
519     tcg_gen_sub_i32(cpu_NF, t0, t1);
520     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
521     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
522     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
523     tmp = tcg_temp_new_i32();
524     tcg_gen_xor_i32(tmp, t0, t1);
525     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
526     tcg_gen_mov_i32(dest, cpu_NF);
527 }
528 
529 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
530 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
531 {
532     TCGv_i32 tmp = tcg_temp_new_i32();
533     tcg_gen_not_i32(tmp, t1);
534     gen_adc_CC(dest, t0, tmp);
535 }
536 
537 #define GEN_SHIFT(name)                                               \
538 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
539 {                                                                     \
540     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
541     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
542     TCGv_i32 zero = tcg_constant_i32(0);                              \
543     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
544     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
545     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
546     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
547 }
548 GEN_SHIFT(shl)
549 GEN_SHIFT(shr)
550 #undef GEN_SHIFT
551 
552 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
553 {
554     TCGv_i32 tmp1 = tcg_temp_new_i32();
555 
556     tcg_gen_andi_i32(tmp1, t1, 0xff);
557     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
558     tcg_gen_sar_i32(dest, t0, tmp1);
559 }
560 
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565 
566 /* Shift by immediate.  Includes special handling for shift == 0.  */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568                                     int shift, int flags)
569 {
570     switch (shiftop) {
571     case 0: /* LSL */
572         if (shift != 0) {
573             if (flags)
574                 shifter_out_im(var, 32 - shift);
575             tcg_gen_shli_i32(var, var, shift);
576         }
577         break;
578     case 1: /* LSR */
579         if (shift == 0) {
580             if (flags) {
581                 tcg_gen_shri_i32(cpu_CF, var, 31);
582             }
583             tcg_gen_movi_i32(var, 0);
584         } else {
585             if (flags)
586                 shifter_out_im(var, shift - 1);
587             tcg_gen_shri_i32(var, var, shift);
588         }
589         break;
590     case 2: /* ASR */
591         if (shift == 0)
592             shift = 32;
593         if (flags)
594             shifter_out_im(var, shift - 1);
595         if (shift == 32)
596           shift = 31;
597         tcg_gen_sari_i32(var, var, shift);
598         break;
599     case 3: /* ROR/RRX */
600         if (shift != 0) {
601             if (flags)
602                 shifter_out_im(var, shift - 1);
603             tcg_gen_rotri_i32(var, var, shift); break;
604         } else {
605             TCGv_i32 tmp = tcg_temp_new_i32();
606             tcg_gen_shli_i32(tmp, cpu_CF, 31);
607             if (flags)
608                 shifter_out_im(var, 0);
609             tcg_gen_shri_i32(var, var, 1);
610             tcg_gen_or_i32(var, var, tmp);
611         }
612     }
613 };
614 
615 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616                                      TCGv_i32 shift, int flags)
617 {
618     if (flags) {
619         switch (shiftop) {
620         case 0: gen_helper_shl_cc(var, tcg_env, var, shift); break;
621         case 1: gen_helper_shr_cc(var, tcg_env, var, shift); break;
622         case 2: gen_helper_sar_cc(var, tcg_env, var, shift); break;
623         case 3: gen_helper_ror_cc(var, tcg_env, var, shift); break;
624         }
625     } else {
626         switch (shiftop) {
627         case 0:
628             gen_shl(var, var, shift);
629             break;
630         case 1:
631             gen_shr(var, var, shift);
632             break;
633         case 2:
634             gen_sar(var, var, shift);
635             break;
636         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
637                 tcg_gen_rotr_i32(var, var, shift); break;
638         }
639     }
640 }
641 
642 /*
643  * Generate a conditional based on ARM condition code cc.
644  * This is common between ARM and Aarch64 targets.
645  */
646 void arm_test_cc(DisasCompare *cmp, int cc)
647 {
648     TCGv_i32 value;
649     TCGCond cond;
650 
651     switch (cc) {
652     case 0: /* eq: Z */
653     case 1: /* ne: !Z */
654         cond = TCG_COND_EQ;
655         value = cpu_ZF;
656         break;
657 
658     case 2: /* cs: C */
659     case 3: /* cc: !C */
660         cond = TCG_COND_NE;
661         value = cpu_CF;
662         break;
663 
664     case 4: /* mi: N */
665     case 5: /* pl: !N */
666         cond = TCG_COND_LT;
667         value = cpu_NF;
668         break;
669 
670     case 6: /* vs: V */
671     case 7: /* vc: !V */
672         cond = TCG_COND_LT;
673         value = cpu_VF;
674         break;
675 
676     case 8: /* hi: C && !Z */
677     case 9: /* ls: !C || Z -> !(C && !Z) */
678         cond = TCG_COND_NE;
679         value = tcg_temp_new_i32();
680         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
681            ZF is non-zero for !Z; so AND the two subexpressions.  */
682         tcg_gen_neg_i32(value, cpu_CF);
683         tcg_gen_and_i32(value, value, cpu_ZF);
684         break;
685 
686     case 10: /* ge: N == V -> N ^ V == 0 */
687     case 11: /* lt: N != V -> N ^ V != 0 */
688         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
689         cond = TCG_COND_GE;
690         value = tcg_temp_new_i32();
691         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
692         break;
693 
694     case 12: /* gt: !Z && N == V */
695     case 13: /* le: Z || N != V */
696         cond = TCG_COND_NE;
697         value = tcg_temp_new_i32();
698         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
699          * the sign bit then AND with ZF to yield the result.  */
700         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
701         tcg_gen_sari_i32(value, value, 31);
702         tcg_gen_andc_i32(value, cpu_ZF, value);
703         break;
704 
705     case 14: /* always */
706     case 15: /* always */
707         /* Use the ALWAYS condition, which will fold early.
708          * It doesn't matter what we use for the value.  */
709         cond = TCG_COND_ALWAYS;
710         value = cpu_ZF;
711         goto no_invert;
712 
713     default:
714         fprintf(stderr, "Bad condition code 0x%x\n", cc);
715         abort();
716     }
717 
718     if (cc & 1) {
719         cond = tcg_invert_cond(cond);
720     }
721 
722  no_invert:
723     cmp->cond = cond;
724     cmp->value = value;
725 }
726 
727 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
728 {
729     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
730 }
731 
732 void arm_gen_test_cc(int cc, TCGLabel *label)
733 {
734     DisasCompare cmp;
735     arm_test_cc(&cmp, cc);
736     arm_jump_cc(&cmp, label);
737 }
738 
739 void gen_set_condexec(DisasContext *s)
740 {
741     if (s->condexec_mask) {
742         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
743 
744         store_cpu_field_constant(val, condexec_bits);
745     }
746 }
747 
748 void gen_update_pc(DisasContext *s, target_long diff)
749 {
750     gen_pc_plus_diff(s, cpu_R[15], diff);
751     s->pc_save = s->pc_curr + diff;
752 }
753 
754 /* Set PC and Thumb state from var.  var is marked as dead.  */
755 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
756 {
757     s->base.is_jmp = DISAS_JUMP;
758     tcg_gen_andi_i32(cpu_R[15], var, ~1);
759     tcg_gen_andi_i32(var, var, 1);
760     store_cpu_field(var, thumb);
761     s->pc_save = -1;
762 }
763 
764 /*
765  * Set PC and Thumb state from var. var is marked as dead.
766  * For M-profile CPUs, include logic to detect exception-return
767  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
768  * and BX reg, and no others, and happens only for code in Handler mode.
769  * The Security Extension also requires us to check for the FNC_RETURN
770  * which signals a function return from non-secure state; this can happen
771  * in both Handler and Thread mode.
772  * To avoid having to do multiple comparisons in inline generated code,
773  * we make the check we do here loose, so it will match for EXC_RETURN
774  * in Thread mode. For system emulation do_v7m_exception_exit() checks
775  * for these spurious cases and returns without doing anything (giving
776  * the same behaviour as for a branch to a non-magic address).
777  *
778  * In linux-user mode it is unclear what the right behaviour for an
779  * attempted FNC_RETURN should be, because in real hardware this will go
780  * directly to Secure code (ie not the Linux kernel) which will then treat
781  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
782  * attempt behave the way it would on a CPU without the security extension,
783  * which is to say "like a normal branch". That means we can simply treat
784  * all branches as normal with no magic address behaviour.
785  */
786 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
787 {
788     /* Generate the same code here as for a simple bx, but flag via
789      * s->base.is_jmp that we need to do the rest of the work later.
790      */
791     gen_bx(s, var);
792 #ifndef CONFIG_USER_ONLY
793     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
794         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
795         s->base.is_jmp = DISAS_BX_EXCRET;
796     }
797 #endif
798 }
799 
800 static inline void gen_bx_excret_final_code(DisasContext *s)
801 {
802     /* Generate the code to finish possible exception return and end the TB */
803     DisasLabel excret_label = gen_disas_label(s);
804     uint32_t min_magic;
805 
806     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
807         /* Covers FNC_RETURN and EXC_RETURN magic */
808         min_magic = FNC_RETURN_MIN_MAGIC;
809     } else {
810         /* EXC_RETURN magic only */
811         min_magic = EXC_RETURN_MIN_MAGIC;
812     }
813 
814     /* Is the new PC value in the magic range indicating exception return? */
815     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
816     /* No: end the TB as we would for a DISAS_JMP */
817     if (s->ss_active) {
818         gen_singlestep_exception(s);
819     } else {
820         tcg_gen_exit_tb(NULL, 0);
821     }
822     set_disas_label(s, excret_label);
823     /* Yes: this is an exception return.
824      * At this point in runtime env->regs[15] and env->thumb will hold
825      * the exception-return magic number, which do_v7m_exception_exit()
826      * will read. Nothing else will be able to see those values because
827      * the cpu-exec main loop guarantees that we will always go straight
828      * from raising the exception to the exception-handling code.
829      *
830      * gen_ss_advance(s) does nothing on M profile currently but
831      * calling it is conceptually the right thing as we have executed
832      * this instruction (compare SWI, HVC, SMC handling).
833      */
834     gen_ss_advance(s);
835     gen_exception_internal(EXCP_EXCEPTION_EXIT);
836 }
837 
838 static inline void gen_bxns(DisasContext *s, int rm)
839 {
840     TCGv_i32 var = load_reg(s, rm);
841 
842     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
843      * we need to sync state before calling it, but:
844      *  - we don't need to do gen_update_pc() because the bxns helper will
845      *    always set the PC itself
846      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
847      *    unless it's outside an IT block or the last insn in an IT block,
848      *    so we know that condexec == 0 (already set at the top of the TB)
849      *    is correct in the non-UNPREDICTABLE cases, and we can choose
850      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
851      */
852     gen_helper_v7m_bxns(tcg_env, var);
853     s->base.is_jmp = DISAS_EXIT;
854 }
855 
856 static inline void gen_blxns(DisasContext *s, int rm)
857 {
858     TCGv_i32 var = load_reg(s, rm);
859 
860     /* We don't need to sync condexec state, for the same reason as bxns.
861      * We do however need to set the PC, because the blxns helper reads it.
862      * The blxns helper may throw an exception.
863      */
864     gen_update_pc(s, curr_insn_len(s));
865     gen_helper_v7m_blxns(tcg_env, var);
866     s->base.is_jmp = DISAS_EXIT;
867 }
868 
869 /* Variant of store_reg which uses branch&exchange logic when storing
870    to r15 in ARM architecture v7 and above. The source must be a temporary
871    and will be marked as dead. */
872 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
873 {
874     if (reg == 15 && ENABLE_ARCH_7) {
875         gen_bx(s, var);
876     } else {
877         store_reg(s, reg, var);
878     }
879 }
880 
881 /* Variant of store_reg which uses branch&exchange logic when storing
882  * to r15 in ARM architecture v5T and above. This is used for storing
883  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
884  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
885 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
886 {
887     if (reg == 15 && ENABLE_ARCH_5) {
888         gen_bx_excret(s, var);
889     } else {
890         store_reg(s, reg, var);
891     }
892 }
893 
894 #ifdef CONFIG_USER_ONLY
895 #define IS_USER_ONLY 1
896 #else
897 #define IS_USER_ONLY 0
898 #endif
899 
900 MemOp pow2_align(unsigned i)
901 {
902     static const MemOp mop_align[] = {
903         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
904         /*
905          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
906          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
907          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
908          */
909         MO_ALIGN_16
910     };
911     g_assert(i < ARRAY_SIZE(mop_align));
912     return mop_align[i];
913 }
914 
915 /*
916  * Abstractions of "generate code to do a guest load/store for
917  * AArch32", where a vaddr is always 32 bits (and is zero
918  * extended if we're a 64 bit core) and  data is also
919  * 32 bits unless specifically doing a 64 bit access.
920  * These functions work like tcg_gen_qemu_{ld,st}* except
921  * that the address argument is TCGv_i32 rather than TCGv.
922  */
923 
924 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926     TCGv addr = tcg_temp_new();
927     tcg_gen_extu_i32_tl(addr, a32);
928 
929     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
930     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932     }
933     return addr;
934 }
935 
936 /*
937  * Internal routines are used for NEON cases where the endianness
938  * and/or alignment has already been taken into account and manipulated.
939  */
940 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
941                               TCGv_i32 a32, int index, MemOp opc)
942 {
943     TCGv addr = gen_aa32_addr(s, a32, opc);
944     tcg_gen_qemu_ld_i32(val, addr, index, opc);
945 }
946 
947 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
948                               TCGv_i32 a32, int index, MemOp opc)
949 {
950     TCGv addr = gen_aa32_addr(s, a32, opc);
951     tcg_gen_qemu_st_i32(val, addr, index, opc);
952 }
953 
954 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
955                               TCGv_i32 a32, int index, MemOp opc)
956 {
957     TCGv addr = gen_aa32_addr(s, a32, opc);
958 
959     tcg_gen_qemu_ld_i64(val, addr, index, opc);
960 
961     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
962     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
963         tcg_gen_rotri_i64(val, val, 32);
964     }
965 }
966 
967 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
968                               TCGv_i32 a32, int index, MemOp opc)
969 {
970     TCGv addr = gen_aa32_addr(s, a32, opc);
971 
972     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
973     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
974         TCGv_i64 tmp = tcg_temp_new_i64();
975         tcg_gen_rotri_i64(tmp, val, 32);
976         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
977     } else {
978         tcg_gen_qemu_st_i64(val, addr, index, opc);
979     }
980 }
981 
982 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
983                      int index, MemOp opc)
984 {
985     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
986 }
987 
988 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
989                      int index, MemOp opc)
990 {
991     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
992 }
993 
994 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
995                      int index, MemOp opc)
996 {
997     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
998 }
999 
1000 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1001                      int index, MemOp opc)
1002 {
1003     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1004 }
1005 
1006 #define DO_GEN_LD(SUFF, OPC)                                            \
1007     static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1008                                          TCGv_i32 a32, int index)       \
1009     {                                                                   \
1010         gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1011     }
1012 
1013 #define DO_GEN_ST(SUFF, OPC)                                            \
1014     static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1015                                          TCGv_i32 a32, int index)       \
1016     {                                                                   \
1017         gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1018     }
1019 
1020 static inline void gen_hvc(DisasContext *s, int imm16)
1021 {
1022     /* The pre HVC helper handles cases when HVC gets trapped
1023      * as an undefined insn by runtime configuration (ie before
1024      * the insn really executes).
1025      */
1026     gen_update_pc(s, 0);
1027     gen_helper_pre_hvc(tcg_env);
1028     /* Otherwise we will treat this as a real exception which
1029      * happens after execution of the insn. (The distinction matters
1030      * for the PC value reported to the exception handler and also
1031      * for single stepping.)
1032      */
1033     s->svc_imm = imm16;
1034     gen_update_pc(s, curr_insn_len(s));
1035     s->base.is_jmp = DISAS_HVC;
1036 }
1037 
1038 static inline void gen_smc(DisasContext *s)
1039 {
1040     /* As with HVC, we may take an exception either before or after
1041      * the insn executes.
1042      */
1043     gen_update_pc(s, 0);
1044     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa32_smc()));
1045     gen_update_pc(s, curr_insn_len(s));
1046     s->base.is_jmp = DISAS_SMC;
1047 }
1048 
1049 static void gen_exception_internal_insn(DisasContext *s, int excp)
1050 {
1051     gen_set_condexec(s);
1052     gen_update_pc(s, 0);
1053     gen_exception_internal(excp);
1054     s->base.is_jmp = DISAS_NORETURN;
1055 }
1056 
1057 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1058 {
1059     gen_helper_exception_with_syndrome_el(tcg_env, tcg_constant_i32(excp),
1060                                           tcg_constant_i32(syndrome), tcg_el);
1061 }
1062 
1063 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1064 {
1065     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1066 }
1067 
1068 static void gen_exception(int excp, uint32_t syndrome)
1069 {
1070     gen_helper_exception_with_syndrome(tcg_env, tcg_constant_i32(excp),
1071                                        tcg_constant_i32(syndrome));
1072 }
1073 
1074 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1075                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1076 {
1077     if (s->aarch64) {
1078         gen_a64_update_pc(s, pc_diff);
1079     } else {
1080         gen_set_condexec(s);
1081         gen_update_pc(s, pc_diff);
1082     }
1083     gen_exception_el_v(excp, syn, tcg_el);
1084     s->base.is_jmp = DISAS_NORETURN;
1085 }
1086 
1087 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1088                            uint32_t syn, uint32_t target_el)
1089 {
1090     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1091                             tcg_constant_i32(target_el));
1092 }
1093 
1094 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1095                         int excp, uint32_t syn)
1096 {
1097     if (s->aarch64) {
1098         gen_a64_update_pc(s, pc_diff);
1099     } else {
1100         gen_set_condexec(s);
1101         gen_update_pc(s, pc_diff);
1102     }
1103     gen_exception(excp, syn);
1104     s->base.is_jmp = DISAS_NORETURN;
1105 }
1106 
1107 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1108 {
1109     gen_set_condexec(s);
1110     gen_update_pc(s, 0);
1111     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syn));
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 void unallocated_encoding(DisasContext *s)
1116 {
1117     /* Unallocated and reserved encodings are uncategorized */
1118     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1119 }
1120 
1121 /* Force a TB lookup after an instruction that changes the CPU state.  */
1122 void gen_lookup_tb(DisasContext *s)
1123 {
1124     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1125     s->base.is_jmp = DISAS_EXIT;
1126 }
1127 
1128 static inline void gen_hlt(DisasContext *s, int imm)
1129 {
1130     /* HLT. This has two purposes.
1131      * Architecturally, it is an external halting debug instruction.
1132      * Since QEMU doesn't implement external debug, we treat this as
1133      * it is required for halting debug disabled: it will UNDEF.
1134      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1135      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1136      * must trigger semihosting even for ARMv7 and earlier, where
1137      * HLT was an undefined encoding.
1138      * In system mode, we don't allow userspace access to
1139      * semihosting, to provide some semblance of security
1140      * (and for consistency with our 32-bit semihosting).
1141      */
1142     if (semihosting_enabled(s->current_el == 0) &&
1143         (imm == (s->thumb ? 0x3c : 0xf000))) {
1144         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1145         return;
1146     }
1147 
1148     unallocated_encoding(s);
1149 }
1150 
1151 /*
1152  * Return the offset of a "full" NEON Dreg.
1153  */
1154 long neon_full_reg_offset(unsigned reg)
1155 {
1156     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1157 }
1158 
1159 /*
1160  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1161  * where 0 is the least significant end of the register.
1162  */
1163 long neon_element_offset(int reg, int element, MemOp memop)
1164 {
1165     int element_size = 1 << (memop & MO_SIZE);
1166     int ofs = element * element_size;
1167 #if HOST_BIG_ENDIAN
1168     /*
1169      * Calculate the offset assuming fully little-endian,
1170      * then XOR to account for the order of the 8-byte units.
1171      */
1172     if (element_size < 8) {
1173         ofs ^= 8 - element_size;
1174     }
1175 #endif
1176     return neon_full_reg_offset(reg) + ofs;
1177 }
1178 
1179 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1180 long vfp_reg_offset(bool dp, unsigned reg)
1181 {
1182     if (dp) {
1183         return neon_element_offset(reg, 0, MO_64);
1184     } else {
1185         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1186     }
1187 }
1188 
1189 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1190 {
1191     long off = neon_element_offset(reg, ele, memop);
1192 
1193     switch (memop) {
1194     case MO_SB:
1195         tcg_gen_ld8s_i32(dest, tcg_env, off);
1196         break;
1197     case MO_UB:
1198         tcg_gen_ld8u_i32(dest, tcg_env, off);
1199         break;
1200     case MO_SW:
1201         tcg_gen_ld16s_i32(dest, tcg_env, off);
1202         break;
1203     case MO_UW:
1204         tcg_gen_ld16u_i32(dest, tcg_env, off);
1205         break;
1206     case MO_UL:
1207     case MO_SL:
1208         tcg_gen_ld_i32(dest, tcg_env, off);
1209         break;
1210     default:
1211         g_assert_not_reached();
1212     }
1213 }
1214 
1215 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1216 {
1217     long off = neon_element_offset(reg, ele, memop);
1218 
1219     switch (memop) {
1220     case MO_SL:
1221         tcg_gen_ld32s_i64(dest, tcg_env, off);
1222         break;
1223     case MO_UL:
1224         tcg_gen_ld32u_i64(dest, tcg_env, off);
1225         break;
1226     case MO_UQ:
1227         tcg_gen_ld_i64(dest, tcg_env, off);
1228         break;
1229     default:
1230         g_assert_not_reached();
1231     }
1232 }
1233 
1234 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1235 {
1236     long off = neon_element_offset(reg, ele, memop);
1237 
1238     switch (memop) {
1239     case MO_8:
1240         tcg_gen_st8_i32(src, tcg_env, off);
1241         break;
1242     case MO_16:
1243         tcg_gen_st16_i32(src, tcg_env, off);
1244         break;
1245     case MO_32:
1246         tcg_gen_st_i32(src, tcg_env, off);
1247         break;
1248     default:
1249         g_assert_not_reached();
1250     }
1251 }
1252 
1253 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1254 {
1255     long off = neon_element_offset(reg, ele, memop);
1256 
1257     switch (memop) {
1258     case MO_32:
1259         tcg_gen_st32_i64(src, tcg_env, off);
1260         break;
1261     case MO_64:
1262         tcg_gen_st_i64(src, tcg_env, off);
1263         break;
1264     default:
1265         g_assert_not_reached();
1266     }
1267 }
1268 
1269 #define ARM_CP_RW_BIT   (1 << 20)
1270 
1271 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1272 {
1273     tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1274 }
1275 
1276 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1277 {
1278     tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1279 }
1280 
1281 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1282 {
1283     TCGv_i32 var = tcg_temp_new_i32();
1284     tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285     return var;
1286 }
1287 
1288 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1289 {
1290     tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1291 }
1292 
1293 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1294 {
1295     iwmmxt_store_reg(cpu_M0, rn);
1296 }
1297 
1298 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1299 {
1300     iwmmxt_load_reg(cpu_M0, rn);
1301 }
1302 
1303 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1304 {
1305     iwmmxt_load_reg(cpu_V1, rn);
1306     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1307 }
1308 
1309 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1310 {
1311     iwmmxt_load_reg(cpu_V1, rn);
1312     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1313 }
1314 
1315 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1316 {
1317     iwmmxt_load_reg(cpu_V1, rn);
1318     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1319 }
1320 
1321 #define IWMMXT_OP(name) \
1322 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1323 { \
1324     iwmmxt_load_reg(cpu_V1, rn); \
1325     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1326 }
1327 
1328 #define IWMMXT_OP_ENV(name) \
1329 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1330 { \
1331     iwmmxt_load_reg(cpu_V1, rn); \
1332     gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0, cpu_V1); \
1333 }
1334 
1335 #define IWMMXT_OP_ENV_SIZE(name) \
1336 IWMMXT_OP_ENV(name##b) \
1337 IWMMXT_OP_ENV(name##w) \
1338 IWMMXT_OP_ENV(name##l)
1339 
1340 #define IWMMXT_OP_ENV1(name) \
1341 static inline void gen_op_iwmmxt_##name##_M0(void) \
1342 { \
1343     gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0); \
1344 }
1345 
1346 IWMMXT_OP(maddsq)
1347 IWMMXT_OP(madduq)
1348 IWMMXT_OP(sadb)
1349 IWMMXT_OP(sadw)
1350 IWMMXT_OP(mulslw)
1351 IWMMXT_OP(mulshw)
1352 IWMMXT_OP(mululw)
1353 IWMMXT_OP(muluhw)
1354 IWMMXT_OP(macsw)
1355 IWMMXT_OP(macuw)
1356 
1357 IWMMXT_OP_ENV_SIZE(unpackl)
1358 IWMMXT_OP_ENV_SIZE(unpackh)
1359 
1360 IWMMXT_OP_ENV1(unpacklub)
1361 IWMMXT_OP_ENV1(unpackluw)
1362 IWMMXT_OP_ENV1(unpacklul)
1363 IWMMXT_OP_ENV1(unpackhub)
1364 IWMMXT_OP_ENV1(unpackhuw)
1365 IWMMXT_OP_ENV1(unpackhul)
1366 IWMMXT_OP_ENV1(unpacklsb)
1367 IWMMXT_OP_ENV1(unpacklsw)
1368 IWMMXT_OP_ENV1(unpacklsl)
1369 IWMMXT_OP_ENV1(unpackhsb)
1370 IWMMXT_OP_ENV1(unpackhsw)
1371 IWMMXT_OP_ENV1(unpackhsl)
1372 
1373 IWMMXT_OP_ENV_SIZE(cmpeq)
1374 IWMMXT_OP_ENV_SIZE(cmpgtu)
1375 IWMMXT_OP_ENV_SIZE(cmpgts)
1376 
1377 IWMMXT_OP_ENV_SIZE(mins)
1378 IWMMXT_OP_ENV_SIZE(minu)
1379 IWMMXT_OP_ENV_SIZE(maxs)
1380 IWMMXT_OP_ENV_SIZE(maxu)
1381 
1382 IWMMXT_OP_ENV_SIZE(subn)
1383 IWMMXT_OP_ENV_SIZE(addn)
1384 IWMMXT_OP_ENV_SIZE(subu)
1385 IWMMXT_OP_ENV_SIZE(addu)
1386 IWMMXT_OP_ENV_SIZE(subs)
1387 IWMMXT_OP_ENV_SIZE(adds)
1388 
1389 IWMMXT_OP_ENV(avgb0)
1390 IWMMXT_OP_ENV(avgb1)
1391 IWMMXT_OP_ENV(avgw0)
1392 IWMMXT_OP_ENV(avgw1)
1393 
1394 IWMMXT_OP_ENV(packuw)
1395 IWMMXT_OP_ENV(packul)
1396 IWMMXT_OP_ENV(packuq)
1397 IWMMXT_OP_ENV(packsw)
1398 IWMMXT_OP_ENV(packsl)
1399 IWMMXT_OP_ENV(packsq)
1400 
1401 static void gen_op_iwmmxt_set_mup(void)
1402 {
1403     TCGv_i32 tmp;
1404     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1405     tcg_gen_ori_i32(tmp, tmp, 2);
1406     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1407 }
1408 
1409 static void gen_op_iwmmxt_set_cup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 1);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_setpsr_nz(void)
1418 {
1419     TCGv_i32 tmp = tcg_temp_new_i32();
1420     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1421     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1422 }
1423 
1424 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1425 {
1426     iwmmxt_load_reg(cpu_V1, rn);
1427     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1428     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1429 }
1430 
1431 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1432                                      TCGv_i32 dest)
1433 {
1434     int rd;
1435     uint32_t offset;
1436     TCGv_i32 tmp;
1437 
1438     rd = (insn >> 16) & 0xf;
1439     tmp = load_reg(s, rd);
1440 
1441     offset = (insn & 0xff) << ((insn >> 7) & 2);
1442     if (insn & (1 << 24)) {
1443         /* Pre indexed */
1444         if (insn & (1 << 23))
1445             tcg_gen_addi_i32(tmp, tmp, offset);
1446         else
1447             tcg_gen_addi_i32(tmp, tmp, -offset);
1448         tcg_gen_mov_i32(dest, tmp);
1449         if (insn & (1 << 21)) {
1450             store_reg(s, rd, tmp);
1451         }
1452     } else if (insn & (1 << 21)) {
1453         /* Post indexed */
1454         tcg_gen_mov_i32(dest, tmp);
1455         if (insn & (1 << 23))
1456             tcg_gen_addi_i32(tmp, tmp, offset);
1457         else
1458             tcg_gen_addi_i32(tmp, tmp, -offset);
1459         store_reg(s, rd, tmp);
1460     } else if (!(insn & (1 << 23)))
1461         return 1;
1462     return 0;
1463 }
1464 
1465 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1466 {
1467     int rd = (insn >> 0) & 0xf;
1468     TCGv_i32 tmp;
1469 
1470     if (insn & (1 << 8)) {
1471         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1472             return 1;
1473         } else {
1474             tmp = iwmmxt_load_creg(rd);
1475         }
1476     } else {
1477         tmp = tcg_temp_new_i32();
1478         iwmmxt_load_reg(cpu_V0, rd);
1479         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1480     }
1481     tcg_gen_andi_i32(tmp, tmp, mask);
1482     tcg_gen_mov_i32(dest, tmp);
1483     return 0;
1484 }
1485 
1486 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1487    (ie. an undefined instruction).  */
1488 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1489 {
1490     int rd, wrd;
1491     int rdhi, rdlo, rd0, rd1, i;
1492     TCGv_i32 addr;
1493     TCGv_i32 tmp, tmp2, tmp3;
1494 
1495     if ((insn & 0x0e000e00) == 0x0c000000) {
1496         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1497             wrd = insn & 0xf;
1498             rdlo = (insn >> 12) & 0xf;
1499             rdhi = (insn >> 16) & 0xf;
1500             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1501                 iwmmxt_load_reg(cpu_V0, wrd);
1502                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1503                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1504             } else {                                    /* TMCRR */
1505                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1506                 iwmmxt_store_reg(cpu_V0, wrd);
1507                 gen_op_iwmmxt_set_mup();
1508             }
1509             return 0;
1510         }
1511 
1512         wrd = (insn >> 12) & 0xf;
1513         addr = tcg_temp_new_i32();
1514         if (gen_iwmmxt_address(s, insn, addr)) {
1515             return 1;
1516         }
1517         if (insn & ARM_CP_RW_BIT) {
1518             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1519                 tmp = tcg_temp_new_i32();
1520                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1521                 iwmmxt_store_creg(wrd, tmp);
1522             } else {
1523                 i = 1;
1524                 if (insn & (1 << 8)) {
1525                     if (insn & (1 << 22)) {             /* WLDRD */
1526                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1527                         i = 0;
1528                     } else {                            /* WLDRW wRd */
1529                         tmp = tcg_temp_new_i32();
1530                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1531                     }
1532                 } else {
1533                     tmp = tcg_temp_new_i32();
1534                     if (insn & (1 << 22)) {             /* WLDRH */
1535                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1536                     } else {                            /* WLDRB */
1537                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1538                     }
1539                 }
1540                 if (i) {
1541                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1542                 }
1543                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1544             }
1545         } else {
1546             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1547                 tmp = iwmmxt_load_creg(wrd);
1548                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1549             } else {
1550                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1551                 tmp = tcg_temp_new_i32();
1552                 if (insn & (1 << 8)) {
1553                     if (insn & (1 << 22)) {             /* WSTRD */
1554                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1555                     } else {                            /* WSTRW wRd */
1556                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1557                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1558                     }
1559                 } else {
1560                     if (insn & (1 << 22)) {             /* WSTRH */
1561                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1562                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1563                     } else {                            /* WSTRB */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 }
1568             }
1569         }
1570         return 0;
1571     }
1572 
1573     if ((insn & 0x0f000000) != 0x0e000000)
1574         return 1;
1575 
1576     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1577     case 0x000:                                                 /* WOR */
1578         wrd = (insn >> 12) & 0xf;
1579         rd0 = (insn >> 0) & 0xf;
1580         rd1 = (insn >> 16) & 0xf;
1581         gen_op_iwmmxt_movq_M0_wRn(rd0);
1582         gen_op_iwmmxt_orq_M0_wRn(rd1);
1583         gen_op_iwmmxt_setpsr_nz();
1584         gen_op_iwmmxt_movq_wRn_M0(wrd);
1585         gen_op_iwmmxt_set_mup();
1586         gen_op_iwmmxt_set_cup();
1587         break;
1588     case 0x011:                                                 /* TMCR */
1589         if (insn & 0xf)
1590             return 1;
1591         rd = (insn >> 12) & 0xf;
1592         wrd = (insn >> 16) & 0xf;
1593         switch (wrd) {
1594         case ARM_IWMMXT_wCID:
1595         case ARM_IWMMXT_wCASF:
1596             break;
1597         case ARM_IWMMXT_wCon:
1598             gen_op_iwmmxt_set_cup();
1599             /* Fall through.  */
1600         case ARM_IWMMXT_wCSSF:
1601             tmp = iwmmxt_load_creg(wrd);
1602             tmp2 = load_reg(s, rd);
1603             tcg_gen_andc_i32(tmp, tmp, tmp2);
1604             iwmmxt_store_creg(wrd, tmp);
1605             break;
1606         case ARM_IWMMXT_wCGR0:
1607         case ARM_IWMMXT_wCGR1:
1608         case ARM_IWMMXT_wCGR2:
1609         case ARM_IWMMXT_wCGR3:
1610             gen_op_iwmmxt_set_cup();
1611             tmp = load_reg(s, rd);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         default:
1615             return 1;
1616         }
1617         break;
1618     case 0x100:                                                 /* WXOR */
1619         wrd = (insn >> 12) & 0xf;
1620         rd0 = (insn >> 0) & 0xf;
1621         rd1 = (insn >> 16) & 0xf;
1622         gen_op_iwmmxt_movq_M0_wRn(rd0);
1623         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1624         gen_op_iwmmxt_setpsr_nz();
1625         gen_op_iwmmxt_movq_wRn_M0(wrd);
1626         gen_op_iwmmxt_set_mup();
1627         gen_op_iwmmxt_set_cup();
1628         break;
1629     case 0x111:                                                 /* TMRC */
1630         if (insn & 0xf)
1631             return 1;
1632         rd = (insn >> 12) & 0xf;
1633         wrd = (insn >> 16) & 0xf;
1634         tmp = iwmmxt_load_creg(wrd);
1635         store_reg(s, rd, tmp);
1636         break;
1637     case 0x300:                                                 /* WANDN */
1638         wrd = (insn >> 12) & 0xf;
1639         rd0 = (insn >> 0) & 0xf;
1640         rd1 = (insn >> 16) & 0xf;
1641         gen_op_iwmmxt_movq_M0_wRn(rd0);
1642         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1643         gen_op_iwmmxt_andq_M0_wRn(rd1);
1644         gen_op_iwmmxt_setpsr_nz();
1645         gen_op_iwmmxt_movq_wRn_M0(wrd);
1646         gen_op_iwmmxt_set_mup();
1647         gen_op_iwmmxt_set_cup();
1648         break;
1649     case 0x200:                                                 /* WAND */
1650         wrd = (insn >> 12) & 0xf;
1651         rd0 = (insn >> 0) & 0xf;
1652         rd1 = (insn >> 16) & 0xf;
1653         gen_op_iwmmxt_movq_M0_wRn(rd0);
1654         gen_op_iwmmxt_andq_M0_wRn(rd1);
1655         gen_op_iwmmxt_setpsr_nz();
1656         gen_op_iwmmxt_movq_wRn_M0(wrd);
1657         gen_op_iwmmxt_set_mup();
1658         gen_op_iwmmxt_set_cup();
1659         break;
1660     case 0x810: case 0xa10:                             /* WMADD */
1661         wrd = (insn >> 12) & 0xf;
1662         rd0 = (insn >> 0) & 0xf;
1663         rd1 = (insn >> 16) & 0xf;
1664         gen_op_iwmmxt_movq_M0_wRn(rd0);
1665         if (insn & (1 << 21))
1666             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1667         else
1668             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1669         gen_op_iwmmxt_movq_wRn_M0(wrd);
1670         gen_op_iwmmxt_set_mup();
1671         break;
1672     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1673         wrd = (insn >> 12) & 0xf;
1674         rd0 = (insn >> 16) & 0xf;
1675         rd1 = (insn >> 0) & 0xf;
1676         gen_op_iwmmxt_movq_M0_wRn(rd0);
1677         switch ((insn >> 22) & 3) {
1678         case 0:
1679             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1680             break;
1681         case 1:
1682             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1683             break;
1684         case 2:
1685             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1686             break;
1687         case 3:
1688             return 1;
1689         }
1690         gen_op_iwmmxt_movq_wRn_M0(wrd);
1691         gen_op_iwmmxt_set_mup();
1692         gen_op_iwmmxt_set_cup();
1693         break;
1694     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1695         wrd = (insn >> 12) & 0xf;
1696         rd0 = (insn >> 16) & 0xf;
1697         rd1 = (insn >> 0) & 0xf;
1698         gen_op_iwmmxt_movq_M0_wRn(rd0);
1699         switch ((insn >> 22) & 3) {
1700         case 0:
1701             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1702             break;
1703         case 1:
1704             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1705             break;
1706         case 2:
1707             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1708             break;
1709         case 3:
1710             return 1;
1711         }
1712         gen_op_iwmmxt_movq_wRn_M0(wrd);
1713         gen_op_iwmmxt_set_mup();
1714         gen_op_iwmmxt_set_cup();
1715         break;
1716     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1717         wrd = (insn >> 12) & 0xf;
1718         rd0 = (insn >> 16) & 0xf;
1719         rd1 = (insn >> 0) & 0xf;
1720         gen_op_iwmmxt_movq_M0_wRn(rd0);
1721         if (insn & (1 << 22))
1722             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1723         else
1724             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1725         if (!(insn & (1 << 20)))
1726             gen_op_iwmmxt_addl_M0_wRn(wrd);
1727         gen_op_iwmmxt_movq_wRn_M0(wrd);
1728         gen_op_iwmmxt_set_mup();
1729         break;
1730     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1731         wrd = (insn >> 12) & 0xf;
1732         rd0 = (insn >> 16) & 0xf;
1733         rd1 = (insn >> 0) & 0xf;
1734         gen_op_iwmmxt_movq_M0_wRn(rd0);
1735         if (insn & (1 << 21)) {
1736             if (insn & (1 << 20))
1737                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1738             else
1739                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1740         } else {
1741             if (insn & (1 << 20))
1742                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1743             else
1744                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1745         }
1746         gen_op_iwmmxt_movq_wRn_M0(wrd);
1747         gen_op_iwmmxt_set_mup();
1748         break;
1749     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1750         wrd = (insn >> 12) & 0xf;
1751         rd0 = (insn >> 16) & 0xf;
1752         rd1 = (insn >> 0) & 0xf;
1753         gen_op_iwmmxt_movq_M0_wRn(rd0);
1754         if (insn & (1 << 21))
1755             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1756         else
1757             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1758         if (!(insn & (1 << 20))) {
1759             iwmmxt_load_reg(cpu_V1, wrd);
1760             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1761         }
1762         gen_op_iwmmxt_movq_wRn_M0(wrd);
1763         gen_op_iwmmxt_set_mup();
1764         break;
1765     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1766         wrd = (insn >> 12) & 0xf;
1767         rd0 = (insn >> 16) & 0xf;
1768         rd1 = (insn >> 0) & 0xf;
1769         gen_op_iwmmxt_movq_M0_wRn(rd0);
1770         switch ((insn >> 22) & 3) {
1771         case 0:
1772             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1773             break;
1774         case 1:
1775             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1776             break;
1777         case 2:
1778             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1779             break;
1780         case 3:
1781             return 1;
1782         }
1783         gen_op_iwmmxt_movq_wRn_M0(wrd);
1784         gen_op_iwmmxt_set_mup();
1785         gen_op_iwmmxt_set_cup();
1786         break;
1787     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1788         wrd = (insn >> 12) & 0xf;
1789         rd0 = (insn >> 16) & 0xf;
1790         rd1 = (insn >> 0) & 0xf;
1791         gen_op_iwmmxt_movq_M0_wRn(rd0);
1792         if (insn & (1 << 22)) {
1793             if (insn & (1 << 20))
1794                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1795             else
1796                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1797         } else {
1798             if (insn & (1 << 20))
1799                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1800             else
1801                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1802         }
1803         gen_op_iwmmxt_movq_wRn_M0(wrd);
1804         gen_op_iwmmxt_set_mup();
1805         gen_op_iwmmxt_set_cup();
1806         break;
1807     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1808         wrd = (insn >> 12) & 0xf;
1809         rd0 = (insn >> 16) & 0xf;
1810         rd1 = (insn >> 0) & 0xf;
1811         gen_op_iwmmxt_movq_M0_wRn(rd0);
1812         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1813         tcg_gen_andi_i32(tmp, tmp, 7);
1814         iwmmxt_load_reg(cpu_V1, rd1);
1815         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1816         gen_op_iwmmxt_movq_wRn_M0(wrd);
1817         gen_op_iwmmxt_set_mup();
1818         break;
1819     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1820         if (((insn >> 6) & 3) == 3)
1821             return 1;
1822         rd = (insn >> 12) & 0xf;
1823         wrd = (insn >> 16) & 0xf;
1824         tmp = load_reg(s, rd);
1825         gen_op_iwmmxt_movq_M0_wRn(wrd);
1826         switch ((insn >> 6) & 3) {
1827         case 0:
1828             tmp2 = tcg_constant_i32(0xff);
1829             tmp3 = tcg_constant_i32((insn & 7) << 3);
1830             break;
1831         case 1:
1832             tmp2 = tcg_constant_i32(0xffff);
1833             tmp3 = tcg_constant_i32((insn & 3) << 4);
1834             break;
1835         case 2:
1836             tmp2 = tcg_constant_i32(0xffffffff);
1837             tmp3 = tcg_constant_i32((insn & 1) << 5);
1838             break;
1839         default:
1840             g_assert_not_reached();
1841         }
1842         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1843         gen_op_iwmmxt_movq_wRn_M0(wrd);
1844         gen_op_iwmmxt_set_mup();
1845         break;
1846     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1847         rd = (insn >> 12) & 0xf;
1848         wrd = (insn >> 16) & 0xf;
1849         if (rd == 15 || ((insn >> 22) & 3) == 3)
1850             return 1;
1851         gen_op_iwmmxt_movq_M0_wRn(wrd);
1852         tmp = tcg_temp_new_i32();
1853         switch ((insn >> 22) & 3) {
1854         case 0:
1855             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1856             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1857             if (insn & 8) {
1858                 tcg_gen_ext8s_i32(tmp, tmp);
1859             } else {
1860                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1861             }
1862             break;
1863         case 1:
1864             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1865             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1866             if (insn & 8) {
1867                 tcg_gen_ext16s_i32(tmp, tmp);
1868             } else {
1869                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1870             }
1871             break;
1872         case 2:
1873             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1874             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1875             break;
1876         }
1877         store_reg(s, rd, tmp);
1878         break;
1879     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1880         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1881             return 1;
1882         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1883         switch ((insn >> 22) & 3) {
1884         case 0:
1885             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1886             break;
1887         case 1:
1888             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1889             break;
1890         case 2:
1891             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1892             break;
1893         }
1894         tcg_gen_shli_i32(tmp, tmp, 28);
1895         gen_set_nzcv(tmp);
1896         break;
1897     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1898         if (((insn >> 6) & 3) == 3)
1899             return 1;
1900         rd = (insn >> 12) & 0xf;
1901         wrd = (insn >> 16) & 0xf;
1902         tmp = load_reg(s, rd);
1903         switch ((insn >> 6) & 3) {
1904         case 0:
1905             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1906             break;
1907         case 1:
1908             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1909             break;
1910         case 2:
1911             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1912             break;
1913         }
1914         gen_op_iwmmxt_movq_wRn_M0(wrd);
1915         gen_op_iwmmxt_set_mup();
1916         break;
1917     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1918         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1919             return 1;
1920         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1921         tmp2 = tcg_temp_new_i32();
1922         tcg_gen_mov_i32(tmp2, tmp);
1923         switch ((insn >> 22) & 3) {
1924         case 0:
1925             for (i = 0; i < 7; i ++) {
1926                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1927                 tcg_gen_and_i32(tmp, tmp, tmp2);
1928             }
1929             break;
1930         case 1:
1931             for (i = 0; i < 3; i ++) {
1932                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1933                 tcg_gen_and_i32(tmp, tmp, tmp2);
1934             }
1935             break;
1936         case 2:
1937             tcg_gen_shli_i32(tmp2, tmp2, 16);
1938             tcg_gen_and_i32(tmp, tmp, tmp2);
1939             break;
1940         }
1941         gen_set_nzcv(tmp);
1942         break;
1943     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1944         wrd = (insn >> 12) & 0xf;
1945         rd0 = (insn >> 16) & 0xf;
1946         gen_op_iwmmxt_movq_M0_wRn(rd0);
1947         switch ((insn >> 22) & 3) {
1948         case 0:
1949             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1950             break;
1951         case 1:
1952             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1953             break;
1954         case 2:
1955             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1956             break;
1957         case 3:
1958             return 1;
1959         }
1960         gen_op_iwmmxt_movq_wRn_M0(wrd);
1961         gen_op_iwmmxt_set_mup();
1962         break;
1963     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1964         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1965             return 1;
1966         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1967         tmp2 = tcg_temp_new_i32();
1968         tcg_gen_mov_i32(tmp2, tmp);
1969         switch ((insn >> 22) & 3) {
1970         case 0:
1971             for (i = 0; i < 7; i ++) {
1972                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1973                 tcg_gen_or_i32(tmp, tmp, tmp2);
1974             }
1975             break;
1976         case 1:
1977             for (i = 0; i < 3; i ++) {
1978                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1979                 tcg_gen_or_i32(tmp, tmp, tmp2);
1980             }
1981             break;
1982         case 2:
1983             tcg_gen_shli_i32(tmp2, tmp2, 16);
1984             tcg_gen_or_i32(tmp, tmp, tmp2);
1985             break;
1986         }
1987         gen_set_nzcv(tmp);
1988         break;
1989     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1990         rd = (insn >> 12) & 0xf;
1991         rd0 = (insn >> 16) & 0xf;
1992         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1993             return 1;
1994         gen_op_iwmmxt_movq_M0_wRn(rd0);
1995         tmp = tcg_temp_new_i32();
1996         switch ((insn >> 22) & 3) {
1997         case 0:
1998             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1999             break;
2000         case 1:
2001             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2002             break;
2003         case 2:
2004             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2005             break;
2006         }
2007         store_reg(s, rd, tmp);
2008         break;
2009     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2010     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2011         wrd = (insn >> 12) & 0xf;
2012         rd0 = (insn >> 16) & 0xf;
2013         rd1 = (insn >> 0) & 0xf;
2014         gen_op_iwmmxt_movq_M0_wRn(rd0);
2015         switch ((insn >> 22) & 3) {
2016         case 0:
2017             if (insn & (1 << 21))
2018                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2019             else
2020                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2021             break;
2022         case 1:
2023             if (insn & (1 << 21))
2024                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2025             else
2026                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2027             break;
2028         case 2:
2029             if (insn & (1 << 21))
2030                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2031             else
2032                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2033             break;
2034         case 3:
2035             return 1;
2036         }
2037         gen_op_iwmmxt_movq_wRn_M0(wrd);
2038         gen_op_iwmmxt_set_mup();
2039         gen_op_iwmmxt_set_cup();
2040         break;
2041     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2042     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2043         wrd = (insn >> 12) & 0xf;
2044         rd0 = (insn >> 16) & 0xf;
2045         gen_op_iwmmxt_movq_M0_wRn(rd0);
2046         switch ((insn >> 22) & 3) {
2047         case 0:
2048             if (insn & (1 << 21))
2049                 gen_op_iwmmxt_unpacklsb_M0();
2050             else
2051                 gen_op_iwmmxt_unpacklub_M0();
2052             break;
2053         case 1:
2054             if (insn & (1 << 21))
2055                 gen_op_iwmmxt_unpacklsw_M0();
2056             else
2057                 gen_op_iwmmxt_unpackluw_M0();
2058             break;
2059         case 2:
2060             if (insn & (1 << 21))
2061                 gen_op_iwmmxt_unpacklsl_M0();
2062             else
2063                 gen_op_iwmmxt_unpacklul_M0();
2064             break;
2065         case 3:
2066             return 1;
2067         }
2068         gen_op_iwmmxt_movq_wRn_M0(wrd);
2069         gen_op_iwmmxt_set_mup();
2070         gen_op_iwmmxt_set_cup();
2071         break;
2072     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2073     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2074         wrd = (insn >> 12) & 0xf;
2075         rd0 = (insn >> 16) & 0xf;
2076         gen_op_iwmmxt_movq_M0_wRn(rd0);
2077         switch ((insn >> 22) & 3) {
2078         case 0:
2079             if (insn & (1 << 21))
2080                 gen_op_iwmmxt_unpackhsb_M0();
2081             else
2082                 gen_op_iwmmxt_unpackhub_M0();
2083             break;
2084         case 1:
2085             if (insn & (1 << 21))
2086                 gen_op_iwmmxt_unpackhsw_M0();
2087             else
2088                 gen_op_iwmmxt_unpackhuw_M0();
2089             break;
2090         case 2:
2091             if (insn & (1 << 21))
2092                 gen_op_iwmmxt_unpackhsl_M0();
2093             else
2094                 gen_op_iwmmxt_unpackhul_M0();
2095             break;
2096         case 3:
2097             return 1;
2098         }
2099         gen_op_iwmmxt_movq_wRn_M0(wrd);
2100         gen_op_iwmmxt_set_mup();
2101         gen_op_iwmmxt_set_cup();
2102         break;
2103     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2104     case 0x214: case 0x614: case 0xa14: case 0xe14:
2105         if (((insn >> 22) & 3) == 0)
2106             return 1;
2107         wrd = (insn >> 12) & 0xf;
2108         rd0 = (insn >> 16) & 0xf;
2109         gen_op_iwmmxt_movq_M0_wRn(rd0);
2110         tmp = tcg_temp_new_i32();
2111         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2112             return 1;
2113         }
2114         switch ((insn >> 22) & 3) {
2115         case 1:
2116             gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp);
2117             break;
2118         case 2:
2119             gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp);
2120             break;
2121         case 3:
2122             gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp);
2123             break;
2124         }
2125         gen_op_iwmmxt_movq_wRn_M0(wrd);
2126         gen_op_iwmmxt_set_mup();
2127         gen_op_iwmmxt_set_cup();
2128         break;
2129     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2130     case 0x014: case 0x414: case 0x814: case 0xc14:
2131         if (((insn >> 22) & 3) == 0)
2132             return 1;
2133         wrd = (insn >> 12) & 0xf;
2134         rd0 = (insn >> 16) & 0xf;
2135         gen_op_iwmmxt_movq_M0_wRn(rd0);
2136         tmp = tcg_temp_new_i32();
2137         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2138             return 1;
2139         }
2140         switch ((insn >> 22) & 3) {
2141         case 1:
2142             gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp);
2143             break;
2144         case 2:
2145             gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp);
2146             break;
2147         case 3:
2148             gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp);
2149             break;
2150         }
2151         gen_op_iwmmxt_movq_wRn_M0(wrd);
2152         gen_op_iwmmxt_set_mup();
2153         gen_op_iwmmxt_set_cup();
2154         break;
2155     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2156     case 0x114: case 0x514: case 0x914: case 0xd14:
2157         if (((insn >> 22) & 3) == 0)
2158             return 1;
2159         wrd = (insn >> 12) & 0xf;
2160         rd0 = (insn >> 16) & 0xf;
2161         gen_op_iwmmxt_movq_M0_wRn(rd0);
2162         tmp = tcg_temp_new_i32();
2163         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2164             return 1;
2165         }
2166         switch ((insn >> 22) & 3) {
2167         case 1:
2168             gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp);
2169             break;
2170         case 2:
2171             gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp);
2172             break;
2173         case 3:
2174             gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp);
2175             break;
2176         }
2177         gen_op_iwmmxt_movq_wRn_M0(wrd);
2178         gen_op_iwmmxt_set_mup();
2179         gen_op_iwmmxt_set_cup();
2180         break;
2181     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2182     case 0x314: case 0x714: case 0xb14: case 0xf14:
2183         if (((insn >> 22) & 3) == 0)
2184             return 1;
2185         wrd = (insn >> 12) & 0xf;
2186         rd0 = (insn >> 16) & 0xf;
2187         gen_op_iwmmxt_movq_M0_wRn(rd0);
2188         tmp = tcg_temp_new_i32();
2189         switch ((insn >> 22) & 3) {
2190         case 1:
2191             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2192                 return 1;
2193             }
2194             gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp);
2195             break;
2196         case 2:
2197             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2198                 return 1;
2199             }
2200             gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp);
2201             break;
2202         case 3:
2203             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2204                 return 1;
2205             }
2206             gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp);
2207             break;
2208         }
2209         gen_op_iwmmxt_movq_wRn_M0(wrd);
2210         gen_op_iwmmxt_set_mup();
2211         gen_op_iwmmxt_set_cup();
2212         break;
2213     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2214     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2215         wrd = (insn >> 12) & 0xf;
2216         rd0 = (insn >> 16) & 0xf;
2217         rd1 = (insn >> 0) & 0xf;
2218         gen_op_iwmmxt_movq_M0_wRn(rd0);
2219         switch ((insn >> 22) & 3) {
2220         case 0:
2221             if (insn & (1 << 21))
2222                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2223             else
2224                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2225             break;
2226         case 1:
2227             if (insn & (1 << 21))
2228                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2229             else
2230                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2231             break;
2232         case 2:
2233             if (insn & (1 << 21))
2234                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2235             else
2236                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2237             break;
2238         case 3:
2239             return 1;
2240         }
2241         gen_op_iwmmxt_movq_wRn_M0(wrd);
2242         gen_op_iwmmxt_set_mup();
2243         break;
2244     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2245     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2246         wrd = (insn >> 12) & 0xf;
2247         rd0 = (insn >> 16) & 0xf;
2248         rd1 = (insn >> 0) & 0xf;
2249         gen_op_iwmmxt_movq_M0_wRn(rd0);
2250         switch ((insn >> 22) & 3) {
2251         case 0:
2252             if (insn & (1 << 21))
2253                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2254             else
2255                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2256             break;
2257         case 1:
2258             if (insn & (1 << 21))
2259                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2260             else
2261                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2262             break;
2263         case 2:
2264             if (insn & (1 << 21))
2265                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2266             else
2267                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2268             break;
2269         case 3:
2270             return 1;
2271         }
2272         gen_op_iwmmxt_movq_wRn_M0(wrd);
2273         gen_op_iwmmxt_set_mup();
2274         break;
2275     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2276     case 0x402: case 0x502: case 0x602: case 0x702:
2277         wrd = (insn >> 12) & 0xf;
2278         rd0 = (insn >> 16) & 0xf;
2279         rd1 = (insn >> 0) & 0xf;
2280         gen_op_iwmmxt_movq_M0_wRn(rd0);
2281         iwmmxt_load_reg(cpu_V1, rd1);
2282         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2283                                 tcg_constant_i32((insn >> 20) & 3));
2284         gen_op_iwmmxt_movq_wRn_M0(wrd);
2285         gen_op_iwmmxt_set_mup();
2286         break;
2287     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2288     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2289     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2290     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2291         wrd = (insn >> 12) & 0xf;
2292         rd0 = (insn >> 16) & 0xf;
2293         rd1 = (insn >> 0) & 0xf;
2294         gen_op_iwmmxt_movq_M0_wRn(rd0);
2295         switch ((insn >> 20) & 0xf) {
2296         case 0x0:
2297             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2298             break;
2299         case 0x1:
2300             gen_op_iwmmxt_subub_M0_wRn(rd1);
2301             break;
2302         case 0x3:
2303             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2304             break;
2305         case 0x4:
2306             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2307             break;
2308         case 0x5:
2309             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2310             break;
2311         case 0x7:
2312             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2313             break;
2314         case 0x8:
2315             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2316             break;
2317         case 0x9:
2318             gen_op_iwmmxt_subul_M0_wRn(rd1);
2319             break;
2320         case 0xb:
2321             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2322             break;
2323         default:
2324             return 1;
2325         }
2326         gen_op_iwmmxt_movq_wRn_M0(wrd);
2327         gen_op_iwmmxt_set_mup();
2328         gen_op_iwmmxt_set_cup();
2329         break;
2330     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2331     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2332     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2333     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2334         wrd = (insn >> 12) & 0xf;
2335         rd0 = (insn >> 16) & 0xf;
2336         gen_op_iwmmxt_movq_M0_wRn(rd0);
2337         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2338         gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp);
2339         gen_op_iwmmxt_movq_wRn_M0(wrd);
2340         gen_op_iwmmxt_set_mup();
2341         gen_op_iwmmxt_set_cup();
2342         break;
2343     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2344     case 0x418: case 0x518: case 0x618: case 0x718:
2345     case 0x818: case 0x918: case 0xa18: case 0xb18:
2346     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2347         wrd = (insn >> 12) & 0xf;
2348         rd0 = (insn >> 16) & 0xf;
2349         rd1 = (insn >> 0) & 0xf;
2350         gen_op_iwmmxt_movq_M0_wRn(rd0);
2351         switch ((insn >> 20) & 0xf) {
2352         case 0x0:
2353             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2354             break;
2355         case 0x1:
2356             gen_op_iwmmxt_addub_M0_wRn(rd1);
2357             break;
2358         case 0x3:
2359             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2360             break;
2361         case 0x4:
2362             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2363             break;
2364         case 0x5:
2365             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2366             break;
2367         case 0x7:
2368             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2369             break;
2370         case 0x8:
2371             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2372             break;
2373         case 0x9:
2374             gen_op_iwmmxt_addul_M0_wRn(rd1);
2375             break;
2376         case 0xb:
2377             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2378             break;
2379         default:
2380             return 1;
2381         }
2382         gen_op_iwmmxt_movq_wRn_M0(wrd);
2383         gen_op_iwmmxt_set_mup();
2384         gen_op_iwmmxt_set_cup();
2385         break;
2386     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2387     case 0x408: case 0x508: case 0x608: case 0x708:
2388     case 0x808: case 0x908: case 0xa08: case 0xb08:
2389     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2390         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2391             return 1;
2392         wrd = (insn >> 12) & 0xf;
2393         rd0 = (insn >> 16) & 0xf;
2394         rd1 = (insn >> 0) & 0xf;
2395         gen_op_iwmmxt_movq_M0_wRn(rd0);
2396         switch ((insn >> 22) & 3) {
2397         case 1:
2398             if (insn & (1 << 21))
2399                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2400             else
2401                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2402             break;
2403         case 2:
2404             if (insn & (1 << 21))
2405                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2406             else
2407                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2408             break;
2409         case 3:
2410             if (insn & (1 << 21))
2411                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2412             else
2413                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2414             break;
2415         }
2416         gen_op_iwmmxt_movq_wRn_M0(wrd);
2417         gen_op_iwmmxt_set_mup();
2418         gen_op_iwmmxt_set_cup();
2419         break;
2420     case 0x201: case 0x203: case 0x205: case 0x207:
2421     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2422     case 0x211: case 0x213: case 0x215: case 0x217:
2423     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2424         wrd = (insn >> 5) & 0xf;
2425         rd0 = (insn >> 12) & 0xf;
2426         rd1 = (insn >> 0) & 0xf;
2427         if (rd0 == 0xf || rd1 == 0xf)
2428             return 1;
2429         gen_op_iwmmxt_movq_M0_wRn(wrd);
2430         tmp = load_reg(s, rd0);
2431         tmp2 = load_reg(s, rd1);
2432         switch ((insn >> 16) & 0xf) {
2433         case 0x0:                                       /* TMIA */
2434             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2435             break;
2436         case 0x8:                                       /* TMIAPH */
2437             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2438             break;
2439         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2440             if (insn & (1 << 16))
2441                 tcg_gen_shri_i32(tmp, tmp, 16);
2442             if (insn & (1 << 17))
2443                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2444             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2445             break;
2446         default:
2447             return 1;
2448         }
2449         gen_op_iwmmxt_movq_wRn_M0(wrd);
2450         gen_op_iwmmxt_set_mup();
2451         break;
2452     default:
2453         return 1;
2454     }
2455 
2456     return 0;
2457 }
2458 
2459 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2460    (ie. an undefined instruction).  */
2461 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2462 {
2463     int acc, rd0, rd1, rdhi, rdlo;
2464     TCGv_i32 tmp, tmp2;
2465 
2466     if ((insn & 0x0ff00f10) == 0x0e200010) {
2467         /* Multiply with Internal Accumulate Format */
2468         rd0 = (insn >> 12) & 0xf;
2469         rd1 = insn & 0xf;
2470         acc = (insn >> 5) & 7;
2471 
2472         if (acc != 0)
2473             return 1;
2474 
2475         tmp = load_reg(s, rd0);
2476         tmp2 = load_reg(s, rd1);
2477         switch ((insn >> 16) & 0xf) {
2478         case 0x0:                                       /* MIA */
2479             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2480             break;
2481         case 0x8:                                       /* MIAPH */
2482             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2483             break;
2484         case 0xc:                                       /* MIABB */
2485         case 0xd:                                       /* MIABT */
2486         case 0xe:                                       /* MIATB */
2487         case 0xf:                                       /* MIATT */
2488             if (insn & (1 << 16))
2489                 tcg_gen_shri_i32(tmp, tmp, 16);
2490             if (insn & (1 << 17))
2491                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2492             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2493             break;
2494         default:
2495             return 1;
2496         }
2497 
2498         gen_op_iwmmxt_movq_wRn_M0(acc);
2499         return 0;
2500     }
2501 
2502     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2503         /* Internal Accumulator Access Format */
2504         rdhi = (insn >> 16) & 0xf;
2505         rdlo = (insn >> 12) & 0xf;
2506         acc = insn & 7;
2507 
2508         if (acc != 0)
2509             return 1;
2510 
2511         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2512             iwmmxt_load_reg(cpu_V0, acc);
2513             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2514             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2515             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2516         } else {                                        /* MAR */
2517             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2518             iwmmxt_store_reg(cpu_V0, acc);
2519         }
2520         return 0;
2521     }
2522 
2523     return 1;
2524 }
2525 
2526 static void gen_goto_ptr(void)
2527 {
2528     tcg_gen_lookup_and_goto_ptr();
2529 }
2530 
2531 /* This will end the TB but doesn't guarantee we'll return to
2532  * cpu_loop_exec. Any live exit_requests will be processed as we
2533  * enter the next TB.
2534  */
2535 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2536 {
2537     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2538         /*
2539          * For pcrel, the pc must always be up-to-date on entry to
2540          * the linked TB, so that it can use simple additions for all
2541          * further adjustments.  For !pcrel, the linked TB is compiled
2542          * to know its full virtual address, so we can delay the
2543          * update to pc to the unlinked path.  A long chain of links
2544          * can thus avoid many updates to the PC.
2545          */
2546         if (tb_cflags(s->base.tb) & CF_PCREL) {
2547             gen_update_pc(s, diff);
2548             tcg_gen_goto_tb(n);
2549         } else {
2550             tcg_gen_goto_tb(n);
2551             gen_update_pc(s, diff);
2552         }
2553         tcg_gen_exit_tb(s->base.tb, n);
2554     } else {
2555         gen_update_pc(s, diff);
2556         gen_goto_ptr();
2557     }
2558     s->base.is_jmp = DISAS_NORETURN;
2559 }
2560 
2561 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2562 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2563 {
2564     if (unlikely(s->ss_active)) {
2565         /* An indirect jump so that we still trigger the debug exception.  */
2566         gen_update_pc(s, diff);
2567         s->base.is_jmp = DISAS_JUMP;
2568         return;
2569     }
2570     switch (s->base.is_jmp) {
2571     case DISAS_NEXT:
2572     case DISAS_TOO_MANY:
2573     case DISAS_NORETURN:
2574         /*
2575          * The normal case: just go to the destination TB.
2576          * NB: NORETURN happens if we generate code like
2577          *    gen_brcondi(l);
2578          *    gen_jmp();
2579          *    gen_set_label(l);
2580          *    gen_jmp();
2581          * on the second call to gen_jmp().
2582          */
2583         gen_goto_tb(s, tbno, diff);
2584         break;
2585     case DISAS_UPDATE_NOCHAIN:
2586     case DISAS_UPDATE_EXIT:
2587         /*
2588          * We already decided we're leaving the TB for some other reason.
2589          * Avoid using goto_tb so we really do exit back to the main loop
2590          * and don't chain to another TB.
2591          */
2592         gen_update_pc(s, diff);
2593         gen_goto_ptr();
2594         s->base.is_jmp = DISAS_NORETURN;
2595         break;
2596     default:
2597         /*
2598          * We shouldn't be emitting code for a jump and also have
2599          * is_jmp set to one of the special cases like DISAS_SWI.
2600          */
2601         g_assert_not_reached();
2602     }
2603 }
2604 
2605 static inline void gen_jmp(DisasContext *s, target_long diff)
2606 {
2607     gen_jmp_tb(s, diff, 0);
2608 }
2609 
2610 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2611 {
2612     if (x)
2613         tcg_gen_sari_i32(t0, t0, 16);
2614     else
2615         gen_sxth(t0);
2616     if (y)
2617         tcg_gen_sari_i32(t1, t1, 16);
2618     else
2619         gen_sxth(t1);
2620     tcg_gen_mul_i32(t0, t0, t1);
2621 }
2622 
2623 /* Return the mask of PSR bits set by a MSR instruction.  */
2624 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2625 {
2626     uint32_t mask = 0;
2627 
2628     if (flags & (1 << 0)) {
2629         mask |= 0xff;
2630     }
2631     if (flags & (1 << 1)) {
2632         mask |= 0xff00;
2633     }
2634     if (flags & (1 << 2)) {
2635         mask |= 0xff0000;
2636     }
2637     if (flags & (1 << 3)) {
2638         mask |= 0xff000000;
2639     }
2640 
2641     /* Mask out undefined and reserved bits.  */
2642     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2643 
2644     /* Mask out execution state.  */
2645     if (!spsr) {
2646         mask &= ~CPSR_EXEC;
2647     }
2648 
2649     /* Mask out privileged bits.  */
2650     if (IS_USER(s)) {
2651         mask &= CPSR_USER;
2652     }
2653     return mask;
2654 }
2655 
2656 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2657 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2658 {
2659     TCGv_i32 tmp;
2660     if (spsr) {
2661         /* ??? This is also undefined in system mode.  */
2662         if (IS_USER(s))
2663             return 1;
2664 
2665         tmp = load_cpu_field(spsr);
2666         tcg_gen_andi_i32(tmp, tmp, ~mask);
2667         tcg_gen_andi_i32(t0, t0, mask);
2668         tcg_gen_or_i32(tmp, tmp, t0);
2669         store_cpu_field(tmp, spsr);
2670     } else {
2671         gen_set_cpsr(t0, mask);
2672     }
2673     gen_lookup_tb(s);
2674     return 0;
2675 }
2676 
2677 /* Returns nonzero if access to the PSR is not permitted.  */
2678 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2679 {
2680     TCGv_i32 tmp;
2681     tmp = tcg_temp_new_i32();
2682     tcg_gen_movi_i32(tmp, val);
2683     return gen_set_psr(s, mask, spsr, tmp);
2684 }
2685 
2686 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2687                                      int *tgtmode, int *regno)
2688 {
2689     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2690      * the target mode and register number, and identify the various
2691      * unpredictable cases.
2692      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2693      *  + executed in user mode
2694      *  + using R15 as the src/dest register
2695      *  + accessing an unimplemented register
2696      *  + accessing a register that's inaccessible at current PL/security state*
2697      *  + accessing a register that you could access with a different insn
2698      * We choose to UNDEF in all these cases.
2699      * Since we don't know which of the various AArch32 modes we are in
2700      * we have to defer some checks to runtime.
2701      * Accesses to Monitor mode registers from Secure EL1 (which implies
2702      * that EL3 is AArch64) must trap to EL3.
2703      *
2704      * If the access checks fail this function will emit code to take
2705      * an exception and return false. Otherwise it will return true,
2706      * and set *tgtmode and *regno appropriately.
2707      */
2708     /* These instructions are present only in ARMv8, or in ARMv7 with the
2709      * Virtualization Extensions.
2710      */
2711     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2712         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2713         goto undef;
2714     }
2715 
2716     if (IS_USER(s) || rn == 15) {
2717         goto undef;
2718     }
2719 
2720     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2721      * of registers into (r, sysm).
2722      */
2723     if (r) {
2724         /* SPSRs for other modes */
2725         switch (sysm) {
2726         case 0xe: /* SPSR_fiq */
2727             *tgtmode = ARM_CPU_MODE_FIQ;
2728             break;
2729         case 0x10: /* SPSR_irq */
2730             *tgtmode = ARM_CPU_MODE_IRQ;
2731             break;
2732         case 0x12: /* SPSR_svc */
2733             *tgtmode = ARM_CPU_MODE_SVC;
2734             break;
2735         case 0x14: /* SPSR_abt */
2736             *tgtmode = ARM_CPU_MODE_ABT;
2737             break;
2738         case 0x16: /* SPSR_und */
2739             *tgtmode = ARM_CPU_MODE_UND;
2740             break;
2741         case 0x1c: /* SPSR_mon */
2742             *tgtmode = ARM_CPU_MODE_MON;
2743             break;
2744         case 0x1e: /* SPSR_hyp */
2745             *tgtmode = ARM_CPU_MODE_HYP;
2746             break;
2747         default: /* unallocated */
2748             goto undef;
2749         }
2750         /* We arbitrarily assign SPSR a register number of 16. */
2751         *regno = 16;
2752     } else {
2753         /* general purpose registers for other modes */
2754         switch (sysm) {
2755         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2756             *tgtmode = ARM_CPU_MODE_USR;
2757             *regno = sysm + 8;
2758             break;
2759         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2760             *tgtmode = ARM_CPU_MODE_FIQ;
2761             *regno = sysm;
2762             break;
2763         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2764             *tgtmode = ARM_CPU_MODE_IRQ;
2765             *regno = sysm & 1 ? 13 : 14;
2766             break;
2767         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2768             *tgtmode = ARM_CPU_MODE_SVC;
2769             *regno = sysm & 1 ? 13 : 14;
2770             break;
2771         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2772             *tgtmode = ARM_CPU_MODE_ABT;
2773             *regno = sysm & 1 ? 13 : 14;
2774             break;
2775         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2776             *tgtmode = ARM_CPU_MODE_UND;
2777             *regno = sysm & 1 ? 13 : 14;
2778             break;
2779         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2780             *tgtmode = ARM_CPU_MODE_MON;
2781             *regno = sysm & 1 ? 13 : 14;
2782             break;
2783         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2784             *tgtmode = ARM_CPU_MODE_HYP;
2785             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2786             *regno = sysm & 1 ? 13 : 17;
2787             break;
2788         default: /* unallocated */
2789             goto undef;
2790         }
2791     }
2792 
2793     /* Catch the 'accessing inaccessible register' cases we can detect
2794      * at translate time.
2795      */
2796     switch (*tgtmode) {
2797     case ARM_CPU_MODE_MON:
2798         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2799             goto undef;
2800         }
2801         if (s->current_el == 1) {
2802             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2803              * then accesses to Mon registers trap to Secure EL2, if it exists,
2804              * otherwise EL3.
2805              */
2806             TCGv_i32 tcg_el;
2807 
2808             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2809                 dc_isar_feature(aa64_sel2, s)) {
2810                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2811                 tcg_el = load_cpu_field_low32(cp15.scr_el3);
2812                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2813                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2814             } else {
2815                 tcg_el = tcg_constant_i32(3);
2816             }
2817 
2818             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2819                                     syn_uncategorized(), tcg_el);
2820             return false;
2821         }
2822         break;
2823     case ARM_CPU_MODE_HYP:
2824         /*
2825          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2826          * (and so we can forbid accesses from EL2 or below). elr_hyp
2827          * can be accessed also from Hyp mode, so forbid accesses from
2828          * EL0 or EL1.
2829          */
2830         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2831             (s->current_el < 3 && *regno != 17)) {
2832             goto undef;
2833         }
2834         break;
2835     default:
2836         break;
2837     }
2838 
2839     return true;
2840 
2841 undef:
2842     /* If we get here then some access check did not pass */
2843     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2844     return false;
2845 }
2846 
2847 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2848 {
2849     TCGv_i32 tcg_reg;
2850     int tgtmode = 0, regno = 0;
2851 
2852     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2853         return;
2854     }
2855 
2856     /* Sync state because msr_banked() can raise exceptions */
2857     gen_set_condexec(s);
2858     gen_update_pc(s, 0);
2859     tcg_reg = load_reg(s, rn);
2860     gen_helper_msr_banked(tcg_env, tcg_reg,
2861                           tcg_constant_i32(tgtmode),
2862                           tcg_constant_i32(regno));
2863     s->base.is_jmp = DISAS_UPDATE_EXIT;
2864 }
2865 
2866 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2867 {
2868     TCGv_i32 tcg_reg;
2869     int tgtmode = 0, regno = 0;
2870 
2871     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2872         return;
2873     }
2874 
2875     /* Sync state because mrs_banked() can raise exceptions */
2876     gen_set_condexec(s);
2877     gen_update_pc(s, 0);
2878     tcg_reg = tcg_temp_new_i32();
2879     gen_helper_mrs_banked(tcg_reg, tcg_env,
2880                           tcg_constant_i32(tgtmode),
2881                           tcg_constant_i32(regno));
2882     store_reg(s, rn, tcg_reg);
2883     s->base.is_jmp = DISAS_UPDATE_EXIT;
2884 }
2885 
2886 /* Store value to PC as for an exception return (ie don't
2887  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2888  * will do the masking based on the new value of the Thumb bit.
2889  */
2890 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2891 {
2892     tcg_gen_mov_i32(cpu_R[15], pc);
2893 }
2894 
2895 /* Generate a v6 exception return.  Marks both values as dead.  */
2896 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2897 {
2898     store_pc_exc_ret(s, pc);
2899     /* The cpsr_write_eret helper will mask the low bits of PC
2900      * appropriately depending on the new Thumb bit, so it must
2901      * be called after storing the new PC.
2902      */
2903     translator_io_start(&s->base);
2904     gen_helper_cpsr_write_eret(tcg_env, cpsr);
2905     /* Must exit loop to check un-masked IRQs */
2906     s->base.is_jmp = DISAS_EXIT;
2907 }
2908 
2909 /* Generate an old-style exception return. Marks pc as dead. */
2910 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2911 {
2912     gen_rfe(s, pc, load_cpu_field(spsr));
2913 }
2914 
2915 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2916                             uint32_t opr_sz, uint32_t max_sz,
2917                             gen_helper_gvec_3_ptr *fn)
2918 {
2919     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2920 
2921     tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
2922     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2923                        opr_sz, max_sz, 0, fn);
2924 }
2925 
2926 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2927                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2928 {
2929     static gen_helper_gvec_3_ptr * const fns[2] = {
2930         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2931     };
2932     tcg_debug_assert(vece >= 1 && vece <= 2);
2933     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2934 }
2935 
2936 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
2946 #define GEN_CMP0(NAME, COND)                              \
2947     void NAME(unsigned vece, uint32_t d, uint32_t m,      \
2948               uint32_t opr_sz, uint32_t max_sz)           \
2949     { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
2950 
2951 GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
2952 GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
2953 GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
2954 GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
2955 GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
2956 
2957 #undef GEN_CMP0
2958 
2959 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2960 {
2961     tcg_gen_vec_sar8i_i64(a, a, shift);
2962     tcg_gen_vec_add8_i64(d, d, a);
2963 }
2964 
2965 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2966 {
2967     tcg_gen_vec_sar16i_i64(a, a, shift);
2968     tcg_gen_vec_add16_i64(d, d, a);
2969 }
2970 
2971 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
2972 {
2973     tcg_gen_sari_i32(a, a, shift);
2974     tcg_gen_add_i32(d, d, a);
2975 }
2976 
2977 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2978 {
2979     tcg_gen_sari_i64(a, a, shift);
2980     tcg_gen_add_i64(d, d, a);
2981 }
2982 
2983 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
2984 {
2985     tcg_gen_sari_vec(vece, a, a, sh);
2986     tcg_gen_add_vec(vece, d, d, a);
2987 }
2988 
2989 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
2990                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
2991 {
2992     static const TCGOpcode vecop_list[] = {
2993         INDEX_op_sari_vec, INDEX_op_add_vec, 0
2994     };
2995     static const GVecGen2i ops[4] = {
2996         { .fni8 = gen_ssra8_i64,
2997           .fniv = gen_ssra_vec,
2998           .fno = gen_helper_gvec_ssra_b,
2999           .load_dest = true,
3000           .opt_opc = vecop_list,
3001           .vece = MO_8 },
3002         { .fni8 = gen_ssra16_i64,
3003           .fniv = gen_ssra_vec,
3004           .fno = gen_helper_gvec_ssra_h,
3005           .load_dest = true,
3006           .opt_opc = vecop_list,
3007           .vece = MO_16 },
3008         { .fni4 = gen_ssra32_i32,
3009           .fniv = gen_ssra_vec,
3010           .fno = gen_helper_gvec_ssra_s,
3011           .load_dest = true,
3012           .opt_opc = vecop_list,
3013           .vece = MO_32 },
3014         { .fni8 = gen_ssra64_i64,
3015           .fniv = gen_ssra_vec,
3016           .fno = gen_helper_gvec_ssra_d,
3017           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3018           .opt_opc = vecop_list,
3019           .load_dest = true,
3020           .vece = MO_64 },
3021     };
3022 
3023     /* tszimm encoding produces immediates in the range [1..esize]. */
3024     tcg_debug_assert(shift > 0);
3025     tcg_debug_assert(shift <= (8 << vece));
3026 
3027     /*
3028      * Shifts larger than the element size are architecturally valid.
3029      * Signed results in all sign bits.
3030      */
3031     shift = MIN(shift, (8 << vece) - 1);
3032     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3033 }
3034 
3035 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3036 {
3037     tcg_gen_vec_shr8i_i64(a, a, shift);
3038     tcg_gen_vec_add8_i64(d, d, a);
3039 }
3040 
3041 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3042 {
3043     tcg_gen_vec_shr16i_i64(a, a, shift);
3044     tcg_gen_vec_add16_i64(d, d, a);
3045 }
3046 
3047 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3048 {
3049     tcg_gen_shri_i32(a, a, shift);
3050     tcg_gen_add_i32(d, d, a);
3051 }
3052 
3053 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3054 {
3055     tcg_gen_shri_i64(a, a, shift);
3056     tcg_gen_add_i64(d, d, a);
3057 }
3058 
3059 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3060 {
3061     tcg_gen_shri_vec(vece, a, a, sh);
3062     tcg_gen_add_vec(vece, d, d, a);
3063 }
3064 
3065 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3066                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3067 {
3068     static const TCGOpcode vecop_list[] = {
3069         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3070     };
3071     static const GVecGen2i ops[4] = {
3072         { .fni8 = gen_usra8_i64,
3073           .fniv = gen_usra_vec,
3074           .fno = gen_helper_gvec_usra_b,
3075           .load_dest = true,
3076           .opt_opc = vecop_list,
3077           .vece = MO_8, },
3078         { .fni8 = gen_usra16_i64,
3079           .fniv = gen_usra_vec,
3080           .fno = gen_helper_gvec_usra_h,
3081           .load_dest = true,
3082           .opt_opc = vecop_list,
3083           .vece = MO_16, },
3084         { .fni4 = gen_usra32_i32,
3085           .fniv = gen_usra_vec,
3086           .fno = gen_helper_gvec_usra_s,
3087           .load_dest = true,
3088           .opt_opc = vecop_list,
3089           .vece = MO_32, },
3090         { .fni8 = gen_usra64_i64,
3091           .fniv = gen_usra_vec,
3092           .fno = gen_helper_gvec_usra_d,
3093           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3094           .load_dest = true,
3095           .opt_opc = vecop_list,
3096           .vece = MO_64, },
3097     };
3098 
3099     /* tszimm encoding produces immediates in the range [1..esize]. */
3100     tcg_debug_assert(shift > 0);
3101     tcg_debug_assert(shift <= (8 << vece));
3102 
3103     /*
3104      * Shifts larger than the element size are architecturally valid.
3105      * Unsigned results in all zeros as input to accumulate: nop.
3106      */
3107     if (shift < (8 << vece)) {
3108         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3109     } else {
3110         /* Nop, but we do need to clear the tail. */
3111         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3112     }
3113 }
3114 
3115 /*
3116  * Shift one less than the requested amount, and the low bit is
3117  * the rounding bit.  For the 8 and 16-bit operations, because we
3118  * mask the low bit, we can perform a normal integer shift instead
3119  * of a vector shift.
3120  */
3121 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3122 {
3123     TCGv_i64 t = tcg_temp_new_i64();
3124 
3125     tcg_gen_shri_i64(t, a, sh - 1);
3126     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3127     tcg_gen_vec_sar8i_i64(d, a, sh);
3128     tcg_gen_vec_add8_i64(d, d, t);
3129 }
3130 
3131 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3132 {
3133     TCGv_i64 t = tcg_temp_new_i64();
3134 
3135     tcg_gen_shri_i64(t, a, sh - 1);
3136     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3137     tcg_gen_vec_sar16i_i64(d, a, sh);
3138     tcg_gen_vec_add16_i64(d, d, t);
3139 }
3140 
3141 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3142 {
3143     TCGv_i32 t;
3144 
3145     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3146     if (sh == 32) {
3147         tcg_gen_movi_i32(d, 0);
3148         return;
3149     }
3150     t = tcg_temp_new_i32();
3151     tcg_gen_extract_i32(t, a, sh - 1, 1);
3152     tcg_gen_sari_i32(d, a, sh);
3153     tcg_gen_add_i32(d, d, t);
3154 }
3155 
3156 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3157 {
3158     TCGv_i64 t = tcg_temp_new_i64();
3159 
3160     tcg_gen_extract_i64(t, a, sh - 1, 1);
3161     tcg_gen_sari_i64(d, a, sh);
3162     tcg_gen_add_i64(d, d, t);
3163 }
3164 
3165 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3166 {
3167     TCGv_vec t = tcg_temp_new_vec_matching(d);
3168     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3169 
3170     tcg_gen_shri_vec(vece, t, a, sh - 1);
3171     tcg_gen_dupi_vec(vece, ones, 1);
3172     tcg_gen_and_vec(vece, t, t, ones);
3173     tcg_gen_sari_vec(vece, d, a, sh);
3174     tcg_gen_add_vec(vece, d, d, t);
3175 }
3176 
3177 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3178                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3179 {
3180     static const TCGOpcode vecop_list[] = {
3181         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3182     };
3183     static const GVecGen2i ops[4] = {
3184         { .fni8 = gen_srshr8_i64,
3185           .fniv = gen_srshr_vec,
3186           .fno = gen_helper_gvec_srshr_b,
3187           .opt_opc = vecop_list,
3188           .vece = MO_8 },
3189         { .fni8 = gen_srshr16_i64,
3190           .fniv = gen_srshr_vec,
3191           .fno = gen_helper_gvec_srshr_h,
3192           .opt_opc = vecop_list,
3193           .vece = MO_16 },
3194         { .fni4 = gen_srshr32_i32,
3195           .fniv = gen_srshr_vec,
3196           .fno = gen_helper_gvec_srshr_s,
3197           .opt_opc = vecop_list,
3198           .vece = MO_32 },
3199         { .fni8 = gen_srshr64_i64,
3200           .fniv = gen_srshr_vec,
3201           .fno = gen_helper_gvec_srshr_d,
3202           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3203           .opt_opc = vecop_list,
3204           .vece = MO_64 },
3205     };
3206 
3207     /* tszimm encoding produces immediates in the range [1..esize] */
3208     tcg_debug_assert(shift > 0);
3209     tcg_debug_assert(shift <= (8 << vece));
3210 
3211     if (shift == (8 << vece)) {
3212         /*
3213          * Shifts larger than the element size are architecturally valid.
3214          * Signed results in all sign bits.  With rounding, this produces
3215          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3216          * I.e. always zero.
3217          */
3218         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3219     } else {
3220         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3221     }
3222 }
3223 
3224 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3225 {
3226     TCGv_i64 t = tcg_temp_new_i64();
3227 
3228     gen_srshr8_i64(t, a, sh);
3229     tcg_gen_vec_add8_i64(d, d, t);
3230 }
3231 
3232 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3233 {
3234     TCGv_i64 t = tcg_temp_new_i64();
3235 
3236     gen_srshr16_i64(t, a, sh);
3237     tcg_gen_vec_add16_i64(d, d, t);
3238 }
3239 
3240 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3241 {
3242     TCGv_i32 t = tcg_temp_new_i32();
3243 
3244     gen_srshr32_i32(t, a, sh);
3245     tcg_gen_add_i32(d, d, t);
3246 }
3247 
3248 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3249 {
3250     TCGv_i64 t = tcg_temp_new_i64();
3251 
3252     gen_srshr64_i64(t, a, sh);
3253     tcg_gen_add_i64(d, d, t);
3254 }
3255 
3256 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3257 {
3258     TCGv_vec t = tcg_temp_new_vec_matching(d);
3259 
3260     gen_srshr_vec(vece, t, a, sh);
3261     tcg_gen_add_vec(vece, d, d, t);
3262 }
3263 
3264 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3265                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3266 {
3267     static const TCGOpcode vecop_list[] = {
3268         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3269     };
3270     static const GVecGen2i ops[4] = {
3271         { .fni8 = gen_srsra8_i64,
3272           .fniv = gen_srsra_vec,
3273           .fno = gen_helper_gvec_srsra_b,
3274           .opt_opc = vecop_list,
3275           .load_dest = true,
3276           .vece = MO_8 },
3277         { .fni8 = gen_srsra16_i64,
3278           .fniv = gen_srsra_vec,
3279           .fno = gen_helper_gvec_srsra_h,
3280           .opt_opc = vecop_list,
3281           .load_dest = true,
3282           .vece = MO_16 },
3283         { .fni4 = gen_srsra32_i32,
3284           .fniv = gen_srsra_vec,
3285           .fno = gen_helper_gvec_srsra_s,
3286           .opt_opc = vecop_list,
3287           .load_dest = true,
3288           .vece = MO_32 },
3289         { .fni8 = gen_srsra64_i64,
3290           .fniv = gen_srsra_vec,
3291           .fno = gen_helper_gvec_srsra_d,
3292           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3293           .opt_opc = vecop_list,
3294           .load_dest = true,
3295           .vece = MO_64 },
3296     };
3297 
3298     /* tszimm encoding produces immediates in the range [1..esize] */
3299     tcg_debug_assert(shift > 0);
3300     tcg_debug_assert(shift <= (8 << vece));
3301 
3302     /*
3303      * Shifts larger than the element size are architecturally valid.
3304      * Signed results in all sign bits.  With rounding, this produces
3305      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3306      * I.e. always zero.  With accumulation, this leaves D unchanged.
3307      */
3308     if (shift == (8 << vece)) {
3309         /* Nop, but we do need to clear the tail. */
3310         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3311     } else {
3312         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3313     }
3314 }
3315 
3316 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3317 {
3318     TCGv_i64 t = tcg_temp_new_i64();
3319 
3320     tcg_gen_shri_i64(t, a, sh - 1);
3321     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3322     tcg_gen_vec_shr8i_i64(d, a, sh);
3323     tcg_gen_vec_add8_i64(d, d, t);
3324 }
3325 
3326 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3327 {
3328     TCGv_i64 t = tcg_temp_new_i64();
3329 
3330     tcg_gen_shri_i64(t, a, sh - 1);
3331     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3332     tcg_gen_vec_shr16i_i64(d, a, sh);
3333     tcg_gen_vec_add16_i64(d, d, t);
3334 }
3335 
3336 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3337 {
3338     TCGv_i32 t;
3339 
3340     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3341     if (sh == 32) {
3342         tcg_gen_extract_i32(d, a, sh - 1, 1);
3343         return;
3344     }
3345     t = tcg_temp_new_i32();
3346     tcg_gen_extract_i32(t, a, sh - 1, 1);
3347     tcg_gen_shri_i32(d, a, sh);
3348     tcg_gen_add_i32(d, d, t);
3349 }
3350 
3351 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3352 {
3353     TCGv_i64 t = tcg_temp_new_i64();
3354 
3355     tcg_gen_extract_i64(t, a, sh - 1, 1);
3356     tcg_gen_shri_i64(d, a, sh);
3357     tcg_gen_add_i64(d, d, t);
3358 }
3359 
3360 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3361 {
3362     TCGv_vec t = tcg_temp_new_vec_matching(d);
3363     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3364 
3365     tcg_gen_shri_vec(vece, t, a, shift - 1);
3366     tcg_gen_dupi_vec(vece, ones, 1);
3367     tcg_gen_and_vec(vece, t, t, ones);
3368     tcg_gen_shri_vec(vece, d, a, shift);
3369     tcg_gen_add_vec(vece, d, d, t);
3370 }
3371 
3372 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3373                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3374 {
3375     static const TCGOpcode vecop_list[] = {
3376         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3377     };
3378     static const GVecGen2i ops[4] = {
3379         { .fni8 = gen_urshr8_i64,
3380           .fniv = gen_urshr_vec,
3381           .fno = gen_helper_gvec_urshr_b,
3382           .opt_opc = vecop_list,
3383           .vece = MO_8 },
3384         { .fni8 = gen_urshr16_i64,
3385           .fniv = gen_urshr_vec,
3386           .fno = gen_helper_gvec_urshr_h,
3387           .opt_opc = vecop_list,
3388           .vece = MO_16 },
3389         { .fni4 = gen_urshr32_i32,
3390           .fniv = gen_urshr_vec,
3391           .fno = gen_helper_gvec_urshr_s,
3392           .opt_opc = vecop_list,
3393           .vece = MO_32 },
3394         { .fni8 = gen_urshr64_i64,
3395           .fniv = gen_urshr_vec,
3396           .fno = gen_helper_gvec_urshr_d,
3397           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3398           .opt_opc = vecop_list,
3399           .vece = MO_64 },
3400     };
3401 
3402     /* tszimm encoding produces immediates in the range [1..esize] */
3403     tcg_debug_assert(shift > 0);
3404     tcg_debug_assert(shift <= (8 << vece));
3405 
3406     if (shift == (8 << vece)) {
3407         /*
3408          * Shifts larger than the element size are architecturally valid.
3409          * Unsigned results in zero.  With rounding, this produces a
3410          * copy of the most significant bit.
3411          */
3412         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3413     } else {
3414         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3415     }
3416 }
3417 
3418 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3419 {
3420     TCGv_i64 t = tcg_temp_new_i64();
3421 
3422     if (sh == 8) {
3423         tcg_gen_vec_shr8i_i64(t, a, 7);
3424     } else {
3425         gen_urshr8_i64(t, a, sh);
3426     }
3427     tcg_gen_vec_add8_i64(d, d, t);
3428 }
3429 
3430 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3431 {
3432     TCGv_i64 t = tcg_temp_new_i64();
3433 
3434     if (sh == 16) {
3435         tcg_gen_vec_shr16i_i64(t, a, 15);
3436     } else {
3437         gen_urshr16_i64(t, a, sh);
3438     }
3439     tcg_gen_vec_add16_i64(d, d, t);
3440 }
3441 
3442 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3443 {
3444     TCGv_i32 t = tcg_temp_new_i32();
3445 
3446     if (sh == 32) {
3447         tcg_gen_shri_i32(t, a, 31);
3448     } else {
3449         gen_urshr32_i32(t, a, sh);
3450     }
3451     tcg_gen_add_i32(d, d, t);
3452 }
3453 
3454 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3455 {
3456     TCGv_i64 t = tcg_temp_new_i64();
3457 
3458     if (sh == 64) {
3459         tcg_gen_shri_i64(t, a, 63);
3460     } else {
3461         gen_urshr64_i64(t, a, sh);
3462     }
3463     tcg_gen_add_i64(d, d, t);
3464 }
3465 
3466 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3467 {
3468     TCGv_vec t = tcg_temp_new_vec_matching(d);
3469 
3470     if (sh == (8 << vece)) {
3471         tcg_gen_shri_vec(vece, t, a, sh - 1);
3472     } else {
3473         gen_urshr_vec(vece, t, a, sh);
3474     }
3475     tcg_gen_add_vec(vece, d, d, t);
3476 }
3477 
3478 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3479                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3480 {
3481     static const TCGOpcode vecop_list[] = {
3482         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3483     };
3484     static const GVecGen2i ops[4] = {
3485         { .fni8 = gen_ursra8_i64,
3486           .fniv = gen_ursra_vec,
3487           .fno = gen_helper_gvec_ursra_b,
3488           .opt_opc = vecop_list,
3489           .load_dest = true,
3490           .vece = MO_8 },
3491         { .fni8 = gen_ursra16_i64,
3492           .fniv = gen_ursra_vec,
3493           .fno = gen_helper_gvec_ursra_h,
3494           .opt_opc = vecop_list,
3495           .load_dest = true,
3496           .vece = MO_16 },
3497         { .fni4 = gen_ursra32_i32,
3498           .fniv = gen_ursra_vec,
3499           .fno = gen_helper_gvec_ursra_s,
3500           .opt_opc = vecop_list,
3501           .load_dest = true,
3502           .vece = MO_32 },
3503         { .fni8 = gen_ursra64_i64,
3504           .fniv = gen_ursra_vec,
3505           .fno = gen_helper_gvec_ursra_d,
3506           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3507           .opt_opc = vecop_list,
3508           .load_dest = true,
3509           .vece = MO_64 },
3510     };
3511 
3512     /* tszimm encoding produces immediates in the range [1..esize] */
3513     tcg_debug_assert(shift > 0);
3514     tcg_debug_assert(shift <= (8 << vece));
3515 
3516     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3517 }
3518 
3519 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3520 {
3521     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3522     TCGv_i64 t = tcg_temp_new_i64();
3523 
3524     tcg_gen_shri_i64(t, a, shift);
3525     tcg_gen_andi_i64(t, t, mask);
3526     tcg_gen_andi_i64(d, d, ~mask);
3527     tcg_gen_or_i64(d, d, t);
3528 }
3529 
3530 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3531 {
3532     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3533     TCGv_i64 t = tcg_temp_new_i64();
3534 
3535     tcg_gen_shri_i64(t, a, shift);
3536     tcg_gen_andi_i64(t, t, mask);
3537     tcg_gen_andi_i64(d, d, ~mask);
3538     tcg_gen_or_i64(d, d, t);
3539 }
3540 
3541 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3542 {
3543     tcg_gen_shri_i32(a, a, shift);
3544     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3545 }
3546 
3547 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3548 {
3549     tcg_gen_shri_i64(a, a, shift);
3550     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3551 }
3552 
3553 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3554 {
3555     TCGv_vec t = tcg_temp_new_vec_matching(d);
3556     TCGv_vec m = tcg_temp_new_vec_matching(d);
3557 
3558     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3559     tcg_gen_shri_vec(vece, t, a, sh);
3560     tcg_gen_and_vec(vece, d, d, m);
3561     tcg_gen_or_vec(vece, d, d, t);
3562 }
3563 
3564 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3565                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3566 {
3567     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3568     const GVecGen2i ops[4] = {
3569         { .fni8 = gen_shr8_ins_i64,
3570           .fniv = gen_shr_ins_vec,
3571           .fno = gen_helper_gvec_sri_b,
3572           .load_dest = true,
3573           .opt_opc = vecop_list,
3574           .vece = MO_8 },
3575         { .fni8 = gen_shr16_ins_i64,
3576           .fniv = gen_shr_ins_vec,
3577           .fno = gen_helper_gvec_sri_h,
3578           .load_dest = true,
3579           .opt_opc = vecop_list,
3580           .vece = MO_16 },
3581         { .fni4 = gen_shr32_ins_i32,
3582           .fniv = gen_shr_ins_vec,
3583           .fno = gen_helper_gvec_sri_s,
3584           .load_dest = true,
3585           .opt_opc = vecop_list,
3586           .vece = MO_32 },
3587         { .fni8 = gen_shr64_ins_i64,
3588           .fniv = gen_shr_ins_vec,
3589           .fno = gen_helper_gvec_sri_d,
3590           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3591           .load_dest = true,
3592           .opt_opc = vecop_list,
3593           .vece = MO_64 },
3594     };
3595 
3596     /* tszimm encoding produces immediates in the range [1..esize]. */
3597     tcg_debug_assert(shift > 0);
3598     tcg_debug_assert(shift <= (8 << vece));
3599 
3600     /* Shift of esize leaves destination unchanged. */
3601     if (shift < (8 << vece)) {
3602         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3603     } else {
3604         /* Nop, but we do need to clear the tail. */
3605         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3606     }
3607 }
3608 
3609 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3610 {
3611     uint64_t mask = dup_const(MO_8, 0xff << shift);
3612     TCGv_i64 t = tcg_temp_new_i64();
3613 
3614     tcg_gen_shli_i64(t, a, shift);
3615     tcg_gen_andi_i64(t, t, mask);
3616     tcg_gen_andi_i64(d, d, ~mask);
3617     tcg_gen_or_i64(d, d, t);
3618 }
3619 
3620 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3621 {
3622     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3623     TCGv_i64 t = tcg_temp_new_i64();
3624 
3625     tcg_gen_shli_i64(t, a, shift);
3626     tcg_gen_andi_i64(t, t, mask);
3627     tcg_gen_andi_i64(d, d, ~mask);
3628     tcg_gen_or_i64(d, d, t);
3629 }
3630 
3631 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3632 {
3633     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3634 }
3635 
3636 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3637 {
3638     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3639 }
3640 
3641 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3642 {
3643     TCGv_vec t = tcg_temp_new_vec_matching(d);
3644     TCGv_vec m = tcg_temp_new_vec_matching(d);
3645 
3646     tcg_gen_shli_vec(vece, t, a, sh);
3647     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3648     tcg_gen_and_vec(vece, d, d, m);
3649     tcg_gen_or_vec(vece, d, d, t);
3650 }
3651 
3652 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3653                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3654 {
3655     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3656     const GVecGen2i ops[4] = {
3657         { .fni8 = gen_shl8_ins_i64,
3658           .fniv = gen_shl_ins_vec,
3659           .fno = gen_helper_gvec_sli_b,
3660           .load_dest = true,
3661           .opt_opc = vecop_list,
3662           .vece = MO_8 },
3663         { .fni8 = gen_shl16_ins_i64,
3664           .fniv = gen_shl_ins_vec,
3665           .fno = gen_helper_gvec_sli_h,
3666           .load_dest = true,
3667           .opt_opc = vecop_list,
3668           .vece = MO_16 },
3669         { .fni4 = gen_shl32_ins_i32,
3670           .fniv = gen_shl_ins_vec,
3671           .fno = gen_helper_gvec_sli_s,
3672           .load_dest = true,
3673           .opt_opc = vecop_list,
3674           .vece = MO_32 },
3675         { .fni8 = gen_shl64_ins_i64,
3676           .fniv = gen_shl_ins_vec,
3677           .fno = gen_helper_gvec_sli_d,
3678           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3679           .load_dest = true,
3680           .opt_opc = vecop_list,
3681           .vece = MO_64 },
3682     };
3683 
3684     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3685     tcg_debug_assert(shift >= 0);
3686     tcg_debug_assert(shift < (8 << vece));
3687 
3688     if (shift == 0) {
3689         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3690     } else {
3691         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3692     }
3693 }
3694 
3695 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3696 {
3697     gen_helper_neon_mul_u8(a, a, b);
3698     gen_helper_neon_add_u8(d, d, a);
3699 }
3700 
3701 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3702 {
3703     gen_helper_neon_mul_u8(a, a, b);
3704     gen_helper_neon_sub_u8(d, d, a);
3705 }
3706 
3707 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3708 {
3709     gen_helper_neon_mul_u16(a, a, b);
3710     gen_helper_neon_add_u16(d, d, a);
3711 }
3712 
3713 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3714 {
3715     gen_helper_neon_mul_u16(a, a, b);
3716     gen_helper_neon_sub_u16(d, d, a);
3717 }
3718 
3719 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3720 {
3721     tcg_gen_mul_i32(a, a, b);
3722     tcg_gen_add_i32(d, d, a);
3723 }
3724 
3725 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3726 {
3727     tcg_gen_mul_i32(a, a, b);
3728     tcg_gen_sub_i32(d, d, a);
3729 }
3730 
3731 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3732 {
3733     tcg_gen_mul_i64(a, a, b);
3734     tcg_gen_add_i64(d, d, a);
3735 }
3736 
3737 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3738 {
3739     tcg_gen_mul_i64(a, a, b);
3740     tcg_gen_sub_i64(d, d, a);
3741 }
3742 
3743 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3744 {
3745     tcg_gen_mul_vec(vece, a, a, b);
3746     tcg_gen_add_vec(vece, d, d, a);
3747 }
3748 
3749 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3750 {
3751     tcg_gen_mul_vec(vece, a, a, b);
3752     tcg_gen_sub_vec(vece, d, d, a);
3753 }
3754 
3755 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3756  * these tables are shared with AArch64 which does support them.
3757  */
3758 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3759                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3760 {
3761     static const TCGOpcode vecop_list[] = {
3762         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3763     };
3764     static const GVecGen3 ops[4] = {
3765         { .fni4 = gen_mla8_i32,
3766           .fniv = gen_mla_vec,
3767           .load_dest = true,
3768           .opt_opc = vecop_list,
3769           .vece = MO_8 },
3770         { .fni4 = gen_mla16_i32,
3771           .fniv = gen_mla_vec,
3772           .load_dest = true,
3773           .opt_opc = vecop_list,
3774           .vece = MO_16 },
3775         { .fni4 = gen_mla32_i32,
3776           .fniv = gen_mla_vec,
3777           .load_dest = true,
3778           .opt_opc = vecop_list,
3779           .vece = MO_32 },
3780         { .fni8 = gen_mla64_i64,
3781           .fniv = gen_mla_vec,
3782           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3783           .load_dest = true,
3784           .opt_opc = vecop_list,
3785           .vece = MO_64 },
3786     };
3787     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3788 }
3789 
3790 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3791                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3792 {
3793     static const TCGOpcode vecop_list[] = {
3794         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3795     };
3796     static const GVecGen3 ops[4] = {
3797         { .fni4 = gen_mls8_i32,
3798           .fniv = gen_mls_vec,
3799           .load_dest = true,
3800           .opt_opc = vecop_list,
3801           .vece = MO_8 },
3802         { .fni4 = gen_mls16_i32,
3803           .fniv = gen_mls_vec,
3804           .load_dest = true,
3805           .opt_opc = vecop_list,
3806           .vece = MO_16 },
3807         { .fni4 = gen_mls32_i32,
3808           .fniv = gen_mls_vec,
3809           .load_dest = true,
3810           .opt_opc = vecop_list,
3811           .vece = MO_32 },
3812         { .fni8 = gen_mls64_i64,
3813           .fniv = gen_mls_vec,
3814           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3815           .load_dest = true,
3816           .opt_opc = vecop_list,
3817           .vece = MO_64 },
3818     };
3819     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3820 }
3821 
3822 /* CMTST : test is "if (X & Y != 0)". */
3823 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3824 {
3825     tcg_gen_and_i32(d, a, b);
3826     tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
3827 }
3828 
3829 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3830 {
3831     tcg_gen_and_i64(d, a, b);
3832     tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
3833 }
3834 
3835 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3836 {
3837     tcg_gen_and_vec(vece, d, a, b);
3838     tcg_gen_dupi_vec(vece, a, 0);
3839     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3840 }
3841 
3842 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3843                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3844 {
3845     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3846     static const GVecGen3 ops[4] = {
3847         { .fni4 = gen_helper_neon_tst_u8,
3848           .fniv = gen_cmtst_vec,
3849           .opt_opc = vecop_list,
3850           .vece = MO_8 },
3851         { .fni4 = gen_helper_neon_tst_u16,
3852           .fniv = gen_cmtst_vec,
3853           .opt_opc = vecop_list,
3854           .vece = MO_16 },
3855         { .fni4 = gen_cmtst_i32,
3856           .fniv = gen_cmtst_vec,
3857           .opt_opc = vecop_list,
3858           .vece = MO_32 },
3859         { .fni8 = gen_cmtst_i64,
3860           .fniv = gen_cmtst_vec,
3861           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3862           .opt_opc = vecop_list,
3863           .vece = MO_64 },
3864     };
3865     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3866 }
3867 
3868 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3869 {
3870     TCGv_i32 lval = tcg_temp_new_i32();
3871     TCGv_i32 rval = tcg_temp_new_i32();
3872     TCGv_i32 lsh = tcg_temp_new_i32();
3873     TCGv_i32 rsh = tcg_temp_new_i32();
3874     TCGv_i32 zero = tcg_constant_i32(0);
3875     TCGv_i32 max = tcg_constant_i32(32);
3876 
3877     /*
3878      * Rely on the TCG guarantee that out of range shifts produce
3879      * unspecified results, not undefined behaviour (i.e. no trap).
3880      * Discard out-of-range results after the fact.
3881      */
3882     tcg_gen_ext8s_i32(lsh, shift);
3883     tcg_gen_neg_i32(rsh, lsh);
3884     tcg_gen_shl_i32(lval, src, lsh);
3885     tcg_gen_shr_i32(rval, src, rsh);
3886     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3887     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3888 }
3889 
3890 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3891 {
3892     TCGv_i64 lval = tcg_temp_new_i64();
3893     TCGv_i64 rval = tcg_temp_new_i64();
3894     TCGv_i64 lsh = tcg_temp_new_i64();
3895     TCGv_i64 rsh = tcg_temp_new_i64();
3896     TCGv_i64 zero = tcg_constant_i64(0);
3897     TCGv_i64 max = tcg_constant_i64(64);
3898 
3899     /*
3900      * Rely on the TCG guarantee that out of range shifts produce
3901      * unspecified results, not undefined behaviour (i.e. no trap).
3902      * Discard out-of-range results after the fact.
3903      */
3904     tcg_gen_ext8s_i64(lsh, shift);
3905     tcg_gen_neg_i64(rsh, lsh);
3906     tcg_gen_shl_i64(lval, src, lsh);
3907     tcg_gen_shr_i64(rval, src, rsh);
3908     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3909     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3910 }
3911 
3912 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3913                          TCGv_vec src, TCGv_vec shift)
3914 {
3915     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3916     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3917     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3918     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3919     TCGv_vec msk, max;
3920 
3921     tcg_gen_neg_vec(vece, rsh, shift);
3922     if (vece == MO_8) {
3923         tcg_gen_mov_vec(lsh, shift);
3924     } else {
3925         msk = tcg_temp_new_vec_matching(dst);
3926         tcg_gen_dupi_vec(vece, msk, 0xff);
3927         tcg_gen_and_vec(vece, lsh, shift, msk);
3928         tcg_gen_and_vec(vece, rsh, rsh, msk);
3929     }
3930 
3931     /*
3932      * Rely on the TCG guarantee that out of range shifts produce
3933      * unspecified results, not undefined behaviour (i.e. no trap).
3934      * Discard out-of-range results after the fact.
3935      */
3936     tcg_gen_shlv_vec(vece, lval, src, lsh);
3937     tcg_gen_shrv_vec(vece, rval, src, rsh);
3938 
3939     max = tcg_temp_new_vec_matching(dst);
3940     tcg_gen_dupi_vec(vece, max, 8 << vece);
3941 
3942     /*
3943      * The choice of LT (signed) and GEU (unsigned) are biased toward
3944      * the instructions of the x86_64 host.  For MO_8, the whole byte
3945      * is significant so we must use an unsigned compare; otherwise we
3946      * have already masked to a byte and so a signed compare works.
3947      * Other tcg hosts have a full set of comparisons and do not care.
3948      */
3949     if (vece == MO_8) {
3950         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3951         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3952         tcg_gen_andc_vec(vece, lval, lval, lsh);
3953         tcg_gen_andc_vec(vece, rval, rval, rsh);
3954     } else {
3955         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3956         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3957         tcg_gen_and_vec(vece, lval, lval, lsh);
3958         tcg_gen_and_vec(vece, rval, rval, rsh);
3959     }
3960     tcg_gen_or_vec(vece, dst, lval, rval);
3961 }
3962 
3963 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3964                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3965 {
3966     static const TCGOpcode vecop_list[] = {
3967         INDEX_op_neg_vec, INDEX_op_shlv_vec,
3968         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
3969     };
3970     static const GVecGen3 ops[4] = {
3971         { .fniv = gen_ushl_vec,
3972           .fno = gen_helper_gvec_ushl_b,
3973           .opt_opc = vecop_list,
3974           .vece = MO_8 },
3975         { .fniv = gen_ushl_vec,
3976           .fno = gen_helper_gvec_ushl_h,
3977           .opt_opc = vecop_list,
3978           .vece = MO_16 },
3979         { .fni4 = gen_ushl_i32,
3980           .fniv = gen_ushl_vec,
3981           .opt_opc = vecop_list,
3982           .vece = MO_32 },
3983         { .fni8 = gen_ushl_i64,
3984           .fniv = gen_ushl_vec,
3985           .opt_opc = vecop_list,
3986           .vece = MO_64 },
3987     };
3988     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3989 }
3990 
3991 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3992 {
3993     TCGv_i32 lval = tcg_temp_new_i32();
3994     TCGv_i32 rval = tcg_temp_new_i32();
3995     TCGv_i32 lsh = tcg_temp_new_i32();
3996     TCGv_i32 rsh = tcg_temp_new_i32();
3997     TCGv_i32 zero = tcg_constant_i32(0);
3998     TCGv_i32 max = tcg_constant_i32(31);
3999 
4000     /*
4001      * Rely on the TCG guarantee that out of range shifts produce
4002      * unspecified results, not undefined behaviour (i.e. no trap).
4003      * Discard out-of-range results after the fact.
4004      */
4005     tcg_gen_ext8s_i32(lsh, shift);
4006     tcg_gen_neg_i32(rsh, lsh);
4007     tcg_gen_shl_i32(lval, src, lsh);
4008     tcg_gen_umin_i32(rsh, rsh, max);
4009     tcg_gen_sar_i32(rval, src, rsh);
4010     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4011     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4012 }
4013 
4014 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4015 {
4016     TCGv_i64 lval = tcg_temp_new_i64();
4017     TCGv_i64 rval = tcg_temp_new_i64();
4018     TCGv_i64 lsh = tcg_temp_new_i64();
4019     TCGv_i64 rsh = tcg_temp_new_i64();
4020     TCGv_i64 zero = tcg_constant_i64(0);
4021     TCGv_i64 max = tcg_constant_i64(63);
4022 
4023     /*
4024      * Rely on the TCG guarantee that out of range shifts produce
4025      * unspecified results, not undefined behaviour (i.e. no trap).
4026      * Discard out-of-range results after the fact.
4027      */
4028     tcg_gen_ext8s_i64(lsh, shift);
4029     tcg_gen_neg_i64(rsh, lsh);
4030     tcg_gen_shl_i64(lval, src, lsh);
4031     tcg_gen_umin_i64(rsh, rsh, max);
4032     tcg_gen_sar_i64(rval, src, rsh);
4033     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4034     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4035 }
4036 
4037 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4038                          TCGv_vec src, TCGv_vec shift)
4039 {
4040     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4041     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4042     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4043     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4044     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4045 
4046     /*
4047      * Rely on the TCG guarantee that out of range shifts produce
4048      * unspecified results, not undefined behaviour (i.e. no trap).
4049      * Discard out-of-range results after the fact.
4050      */
4051     tcg_gen_neg_vec(vece, rsh, shift);
4052     if (vece == MO_8) {
4053         tcg_gen_mov_vec(lsh, shift);
4054     } else {
4055         tcg_gen_dupi_vec(vece, tmp, 0xff);
4056         tcg_gen_and_vec(vece, lsh, shift, tmp);
4057         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4058     }
4059 
4060     /* Bound rsh so out of bound right shift gets -1.  */
4061     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4062     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4063     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4064 
4065     tcg_gen_shlv_vec(vece, lval, src, lsh);
4066     tcg_gen_sarv_vec(vece, rval, src, rsh);
4067 
4068     /* Select in-bound left shift.  */
4069     tcg_gen_andc_vec(vece, lval, lval, tmp);
4070 
4071     /* Select between left and right shift.  */
4072     if (vece == MO_8) {
4073         tcg_gen_dupi_vec(vece, tmp, 0);
4074         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4075     } else {
4076         tcg_gen_dupi_vec(vece, tmp, 0x80);
4077         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4078     }
4079 }
4080 
4081 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4082                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4083 {
4084     static const TCGOpcode vecop_list[] = {
4085         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4086         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4087     };
4088     static const GVecGen3 ops[4] = {
4089         { .fniv = gen_sshl_vec,
4090           .fno = gen_helper_gvec_sshl_b,
4091           .opt_opc = vecop_list,
4092           .vece = MO_8 },
4093         { .fniv = gen_sshl_vec,
4094           .fno = gen_helper_gvec_sshl_h,
4095           .opt_opc = vecop_list,
4096           .vece = MO_16 },
4097         { .fni4 = gen_sshl_i32,
4098           .fniv = gen_sshl_vec,
4099           .opt_opc = vecop_list,
4100           .vece = MO_32 },
4101         { .fni8 = gen_sshl_i64,
4102           .fniv = gen_sshl_vec,
4103           .opt_opc = vecop_list,
4104           .vece = MO_64 },
4105     };
4106     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4107 }
4108 
4109 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4110                           TCGv_vec a, TCGv_vec b)
4111 {
4112     TCGv_vec x = tcg_temp_new_vec_matching(t);
4113     tcg_gen_add_vec(vece, x, a, b);
4114     tcg_gen_usadd_vec(vece, t, a, b);
4115     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4116     tcg_gen_or_vec(vece, sat, sat, x);
4117 }
4118 
4119 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4120                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4121 {
4122     static const TCGOpcode vecop_list[] = {
4123         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4124     };
4125     static const GVecGen4 ops[4] = {
4126         { .fniv = gen_uqadd_vec,
4127           .fno = gen_helper_gvec_uqadd_b,
4128           .write_aofs = true,
4129           .opt_opc = vecop_list,
4130           .vece = MO_8 },
4131         { .fniv = gen_uqadd_vec,
4132           .fno = gen_helper_gvec_uqadd_h,
4133           .write_aofs = true,
4134           .opt_opc = vecop_list,
4135           .vece = MO_16 },
4136         { .fniv = gen_uqadd_vec,
4137           .fno = gen_helper_gvec_uqadd_s,
4138           .write_aofs = true,
4139           .opt_opc = vecop_list,
4140           .vece = MO_32 },
4141         { .fniv = gen_uqadd_vec,
4142           .fno = gen_helper_gvec_uqadd_d,
4143           .write_aofs = true,
4144           .opt_opc = vecop_list,
4145           .vece = MO_64 },
4146     };
4147     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4148                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4149 }
4150 
4151 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4152                           TCGv_vec a, TCGv_vec b)
4153 {
4154     TCGv_vec x = tcg_temp_new_vec_matching(t);
4155     tcg_gen_add_vec(vece, x, a, b);
4156     tcg_gen_ssadd_vec(vece, t, a, b);
4157     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4158     tcg_gen_or_vec(vece, sat, sat, x);
4159 }
4160 
4161 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4162                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4163 {
4164     static const TCGOpcode vecop_list[] = {
4165         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4166     };
4167     static const GVecGen4 ops[4] = {
4168         { .fniv = gen_sqadd_vec,
4169           .fno = gen_helper_gvec_sqadd_b,
4170           .opt_opc = vecop_list,
4171           .write_aofs = true,
4172           .vece = MO_8 },
4173         { .fniv = gen_sqadd_vec,
4174           .fno = gen_helper_gvec_sqadd_h,
4175           .opt_opc = vecop_list,
4176           .write_aofs = true,
4177           .vece = MO_16 },
4178         { .fniv = gen_sqadd_vec,
4179           .fno = gen_helper_gvec_sqadd_s,
4180           .opt_opc = vecop_list,
4181           .write_aofs = true,
4182           .vece = MO_32 },
4183         { .fniv = gen_sqadd_vec,
4184           .fno = gen_helper_gvec_sqadd_d,
4185           .opt_opc = vecop_list,
4186           .write_aofs = true,
4187           .vece = MO_64 },
4188     };
4189     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4190                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4191 }
4192 
4193 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4194                           TCGv_vec a, TCGv_vec b)
4195 {
4196     TCGv_vec x = tcg_temp_new_vec_matching(t);
4197     tcg_gen_sub_vec(vece, x, a, b);
4198     tcg_gen_ussub_vec(vece, t, a, b);
4199     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4200     tcg_gen_or_vec(vece, sat, sat, x);
4201 }
4202 
4203 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4204                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4205 {
4206     static const TCGOpcode vecop_list[] = {
4207         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4208     };
4209     static const GVecGen4 ops[4] = {
4210         { .fniv = gen_uqsub_vec,
4211           .fno = gen_helper_gvec_uqsub_b,
4212           .opt_opc = vecop_list,
4213           .write_aofs = true,
4214           .vece = MO_8 },
4215         { .fniv = gen_uqsub_vec,
4216           .fno = gen_helper_gvec_uqsub_h,
4217           .opt_opc = vecop_list,
4218           .write_aofs = true,
4219           .vece = MO_16 },
4220         { .fniv = gen_uqsub_vec,
4221           .fno = gen_helper_gvec_uqsub_s,
4222           .opt_opc = vecop_list,
4223           .write_aofs = true,
4224           .vece = MO_32 },
4225         { .fniv = gen_uqsub_vec,
4226           .fno = gen_helper_gvec_uqsub_d,
4227           .opt_opc = vecop_list,
4228           .write_aofs = true,
4229           .vece = MO_64 },
4230     };
4231     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4232                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4233 }
4234 
4235 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4236                           TCGv_vec a, TCGv_vec b)
4237 {
4238     TCGv_vec x = tcg_temp_new_vec_matching(t);
4239     tcg_gen_sub_vec(vece, x, a, b);
4240     tcg_gen_sssub_vec(vece, t, a, b);
4241     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4242     tcg_gen_or_vec(vece, sat, sat, x);
4243 }
4244 
4245 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4246                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4247 {
4248     static const TCGOpcode vecop_list[] = {
4249         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4250     };
4251     static const GVecGen4 ops[4] = {
4252         { .fniv = gen_sqsub_vec,
4253           .fno = gen_helper_gvec_sqsub_b,
4254           .opt_opc = vecop_list,
4255           .write_aofs = true,
4256           .vece = MO_8 },
4257         { .fniv = gen_sqsub_vec,
4258           .fno = gen_helper_gvec_sqsub_h,
4259           .opt_opc = vecop_list,
4260           .write_aofs = true,
4261           .vece = MO_16 },
4262         { .fniv = gen_sqsub_vec,
4263           .fno = gen_helper_gvec_sqsub_s,
4264           .opt_opc = vecop_list,
4265           .write_aofs = true,
4266           .vece = MO_32 },
4267         { .fniv = gen_sqsub_vec,
4268           .fno = gen_helper_gvec_sqsub_d,
4269           .opt_opc = vecop_list,
4270           .write_aofs = true,
4271           .vece = MO_64 },
4272     };
4273     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4274                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4275 }
4276 
4277 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4278 {
4279     TCGv_i32 t = tcg_temp_new_i32();
4280 
4281     tcg_gen_sub_i32(t, a, b);
4282     tcg_gen_sub_i32(d, b, a);
4283     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4284 }
4285 
4286 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4287 {
4288     TCGv_i64 t = tcg_temp_new_i64();
4289 
4290     tcg_gen_sub_i64(t, a, b);
4291     tcg_gen_sub_i64(d, b, a);
4292     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4293 }
4294 
4295 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4296 {
4297     TCGv_vec t = tcg_temp_new_vec_matching(d);
4298 
4299     tcg_gen_smin_vec(vece, t, a, b);
4300     tcg_gen_smax_vec(vece, d, a, b);
4301     tcg_gen_sub_vec(vece, d, d, t);
4302 }
4303 
4304 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4305                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4306 {
4307     static const TCGOpcode vecop_list[] = {
4308         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4309     };
4310     static const GVecGen3 ops[4] = {
4311         { .fniv = gen_sabd_vec,
4312           .fno = gen_helper_gvec_sabd_b,
4313           .opt_opc = vecop_list,
4314           .vece = MO_8 },
4315         { .fniv = gen_sabd_vec,
4316           .fno = gen_helper_gvec_sabd_h,
4317           .opt_opc = vecop_list,
4318           .vece = MO_16 },
4319         { .fni4 = gen_sabd_i32,
4320           .fniv = gen_sabd_vec,
4321           .fno = gen_helper_gvec_sabd_s,
4322           .opt_opc = vecop_list,
4323           .vece = MO_32 },
4324         { .fni8 = gen_sabd_i64,
4325           .fniv = gen_sabd_vec,
4326           .fno = gen_helper_gvec_sabd_d,
4327           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4328           .opt_opc = vecop_list,
4329           .vece = MO_64 },
4330     };
4331     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4332 }
4333 
4334 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4335 {
4336     TCGv_i32 t = tcg_temp_new_i32();
4337 
4338     tcg_gen_sub_i32(t, a, b);
4339     tcg_gen_sub_i32(d, b, a);
4340     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4341 }
4342 
4343 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4344 {
4345     TCGv_i64 t = tcg_temp_new_i64();
4346 
4347     tcg_gen_sub_i64(t, a, b);
4348     tcg_gen_sub_i64(d, b, a);
4349     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4350 }
4351 
4352 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4353 {
4354     TCGv_vec t = tcg_temp_new_vec_matching(d);
4355 
4356     tcg_gen_umin_vec(vece, t, a, b);
4357     tcg_gen_umax_vec(vece, d, a, b);
4358     tcg_gen_sub_vec(vece, d, d, t);
4359 }
4360 
4361 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4362                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4363 {
4364     static const TCGOpcode vecop_list[] = {
4365         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4366     };
4367     static const GVecGen3 ops[4] = {
4368         { .fniv = gen_uabd_vec,
4369           .fno = gen_helper_gvec_uabd_b,
4370           .opt_opc = vecop_list,
4371           .vece = MO_8 },
4372         { .fniv = gen_uabd_vec,
4373           .fno = gen_helper_gvec_uabd_h,
4374           .opt_opc = vecop_list,
4375           .vece = MO_16 },
4376         { .fni4 = gen_uabd_i32,
4377           .fniv = gen_uabd_vec,
4378           .fno = gen_helper_gvec_uabd_s,
4379           .opt_opc = vecop_list,
4380           .vece = MO_32 },
4381         { .fni8 = gen_uabd_i64,
4382           .fniv = gen_uabd_vec,
4383           .fno = gen_helper_gvec_uabd_d,
4384           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4385           .opt_opc = vecop_list,
4386           .vece = MO_64 },
4387     };
4388     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4389 }
4390 
4391 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4392 {
4393     TCGv_i32 t = tcg_temp_new_i32();
4394     gen_sabd_i32(t, a, b);
4395     tcg_gen_add_i32(d, d, t);
4396 }
4397 
4398 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4399 {
4400     TCGv_i64 t = tcg_temp_new_i64();
4401     gen_sabd_i64(t, a, b);
4402     tcg_gen_add_i64(d, d, t);
4403 }
4404 
4405 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4406 {
4407     TCGv_vec t = tcg_temp_new_vec_matching(d);
4408     gen_sabd_vec(vece, t, a, b);
4409     tcg_gen_add_vec(vece, d, d, t);
4410 }
4411 
4412 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4413                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4414 {
4415     static const TCGOpcode vecop_list[] = {
4416         INDEX_op_sub_vec, INDEX_op_add_vec,
4417         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4418     };
4419     static const GVecGen3 ops[4] = {
4420         { .fniv = gen_saba_vec,
4421           .fno = gen_helper_gvec_saba_b,
4422           .opt_opc = vecop_list,
4423           .load_dest = true,
4424           .vece = MO_8 },
4425         { .fniv = gen_saba_vec,
4426           .fno = gen_helper_gvec_saba_h,
4427           .opt_opc = vecop_list,
4428           .load_dest = true,
4429           .vece = MO_16 },
4430         { .fni4 = gen_saba_i32,
4431           .fniv = gen_saba_vec,
4432           .fno = gen_helper_gvec_saba_s,
4433           .opt_opc = vecop_list,
4434           .load_dest = true,
4435           .vece = MO_32 },
4436         { .fni8 = gen_saba_i64,
4437           .fniv = gen_saba_vec,
4438           .fno = gen_helper_gvec_saba_d,
4439           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4440           .opt_opc = vecop_list,
4441           .load_dest = true,
4442           .vece = MO_64 },
4443     };
4444     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4445 }
4446 
4447 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4448 {
4449     TCGv_i32 t = tcg_temp_new_i32();
4450     gen_uabd_i32(t, a, b);
4451     tcg_gen_add_i32(d, d, t);
4452 }
4453 
4454 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4455 {
4456     TCGv_i64 t = tcg_temp_new_i64();
4457     gen_uabd_i64(t, a, b);
4458     tcg_gen_add_i64(d, d, t);
4459 }
4460 
4461 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4462 {
4463     TCGv_vec t = tcg_temp_new_vec_matching(d);
4464     gen_uabd_vec(vece, t, a, b);
4465     tcg_gen_add_vec(vece, d, d, t);
4466 }
4467 
4468 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4469                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4470 {
4471     static const TCGOpcode vecop_list[] = {
4472         INDEX_op_sub_vec, INDEX_op_add_vec,
4473         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4474     };
4475     static const GVecGen3 ops[4] = {
4476         { .fniv = gen_uaba_vec,
4477           .fno = gen_helper_gvec_uaba_b,
4478           .opt_opc = vecop_list,
4479           .load_dest = true,
4480           .vece = MO_8 },
4481         { .fniv = gen_uaba_vec,
4482           .fno = gen_helper_gvec_uaba_h,
4483           .opt_opc = vecop_list,
4484           .load_dest = true,
4485           .vece = MO_16 },
4486         { .fni4 = gen_uaba_i32,
4487           .fniv = gen_uaba_vec,
4488           .fno = gen_helper_gvec_uaba_s,
4489           .opt_opc = vecop_list,
4490           .load_dest = true,
4491           .vece = MO_32 },
4492         { .fni8 = gen_uaba_i64,
4493           .fniv = gen_uaba_vec,
4494           .fno = gen_helper_gvec_uaba_d,
4495           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4496           .opt_opc = vecop_list,
4497           .load_dest = true,
4498           .vece = MO_64 },
4499     };
4500     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4501 }
4502 
4503 static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
4504 {
4505     static const uint16_t mask[3] = {
4506         0b0000000111100111,  /* crn ==  9, crm == {c0-c2, c5-c8}   */
4507         0b0000000100010011,  /* crn == 10, crm == {c0, c1, c4, c8} */
4508         0b1000000111111111,  /* crn == 11, crm == {c0-c8, c15}     */
4509     };
4510 
4511     if (crn >= 9 && crn <= 11) {
4512         return (mask[crn - 9] >> crm) & 1;
4513     }
4514     return false;
4515 }
4516 
4517 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4518                            int opc1, int crn, int crm, int opc2,
4519                            bool isread, int rt, int rt2)
4520 {
4521     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4522     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4523     TCGv_ptr tcg_ri = NULL;
4524     bool need_exit_tb = false;
4525     uint32_t syndrome;
4526 
4527     /*
4528      * Note that since we are an implementation which takes an
4529      * exception on a trapped conditional instruction only if the
4530      * instruction passes its condition code check, we can take
4531      * advantage of the clause in the ARM ARM that allows us to set
4532      * the COND field in the instruction to 0xE in all cases.
4533      * We could fish the actual condition out of the insn (ARM)
4534      * or the condexec bits (Thumb) but it isn't necessary.
4535      */
4536     switch (cpnum) {
4537     case 14:
4538         if (is64) {
4539             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4540                                          isread, false);
4541         } else {
4542             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4543                                         rt, isread, false);
4544         }
4545         break;
4546     case 15:
4547         if (is64) {
4548             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4549                                          isread, false);
4550         } else {
4551             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4552                                         rt, isread, false);
4553         }
4554         break;
4555     default:
4556         /*
4557          * ARMv8 defines that only coprocessors 14 and 15 exist,
4558          * so this can only happen if this is an ARMv7 or earlier CPU,
4559          * in which case the syndrome information won't actually be
4560          * guest visible.
4561          */
4562         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4563         syndrome = syn_uncategorized();
4564         break;
4565     }
4566 
4567     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4568         /*
4569          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4570          * over the UNDEF for "no such register" or the UNDEF for "access
4571          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4572          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4573          * access_check_cp_reg(), after the checks for whether the access
4574          * configurably trapped to EL1.
4575          */
4576         uint32_t maskbit = is64 ? crm : crn;
4577 
4578         if (maskbit != 4 && maskbit != 14) {
4579             /* T4 and T14 are RES0 so never cause traps */
4580             TCGv_i32 t;
4581             DisasLabel over = gen_disas_label(s);
4582 
4583             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4584             tcg_gen_andi_i32(t, t, 1u << maskbit);
4585             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4586 
4587             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4588             /*
4589              * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
4590              * but since we're conditionally branching over it, we want
4591              * to assume continue-to-next-instruction.
4592              */
4593             s->base.is_jmp = DISAS_NEXT;
4594             set_disas_label(s, over);
4595         }
4596     }
4597 
4598     if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) {
4599         /*
4600          * Check for TIDCP trap, which must take precedence over the UNDEF
4601          * for "no such register" etc.  It shares precedence with HSTR,
4602          * but raises the same exception, so order doesn't matter.
4603          */
4604         switch (s->current_el) {
4605         case 0:
4606             if (arm_dc_feature(s, ARM_FEATURE_AARCH64)
4607                 && dc_isar_feature(aa64_tidcp1, s)) {
4608                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
4609             }
4610             break;
4611         case 1:
4612             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
4613             break;
4614         }
4615     }
4616 
4617     if (!ri) {
4618         /*
4619          * Unknown register; this might be a guest error or a QEMU
4620          * unimplemented feature.
4621          */
4622         if (is64) {
4623             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4624                           "64 bit system register cp:%d opc1: %d crm:%d "
4625                           "(%s)\n",
4626                           isread ? "read" : "write", cpnum, opc1, crm,
4627                           s->ns ? "non-secure" : "secure");
4628         } else {
4629             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4630                           "system register cp:%d opc1:%d crn:%d crm:%d "
4631                           "opc2:%d (%s)\n",
4632                           isread ? "read" : "write", cpnum, opc1, crn,
4633                           crm, opc2, s->ns ? "non-secure" : "secure");
4634         }
4635         unallocated_encoding(s);
4636         return;
4637     }
4638 
4639     /* Check access permissions */
4640     if (!cp_access_ok(s->current_el, ri, isread)) {
4641         unallocated_encoding(s);
4642         return;
4643     }
4644 
4645     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4646         (ri->fgt && s->fgt_active) ||
4647         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4648         /*
4649          * Emit code to perform further access permissions checks at
4650          * runtime; this may result in an exception.
4651          * Note that on XScale all cp0..c13 registers do an access check
4652          * call in order to handle c15_cpar.
4653          */
4654         gen_set_condexec(s);
4655         gen_update_pc(s, 0);
4656         tcg_ri = tcg_temp_new_ptr();
4657         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
4658                                        tcg_constant_i32(key),
4659                                        tcg_constant_i32(syndrome),
4660                                        tcg_constant_i32(isread));
4661     } else if (ri->type & ARM_CP_RAISES_EXC) {
4662         /*
4663          * The readfn or writefn might raise an exception;
4664          * synchronize the CPU state in case it does.
4665          */
4666         gen_set_condexec(s);
4667         gen_update_pc(s, 0);
4668     }
4669 
4670     /* Handle special cases first */
4671     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4672     case 0:
4673         break;
4674     case ARM_CP_NOP:
4675         return;
4676     case ARM_CP_WFI:
4677         if (isread) {
4678             unallocated_encoding(s);
4679         } else {
4680             gen_update_pc(s, curr_insn_len(s));
4681             s->base.is_jmp = DISAS_WFI;
4682         }
4683         return;
4684     default:
4685         g_assert_not_reached();
4686     }
4687 
4688     if (ri->type & ARM_CP_IO) {
4689         /* I/O operations must end the TB here (whether read or write) */
4690         need_exit_tb = translator_io_start(&s->base);
4691     }
4692 
4693     if (isread) {
4694         /* Read */
4695         if (is64) {
4696             TCGv_i64 tmp64;
4697             TCGv_i32 tmp;
4698             if (ri->type & ARM_CP_CONST) {
4699                 tmp64 = tcg_constant_i64(ri->resetvalue);
4700             } else if (ri->readfn) {
4701                 if (!tcg_ri) {
4702                     tcg_ri = gen_lookup_cp_reg(key);
4703                 }
4704                 tmp64 = tcg_temp_new_i64();
4705                 gen_helper_get_cp_reg64(tmp64, tcg_env, tcg_ri);
4706             } else {
4707                 tmp64 = tcg_temp_new_i64();
4708                 tcg_gen_ld_i64(tmp64, tcg_env, ri->fieldoffset);
4709             }
4710             tmp = tcg_temp_new_i32();
4711             tcg_gen_extrl_i64_i32(tmp, tmp64);
4712             store_reg(s, rt, tmp);
4713             tmp = tcg_temp_new_i32();
4714             tcg_gen_extrh_i64_i32(tmp, tmp64);
4715             store_reg(s, rt2, tmp);
4716         } else {
4717             TCGv_i32 tmp;
4718             if (ri->type & ARM_CP_CONST) {
4719                 tmp = tcg_constant_i32(ri->resetvalue);
4720             } else if (ri->readfn) {
4721                 if (!tcg_ri) {
4722                     tcg_ri = gen_lookup_cp_reg(key);
4723                 }
4724                 tmp = tcg_temp_new_i32();
4725                 gen_helper_get_cp_reg(tmp, tcg_env, tcg_ri);
4726             } else {
4727                 tmp = load_cpu_offset(ri->fieldoffset);
4728             }
4729             if (rt == 15) {
4730                 /* Destination register of r15 for 32 bit loads sets
4731                  * the condition codes from the high 4 bits of the value
4732                  */
4733                 gen_set_nzcv(tmp);
4734             } else {
4735                 store_reg(s, rt, tmp);
4736             }
4737         }
4738     } else {
4739         /* Write */
4740         if (ri->type & ARM_CP_CONST) {
4741             /* If not forbidden by access permissions, treat as WI */
4742             return;
4743         }
4744 
4745         if (is64) {
4746             TCGv_i32 tmplo, tmphi;
4747             TCGv_i64 tmp64 = tcg_temp_new_i64();
4748             tmplo = load_reg(s, rt);
4749             tmphi = load_reg(s, rt2);
4750             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4751             if (ri->writefn) {
4752                 if (!tcg_ri) {
4753                     tcg_ri = gen_lookup_cp_reg(key);
4754                 }
4755                 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tmp64);
4756             } else {
4757                 tcg_gen_st_i64(tmp64, tcg_env, ri->fieldoffset);
4758             }
4759         } else {
4760             TCGv_i32 tmp = load_reg(s, rt);
4761             if (ri->writefn) {
4762                 if (!tcg_ri) {
4763                     tcg_ri = gen_lookup_cp_reg(key);
4764                 }
4765                 gen_helper_set_cp_reg(tcg_env, tcg_ri, tmp);
4766             } else {
4767                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4768             }
4769         }
4770     }
4771 
4772     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4773         /*
4774          * A write to any coprocessor register that ends a TB
4775          * must rebuild the hflags for the next TB.
4776          */
4777         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4778         /*
4779          * We default to ending the TB on a coprocessor register write,
4780          * but allow this to be suppressed by the register definition
4781          * (usually only necessary to work around guest bugs).
4782          */
4783         need_exit_tb = true;
4784     }
4785     if (need_exit_tb) {
4786         gen_lookup_tb(s);
4787     }
4788 }
4789 
4790 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4791 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4792 {
4793     int cpnum = (insn >> 8) & 0xf;
4794 
4795     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4796         unallocated_encoding(s);
4797     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4798         if (disas_iwmmxt_insn(s, insn)) {
4799             unallocated_encoding(s);
4800         }
4801     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4802         if (disas_dsp_insn(s, insn)) {
4803             unallocated_encoding(s);
4804         }
4805     }
4806 }
4807 
4808 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4809 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4810 {
4811     TCGv_i32 tmp;
4812     tmp = tcg_temp_new_i32();
4813     tcg_gen_extrl_i64_i32(tmp, val);
4814     store_reg(s, rlow, tmp);
4815     tmp = tcg_temp_new_i32();
4816     tcg_gen_extrh_i64_i32(tmp, val);
4817     store_reg(s, rhigh, tmp);
4818 }
4819 
4820 /* load and add a 64-bit value from a register pair.  */
4821 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4822 {
4823     TCGv_i64 tmp;
4824     TCGv_i32 tmpl;
4825     TCGv_i32 tmph;
4826 
4827     /* Load 64-bit value rd:rn.  */
4828     tmpl = load_reg(s, rlow);
4829     tmph = load_reg(s, rhigh);
4830     tmp = tcg_temp_new_i64();
4831     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4832     tcg_gen_add_i64(val, val, tmp);
4833 }
4834 
4835 /* Set N and Z flags from hi|lo.  */
4836 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4837 {
4838     tcg_gen_mov_i32(cpu_NF, hi);
4839     tcg_gen_or_i32(cpu_ZF, lo, hi);
4840 }
4841 
4842 /* Load/Store exclusive instructions are implemented by remembering
4843    the value/address loaded, and seeing if these are the same
4844    when the store is performed.  This should be sufficient to implement
4845    the architecturally mandated semantics, and avoids having to monitor
4846    regular stores.  The compare vs the remembered value is done during
4847    the cmpxchg operation, but we must compare the addresses manually.  */
4848 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4849                                TCGv_i32 addr, int size)
4850 {
4851     TCGv_i32 tmp = tcg_temp_new_i32();
4852     MemOp opc = size | MO_ALIGN | s->be_data;
4853 
4854     s->is_ldex = true;
4855 
4856     if (size == 3) {
4857         TCGv_i32 tmp2 = tcg_temp_new_i32();
4858         TCGv_i64 t64 = tcg_temp_new_i64();
4859 
4860         /*
4861          * For AArch32, architecturally the 32-bit word at the lowest
4862          * address is always Rt and the one at addr+4 is Rt2, even if
4863          * the CPU is big-endian. That means we don't want to do a
4864          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4865          * architecturally 64-bit access, but instead do a 64-bit access
4866          * using MO_BE if appropriate and then split the two halves.
4867          */
4868         TCGv taddr = gen_aa32_addr(s, addr, opc);
4869 
4870         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4871         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4872         if (s->be_data == MO_BE) {
4873             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4874         } else {
4875             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4876         }
4877         store_reg(s, rt2, tmp2);
4878     } else {
4879         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4880         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4881     }
4882 
4883     store_reg(s, rt, tmp);
4884     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4885 }
4886 
4887 static void gen_clrex(DisasContext *s)
4888 {
4889     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4890 }
4891 
4892 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4893                                 TCGv_i32 addr, int size)
4894 {
4895     TCGv_i32 t0, t1, t2;
4896     TCGv_i64 extaddr;
4897     TCGv taddr;
4898     TCGLabel *done_label;
4899     TCGLabel *fail_label;
4900     MemOp opc = size | MO_ALIGN | s->be_data;
4901 
4902     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4903          [addr] = {Rt};
4904          {Rd} = 0;
4905        } else {
4906          {Rd} = 1;
4907        } */
4908     fail_label = gen_new_label();
4909     done_label = gen_new_label();
4910     extaddr = tcg_temp_new_i64();
4911     tcg_gen_extu_i32_i64(extaddr, addr);
4912     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4913 
4914     taddr = gen_aa32_addr(s, addr, opc);
4915     t0 = tcg_temp_new_i32();
4916     t1 = load_reg(s, rt);
4917     if (size == 3) {
4918         TCGv_i64 o64 = tcg_temp_new_i64();
4919         TCGv_i64 n64 = tcg_temp_new_i64();
4920 
4921         t2 = load_reg(s, rt2);
4922 
4923         /*
4924          * For AArch32, architecturally the 32-bit word at the lowest
4925          * address is always Rt and the one at addr+4 is Rt2, even if
4926          * the CPU is big-endian. Since we're going to treat this as a
4927          * single 64-bit BE store, we need to put the two halves in the
4928          * opposite order for BE to LE, so that they end up in the right
4929          * places.  We don't want gen_aa32_st_i64, because that checks
4930          * SCTLR_B as if for an architectural 64-bit access.
4931          */
4932         if (s->be_data == MO_BE) {
4933             tcg_gen_concat_i32_i64(n64, t2, t1);
4934         } else {
4935             tcg_gen_concat_i32_i64(n64, t1, t2);
4936         }
4937 
4938         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4939                                    get_mem_index(s), opc);
4940 
4941         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4942         tcg_gen_extrl_i64_i32(t0, o64);
4943     } else {
4944         t2 = tcg_temp_new_i32();
4945         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4946         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4947         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4948     }
4949     tcg_gen_mov_i32(cpu_R[rd], t0);
4950     tcg_gen_br(done_label);
4951 
4952     gen_set_label(fail_label);
4953     tcg_gen_movi_i32(cpu_R[rd], 1);
4954     gen_set_label(done_label);
4955     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4956 }
4957 
4958 /* gen_srs:
4959  * @env: CPUARMState
4960  * @s: DisasContext
4961  * @mode: mode field from insn (which stack to store to)
4962  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4963  * @writeback: true if writeback bit set
4964  *
4965  * Generate code for the SRS (Store Return State) insn.
4966  */
4967 static void gen_srs(DisasContext *s,
4968                     uint32_t mode, uint32_t amode, bool writeback)
4969 {
4970     int32_t offset;
4971     TCGv_i32 addr, tmp;
4972     bool undef = false;
4973 
4974     /* SRS is:
4975      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4976      *   and specified mode is monitor mode
4977      * - UNDEFINED in Hyp mode
4978      * - UNPREDICTABLE in User or System mode
4979      * - UNPREDICTABLE if the specified mode is:
4980      * -- not implemented
4981      * -- not a valid mode number
4982      * -- a mode that's at a higher exception level
4983      * -- Monitor, if we are Non-secure
4984      * For the UNPREDICTABLE cases we choose to UNDEF.
4985      */
4986     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
4987         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
4988         return;
4989     }
4990 
4991     if (s->current_el == 0 || s->current_el == 2) {
4992         undef = true;
4993     }
4994 
4995     switch (mode) {
4996     case ARM_CPU_MODE_USR:
4997     case ARM_CPU_MODE_FIQ:
4998     case ARM_CPU_MODE_IRQ:
4999     case ARM_CPU_MODE_SVC:
5000     case ARM_CPU_MODE_ABT:
5001     case ARM_CPU_MODE_UND:
5002     case ARM_CPU_MODE_SYS:
5003         break;
5004     case ARM_CPU_MODE_HYP:
5005         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5006             undef = true;
5007         }
5008         break;
5009     case ARM_CPU_MODE_MON:
5010         /* No need to check specifically for "are we non-secure" because
5011          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5012          * so if this isn't EL3 then we must be non-secure.
5013          */
5014         if (s->current_el != 3) {
5015             undef = true;
5016         }
5017         break;
5018     default:
5019         undef = true;
5020     }
5021 
5022     if (undef) {
5023         unallocated_encoding(s);
5024         return;
5025     }
5026 
5027     addr = tcg_temp_new_i32();
5028     /* get_r13_banked() will raise an exception if called from System mode */
5029     gen_set_condexec(s);
5030     gen_update_pc(s, 0);
5031     gen_helper_get_r13_banked(addr, tcg_env, tcg_constant_i32(mode));
5032     switch (amode) {
5033     case 0: /* DA */
5034         offset = -4;
5035         break;
5036     case 1: /* IA */
5037         offset = 0;
5038         break;
5039     case 2: /* DB */
5040         offset = -8;
5041         break;
5042     case 3: /* IB */
5043         offset = 4;
5044         break;
5045     default:
5046         g_assert_not_reached();
5047     }
5048     tcg_gen_addi_i32(addr, addr, offset);
5049     tmp = load_reg(s, 14);
5050     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5051     tmp = load_cpu_field(spsr);
5052     tcg_gen_addi_i32(addr, addr, 4);
5053     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5054     if (writeback) {
5055         switch (amode) {
5056         case 0:
5057             offset = -8;
5058             break;
5059         case 1:
5060             offset = 4;
5061             break;
5062         case 2:
5063             offset = -4;
5064             break;
5065         case 3:
5066             offset = 0;
5067             break;
5068         default:
5069             g_assert_not_reached();
5070         }
5071         tcg_gen_addi_i32(addr, addr, offset);
5072         gen_helper_set_r13_banked(tcg_env, tcg_constant_i32(mode), addr);
5073     }
5074     s->base.is_jmp = DISAS_UPDATE_EXIT;
5075 }
5076 
5077 /* Skip this instruction if the ARM condition is false */
5078 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5079 {
5080     arm_gen_condlabel(s);
5081     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5082 }
5083 
5084 
5085 /*
5086  * Constant expanders used by T16/T32 decode
5087  */
5088 
5089 /* Return only the rotation part of T32ExpandImm.  */
5090 static int t32_expandimm_rot(DisasContext *s, int x)
5091 {
5092     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5093 }
5094 
5095 /* Return the unrotated immediate from T32ExpandImm.  */
5096 static int t32_expandimm_imm(DisasContext *s, int x)
5097 {
5098     int imm = extract32(x, 0, 8);
5099 
5100     switch (extract32(x, 8, 4)) {
5101     case 0: /* XY */
5102         /* Nothing to do.  */
5103         break;
5104     case 1: /* 00XY00XY */
5105         imm *= 0x00010001;
5106         break;
5107     case 2: /* XY00XY00 */
5108         imm *= 0x01000100;
5109         break;
5110     case 3: /* XYXYXYXY */
5111         imm *= 0x01010101;
5112         break;
5113     default:
5114         /* Rotated constant.  */
5115         imm |= 0x80;
5116         break;
5117     }
5118     return imm;
5119 }
5120 
5121 static int t32_branch24(DisasContext *s, int x)
5122 {
5123     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5124     x ^= !(x < 0) * (3 << 21);
5125     /* Append the final zero.  */
5126     return x << 1;
5127 }
5128 
5129 static int t16_setflags(DisasContext *s)
5130 {
5131     return s->condexec_mask == 0;
5132 }
5133 
5134 static int t16_push_list(DisasContext *s, int x)
5135 {
5136     return (x & 0xff) | (x & 0x100) << (14 - 8);
5137 }
5138 
5139 static int t16_pop_list(DisasContext *s, int x)
5140 {
5141     return (x & 0xff) | (x & 0x100) << (15 - 8);
5142 }
5143 
5144 /*
5145  * Include the generated decoders.
5146  */
5147 
5148 #include "decode-a32.c.inc"
5149 #include "decode-a32-uncond.c.inc"
5150 #include "decode-t32.c.inc"
5151 #include "decode-t16.c.inc"
5152 
5153 static bool valid_cp(DisasContext *s, int cp)
5154 {
5155     /*
5156      * Return true if this coprocessor field indicates something
5157      * that's really a possible coprocessor.
5158      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5159      * and of those only cp14 and cp15 were used for registers.
5160      * cp10 and cp11 were used for VFP and Neon, whose decode is
5161      * dealt with elsewhere. With the advent of fp16, cp9 is also
5162      * now part of VFP.
5163      * For v8A and later, the encoding has been tightened so that
5164      * only cp14 and cp15 are valid, and other values aren't considered
5165      * to be in the coprocessor-instruction space at all. v8M still
5166      * permits coprocessors 0..7.
5167      * For XScale, we must not decode the XScale cp0, cp1 space as
5168      * a standard coprocessor insn, because we want to fall through to
5169      * the legacy disas_xscale_insn() decoder after decodetree is done.
5170      */
5171     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5172         return false;
5173     }
5174 
5175     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5176         !arm_dc_feature(s, ARM_FEATURE_M)) {
5177         return cp >= 14;
5178     }
5179     return cp < 8 || cp >= 14;
5180 }
5181 
5182 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5183 {
5184     if (!valid_cp(s, a->cp)) {
5185         return false;
5186     }
5187     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5188                    false, a->rt, 0);
5189     return true;
5190 }
5191 
5192 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5193 {
5194     if (!valid_cp(s, a->cp)) {
5195         return false;
5196     }
5197     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5198                    true, a->rt, 0);
5199     return true;
5200 }
5201 
5202 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5203 {
5204     if (!valid_cp(s, a->cp)) {
5205         return false;
5206     }
5207     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5208                    false, a->rt, a->rt2);
5209     return true;
5210 }
5211 
5212 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5213 {
5214     if (!valid_cp(s, a->cp)) {
5215         return false;
5216     }
5217     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5218                    true, a->rt, a->rt2);
5219     return true;
5220 }
5221 
5222 /* Helpers to swap operands for reverse-subtract.  */
5223 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5224 {
5225     tcg_gen_sub_i32(dst, b, a);
5226 }
5227 
5228 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5229 {
5230     gen_sub_CC(dst, b, a);
5231 }
5232 
5233 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5234 {
5235     gen_sub_carry(dest, b, a);
5236 }
5237 
5238 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5239 {
5240     gen_sbc_CC(dest, b, a);
5241 }
5242 
5243 /*
5244  * Helpers for the data processing routines.
5245  *
5246  * After the computation store the results back.
5247  * This may be suppressed altogether (STREG_NONE), require a runtime
5248  * check against the stack limits (STREG_SP_CHECK), or generate an
5249  * exception return.  Oh, or store into a register.
5250  *
5251  * Always return true, indicating success for a trans_* function.
5252  */
5253 typedef enum {
5254    STREG_NONE,
5255    STREG_NORMAL,
5256    STREG_SP_CHECK,
5257    STREG_EXC_RET,
5258 } StoreRegKind;
5259 
5260 static bool store_reg_kind(DisasContext *s, int rd,
5261                             TCGv_i32 val, StoreRegKind kind)
5262 {
5263     switch (kind) {
5264     case STREG_NONE:
5265         return true;
5266     case STREG_NORMAL:
5267         /* See ALUWritePC: Interworking only from a32 mode. */
5268         if (s->thumb) {
5269             store_reg(s, rd, val);
5270         } else {
5271             store_reg_bx(s, rd, val);
5272         }
5273         return true;
5274     case STREG_SP_CHECK:
5275         store_sp_checked(s, val);
5276         return true;
5277     case STREG_EXC_RET:
5278         gen_exception_return(s, val);
5279         return true;
5280     }
5281     g_assert_not_reached();
5282 }
5283 
5284 /*
5285  * Data Processing (register)
5286  *
5287  * Operate, with set flags, one register source,
5288  * one immediate shifted register source, and a destination.
5289  */
5290 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5291                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5292                          int logic_cc, StoreRegKind kind)
5293 {
5294     TCGv_i32 tmp1, tmp2;
5295 
5296     tmp2 = load_reg(s, a->rm);
5297     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5298     tmp1 = load_reg(s, a->rn);
5299 
5300     gen(tmp1, tmp1, tmp2);
5301 
5302     if (logic_cc) {
5303         gen_logic_CC(tmp1);
5304     }
5305     return store_reg_kind(s, a->rd, tmp1, kind);
5306 }
5307 
5308 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5309                          void (*gen)(TCGv_i32, TCGv_i32),
5310                          int logic_cc, StoreRegKind kind)
5311 {
5312     TCGv_i32 tmp;
5313 
5314     tmp = load_reg(s, a->rm);
5315     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5316 
5317     gen(tmp, tmp);
5318     if (logic_cc) {
5319         gen_logic_CC(tmp);
5320     }
5321     return store_reg_kind(s, a->rd, tmp, kind);
5322 }
5323 
5324 /*
5325  * Data-processing (register-shifted register)
5326  *
5327  * Operate, with set flags, one register source,
5328  * one register shifted register source, and a destination.
5329  */
5330 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5331                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5332                          int logic_cc, StoreRegKind kind)
5333 {
5334     TCGv_i32 tmp1, tmp2;
5335 
5336     tmp1 = load_reg(s, a->rs);
5337     tmp2 = load_reg(s, a->rm);
5338     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5339     tmp1 = load_reg(s, a->rn);
5340 
5341     gen(tmp1, tmp1, tmp2);
5342 
5343     if (logic_cc) {
5344         gen_logic_CC(tmp1);
5345     }
5346     return store_reg_kind(s, a->rd, tmp1, kind);
5347 }
5348 
5349 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5350                          void (*gen)(TCGv_i32, TCGv_i32),
5351                          int logic_cc, StoreRegKind kind)
5352 {
5353     TCGv_i32 tmp1, tmp2;
5354 
5355     tmp1 = load_reg(s, a->rs);
5356     tmp2 = load_reg(s, a->rm);
5357     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5358 
5359     gen(tmp2, tmp2);
5360     if (logic_cc) {
5361         gen_logic_CC(tmp2);
5362     }
5363     return store_reg_kind(s, a->rd, tmp2, kind);
5364 }
5365 
5366 /*
5367  * Data-processing (immediate)
5368  *
5369  * Operate, with set flags, one register source,
5370  * one rotated immediate, and a destination.
5371  *
5372  * Note that logic_cc && a->rot setting CF based on the msb of the
5373  * immediate is the reason why we must pass in the unrotated form
5374  * of the immediate.
5375  */
5376 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5377                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5378                          int logic_cc, StoreRegKind kind)
5379 {
5380     TCGv_i32 tmp1;
5381     uint32_t imm;
5382 
5383     imm = ror32(a->imm, a->rot);
5384     if (logic_cc && a->rot) {
5385         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5386     }
5387     tmp1 = load_reg(s, a->rn);
5388 
5389     gen(tmp1, tmp1, tcg_constant_i32(imm));
5390 
5391     if (logic_cc) {
5392         gen_logic_CC(tmp1);
5393     }
5394     return store_reg_kind(s, a->rd, tmp1, kind);
5395 }
5396 
5397 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5398                          void (*gen)(TCGv_i32, TCGv_i32),
5399                          int logic_cc, StoreRegKind kind)
5400 {
5401     TCGv_i32 tmp;
5402     uint32_t imm;
5403 
5404     imm = ror32(a->imm, a->rot);
5405     if (logic_cc && a->rot) {
5406         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5407     }
5408 
5409     tmp = tcg_temp_new_i32();
5410     gen(tmp, tcg_constant_i32(imm));
5411 
5412     if (logic_cc) {
5413         gen_logic_CC(tmp);
5414     }
5415     return store_reg_kind(s, a->rd, tmp, kind);
5416 }
5417 
5418 #define DO_ANY3(NAME, OP, L, K)                                         \
5419     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5420     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5421     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5422     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5423     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5424     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5425 
5426 #define DO_ANY2(NAME, OP, L, K)                                         \
5427     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5428     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5429     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5430     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5431     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5432     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5433 
5434 #define DO_CMP2(NAME, OP, L)                                            \
5435     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5436     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5437     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5438     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5439     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5440     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5441 
5442 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5443 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5444 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5445 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5446 
5447 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5448 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5449 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5450 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5451 
5452 DO_CMP2(TST, tcg_gen_and_i32, true)
5453 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5454 DO_CMP2(CMN, gen_add_CC, false)
5455 DO_CMP2(CMP, gen_sub_CC, false)
5456 
5457 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5458         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5459 
5460 /*
5461  * Note for the computation of StoreRegKind we return out of the
5462  * middle of the functions that are expanded by DO_ANY3, and that
5463  * we modify a->s via that parameter before it is used by OP.
5464  */
5465 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5466         ({
5467             StoreRegKind ret = STREG_NORMAL;
5468             if (a->rd == 15 && a->s) {
5469                 /*
5470                  * See ALUExceptionReturn:
5471                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5472                  * In Hyp mode, UNDEFINED.
5473                  */
5474                 if (IS_USER(s) || s->current_el == 2) {
5475                     unallocated_encoding(s);
5476                     return true;
5477                 }
5478                 /* There is no writeback of nzcv to PSTATE.  */
5479                 a->s = 0;
5480                 ret = STREG_EXC_RET;
5481             } else if (a->rd == 13 && a->rn == 13) {
5482                 ret = STREG_SP_CHECK;
5483             }
5484             ret;
5485         }))
5486 
5487 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5488         ({
5489             StoreRegKind ret = STREG_NORMAL;
5490             if (a->rd == 15 && a->s) {
5491                 /*
5492                  * See ALUExceptionReturn:
5493                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5494                  * In Hyp mode, UNDEFINED.
5495                  */
5496                 if (IS_USER(s) || s->current_el == 2) {
5497                     unallocated_encoding(s);
5498                     return true;
5499                 }
5500                 /* There is no writeback of nzcv to PSTATE.  */
5501                 a->s = 0;
5502                 ret = STREG_EXC_RET;
5503             } else if (a->rd == 13) {
5504                 ret = STREG_SP_CHECK;
5505             }
5506             ret;
5507         }))
5508 
5509 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5510 
5511 /*
5512  * ORN is only available with T32, so there is no register-shifted-register
5513  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5514  */
5515 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5516 {
5517     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5518 }
5519 
5520 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5521 {
5522     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5523 }
5524 
5525 #undef DO_ANY3
5526 #undef DO_ANY2
5527 #undef DO_CMP2
5528 
5529 static bool trans_ADR(DisasContext *s, arg_ri *a)
5530 {
5531     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5532     return true;
5533 }
5534 
5535 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5536 {
5537     if (!ENABLE_ARCH_6T2) {
5538         return false;
5539     }
5540 
5541     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5542     return true;
5543 }
5544 
5545 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5546 {
5547     TCGv_i32 tmp;
5548 
5549     if (!ENABLE_ARCH_6T2) {
5550         return false;
5551     }
5552 
5553     tmp = load_reg(s, a->rd);
5554     tcg_gen_ext16u_i32(tmp, tmp);
5555     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5556     store_reg(s, a->rd, tmp);
5557     return true;
5558 }
5559 
5560 /*
5561  * v8.1M MVE wide-shifts
5562  */
5563 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5564                           WideShiftImmFn *fn)
5565 {
5566     TCGv_i64 rda;
5567     TCGv_i32 rdalo, rdahi;
5568 
5569     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5570         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5571         return false;
5572     }
5573     if (a->rdahi == 15) {
5574         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5575         return false;
5576     }
5577     if (!dc_isar_feature(aa32_mve, s) ||
5578         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5579         a->rdahi == 13) {
5580         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5581         unallocated_encoding(s);
5582         return true;
5583     }
5584 
5585     if (a->shim == 0) {
5586         a->shim = 32;
5587     }
5588 
5589     rda = tcg_temp_new_i64();
5590     rdalo = load_reg(s, a->rdalo);
5591     rdahi = load_reg(s, a->rdahi);
5592     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5593 
5594     fn(rda, rda, a->shim);
5595 
5596     tcg_gen_extrl_i64_i32(rdalo, rda);
5597     tcg_gen_extrh_i64_i32(rdahi, rda);
5598     store_reg(s, a->rdalo, rdalo);
5599     store_reg(s, a->rdahi, rdahi);
5600 
5601     return true;
5602 }
5603 
5604 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5605 {
5606     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5607 }
5608 
5609 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5610 {
5611     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5612 }
5613 
5614 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5615 {
5616     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5617 }
5618 
5619 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5620 {
5621     gen_helper_mve_sqshll(r, tcg_env, n, tcg_constant_i32(shift));
5622 }
5623 
5624 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5625 {
5626     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5627 }
5628 
5629 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5630 {
5631     gen_helper_mve_uqshll(r, tcg_env, n, tcg_constant_i32(shift));
5632 }
5633 
5634 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5635 {
5636     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5637 }
5638 
5639 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5640 {
5641     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5642 }
5643 
5644 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5645 {
5646     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5647 }
5648 
5649 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5650 {
5651     TCGv_i64 rda;
5652     TCGv_i32 rdalo, rdahi;
5653 
5654     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5655         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5656         return false;
5657     }
5658     if (a->rdahi == 15) {
5659         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5660         return false;
5661     }
5662     if (!dc_isar_feature(aa32_mve, s) ||
5663         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5664         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5665         a->rm == a->rdahi || a->rm == a->rdalo) {
5666         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5667         unallocated_encoding(s);
5668         return true;
5669     }
5670 
5671     rda = tcg_temp_new_i64();
5672     rdalo = load_reg(s, a->rdalo);
5673     rdahi = load_reg(s, a->rdahi);
5674     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5675 
5676     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5677     fn(rda, tcg_env, rda, cpu_R[a->rm]);
5678 
5679     tcg_gen_extrl_i64_i32(rdalo, rda);
5680     tcg_gen_extrh_i64_i32(rdahi, rda);
5681     store_reg(s, a->rdalo, rdalo);
5682     store_reg(s, a->rdahi, rdahi);
5683 
5684     return true;
5685 }
5686 
5687 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5688 {
5689     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5690 }
5691 
5692 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5693 {
5694     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5695 }
5696 
5697 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5698 {
5699     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5700 }
5701 
5702 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5703 {
5704     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5705 }
5706 
5707 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5708 {
5709     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5710 }
5711 
5712 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5713 {
5714     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5715 }
5716 
5717 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5718 {
5719     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5720         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5721         return false;
5722     }
5723     if (!dc_isar_feature(aa32_mve, s) ||
5724         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5725         a->rda == 13 || a->rda == 15) {
5726         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5727         unallocated_encoding(s);
5728         return true;
5729     }
5730 
5731     if (a->shim == 0) {
5732         a->shim = 32;
5733     }
5734     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5735 
5736     return true;
5737 }
5738 
5739 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5740 {
5741     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5742 }
5743 
5744 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5745 {
5746     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5747 }
5748 
5749 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5750 {
5751     gen_helper_mve_sqshl(r, tcg_env, n, tcg_constant_i32(shift));
5752 }
5753 
5754 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5755 {
5756     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5757 }
5758 
5759 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5760 {
5761     gen_helper_mve_uqshl(r, tcg_env, n, tcg_constant_i32(shift));
5762 }
5763 
5764 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5765 {
5766     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5767 }
5768 
5769 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5770 {
5771     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5772         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5773         return false;
5774     }
5775     if (!dc_isar_feature(aa32_mve, s) ||
5776         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5777         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5778         a->rm == a->rda) {
5779         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5780         unallocated_encoding(s);
5781         return true;
5782     }
5783 
5784     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5785     fn(cpu_R[a->rda], tcg_env, cpu_R[a->rda], cpu_R[a->rm]);
5786     return true;
5787 }
5788 
5789 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5790 {
5791     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5792 }
5793 
5794 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5795 {
5796     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5797 }
5798 
5799 /*
5800  * Multiply and multiply accumulate
5801  */
5802 
5803 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5804 {
5805     TCGv_i32 t1, t2;
5806 
5807     t1 = load_reg(s, a->rn);
5808     t2 = load_reg(s, a->rm);
5809     tcg_gen_mul_i32(t1, t1, t2);
5810     if (add) {
5811         t2 = load_reg(s, a->ra);
5812         tcg_gen_add_i32(t1, t1, t2);
5813     }
5814     if (a->s) {
5815         gen_logic_CC(t1);
5816     }
5817     store_reg(s, a->rd, t1);
5818     return true;
5819 }
5820 
5821 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5822 {
5823     return op_mla(s, a, false);
5824 }
5825 
5826 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5827 {
5828     return op_mla(s, a, true);
5829 }
5830 
5831 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5832 {
5833     TCGv_i32 t1, t2;
5834 
5835     if (!ENABLE_ARCH_6T2) {
5836         return false;
5837     }
5838     t1 = load_reg(s, a->rn);
5839     t2 = load_reg(s, a->rm);
5840     tcg_gen_mul_i32(t1, t1, t2);
5841     t2 = load_reg(s, a->ra);
5842     tcg_gen_sub_i32(t1, t2, t1);
5843     store_reg(s, a->rd, t1);
5844     return true;
5845 }
5846 
5847 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5848 {
5849     TCGv_i32 t0, t1, t2, t3;
5850 
5851     t0 = load_reg(s, a->rm);
5852     t1 = load_reg(s, a->rn);
5853     if (uns) {
5854         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5855     } else {
5856         tcg_gen_muls2_i32(t0, t1, t0, t1);
5857     }
5858     if (add) {
5859         t2 = load_reg(s, a->ra);
5860         t3 = load_reg(s, a->rd);
5861         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5862     }
5863     if (a->s) {
5864         gen_logicq_cc(t0, t1);
5865     }
5866     store_reg(s, a->ra, t0);
5867     store_reg(s, a->rd, t1);
5868     return true;
5869 }
5870 
5871 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5872 {
5873     return op_mlal(s, a, true, false);
5874 }
5875 
5876 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5877 {
5878     return op_mlal(s, a, false, false);
5879 }
5880 
5881 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5882 {
5883     return op_mlal(s, a, true, true);
5884 }
5885 
5886 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5887 {
5888     return op_mlal(s, a, false, true);
5889 }
5890 
5891 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5892 {
5893     TCGv_i32 t0, t1, t2, zero;
5894 
5895     if (s->thumb
5896         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5897         : !ENABLE_ARCH_6) {
5898         return false;
5899     }
5900 
5901     t0 = load_reg(s, a->rm);
5902     t1 = load_reg(s, a->rn);
5903     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5904     zero = tcg_constant_i32(0);
5905     t2 = load_reg(s, a->ra);
5906     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5907     t2 = load_reg(s, a->rd);
5908     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5909     store_reg(s, a->ra, t0);
5910     store_reg(s, a->rd, t1);
5911     return true;
5912 }
5913 
5914 /*
5915  * Saturating addition and subtraction
5916  */
5917 
5918 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5919 {
5920     TCGv_i32 t0, t1;
5921 
5922     if (s->thumb
5923         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5924         : !ENABLE_ARCH_5TE) {
5925         return false;
5926     }
5927 
5928     t0 = load_reg(s, a->rm);
5929     t1 = load_reg(s, a->rn);
5930     if (doub) {
5931         gen_helper_add_saturate(t1, tcg_env, t1, t1);
5932     }
5933     if (add) {
5934         gen_helper_add_saturate(t0, tcg_env, t0, t1);
5935     } else {
5936         gen_helper_sub_saturate(t0, tcg_env, t0, t1);
5937     }
5938     store_reg(s, a->rd, t0);
5939     return true;
5940 }
5941 
5942 #define DO_QADDSUB(NAME, ADD, DOUB) \
5943 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5944 {                                                        \
5945     return op_qaddsub(s, a, ADD, DOUB);                  \
5946 }
5947 
5948 DO_QADDSUB(QADD, true, false)
5949 DO_QADDSUB(QSUB, false, false)
5950 DO_QADDSUB(QDADD, true, true)
5951 DO_QADDSUB(QDSUB, false, true)
5952 
5953 #undef DO_QADDSUB
5954 
5955 /*
5956  * Halfword multiply and multiply accumulate
5957  */
5958 
5959 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5960                        int add_long, bool nt, bool mt)
5961 {
5962     TCGv_i32 t0, t1, tl, th;
5963 
5964     if (s->thumb
5965         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5966         : !ENABLE_ARCH_5TE) {
5967         return false;
5968     }
5969 
5970     t0 = load_reg(s, a->rn);
5971     t1 = load_reg(s, a->rm);
5972     gen_mulxy(t0, t1, nt, mt);
5973 
5974     switch (add_long) {
5975     case 0:
5976         store_reg(s, a->rd, t0);
5977         break;
5978     case 1:
5979         t1 = load_reg(s, a->ra);
5980         gen_helper_add_setq(t0, tcg_env, t0, t1);
5981         store_reg(s, a->rd, t0);
5982         break;
5983     case 2:
5984         tl = load_reg(s, a->ra);
5985         th = load_reg(s, a->rd);
5986         /* Sign-extend the 32-bit product to 64 bits.  */
5987         t1 = tcg_temp_new_i32();
5988         tcg_gen_sari_i32(t1, t0, 31);
5989         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5990         store_reg(s, a->ra, tl);
5991         store_reg(s, a->rd, th);
5992         break;
5993     default:
5994         g_assert_not_reached();
5995     }
5996     return true;
5997 }
5998 
5999 #define DO_SMLAX(NAME, add, nt, mt) \
6000 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6001 {                                                          \
6002     return op_smlaxxx(s, a, add, nt, mt);                  \
6003 }
6004 
6005 DO_SMLAX(SMULBB, 0, 0, 0)
6006 DO_SMLAX(SMULBT, 0, 0, 1)
6007 DO_SMLAX(SMULTB, 0, 1, 0)
6008 DO_SMLAX(SMULTT, 0, 1, 1)
6009 
6010 DO_SMLAX(SMLABB, 1, 0, 0)
6011 DO_SMLAX(SMLABT, 1, 0, 1)
6012 DO_SMLAX(SMLATB, 1, 1, 0)
6013 DO_SMLAX(SMLATT, 1, 1, 1)
6014 
6015 DO_SMLAX(SMLALBB, 2, 0, 0)
6016 DO_SMLAX(SMLALBT, 2, 0, 1)
6017 DO_SMLAX(SMLALTB, 2, 1, 0)
6018 DO_SMLAX(SMLALTT, 2, 1, 1)
6019 
6020 #undef DO_SMLAX
6021 
6022 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6023 {
6024     TCGv_i32 t0, t1;
6025 
6026     if (!ENABLE_ARCH_5TE) {
6027         return false;
6028     }
6029 
6030     t0 = load_reg(s, a->rn);
6031     t1 = load_reg(s, a->rm);
6032     /*
6033      * Since the nominal result is product<47:16>, shift the 16-bit
6034      * input up by 16 bits, so that the result is at product<63:32>.
6035      */
6036     if (mt) {
6037         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6038     } else {
6039         tcg_gen_shli_i32(t1, t1, 16);
6040     }
6041     tcg_gen_muls2_i32(t0, t1, t0, t1);
6042     if (add) {
6043         t0 = load_reg(s, a->ra);
6044         gen_helper_add_setq(t1, tcg_env, t1, t0);
6045     }
6046     store_reg(s, a->rd, t1);
6047     return true;
6048 }
6049 
6050 #define DO_SMLAWX(NAME, add, mt) \
6051 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6052 {                                                          \
6053     return op_smlawx(s, a, add, mt);                       \
6054 }
6055 
6056 DO_SMLAWX(SMULWB, 0, 0)
6057 DO_SMLAWX(SMULWT, 0, 1)
6058 DO_SMLAWX(SMLAWB, 1, 0)
6059 DO_SMLAWX(SMLAWT, 1, 1)
6060 
6061 #undef DO_SMLAWX
6062 
6063 /*
6064  * MSR (immediate) and hints
6065  */
6066 
6067 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6068 {
6069     /*
6070      * When running single-threaded TCG code, use the helper to ensure that
6071      * the next round-robin scheduled vCPU gets a crack.  When running in
6072      * MTTCG we don't generate jumps to the helper as it won't affect the
6073      * scheduling of other vCPUs.
6074      */
6075     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6076         gen_update_pc(s, curr_insn_len(s));
6077         s->base.is_jmp = DISAS_YIELD;
6078     }
6079     return true;
6080 }
6081 
6082 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6083 {
6084     /*
6085      * When running single-threaded TCG code, use the helper to ensure that
6086      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6087      * just skip this instruction.  Currently the SEV/SEVL instructions,
6088      * which are *one* of many ways to wake the CPU from WFE, are not
6089      * implemented so we can't sleep like WFI does.
6090      */
6091     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6092         gen_update_pc(s, curr_insn_len(s));
6093         s->base.is_jmp = DISAS_WFE;
6094     }
6095     return true;
6096 }
6097 
6098 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6099 {
6100     /* For WFI, halt the vCPU until an IRQ. */
6101     gen_update_pc(s, curr_insn_len(s));
6102     s->base.is_jmp = DISAS_WFI;
6103     return true;
6104 }
6105 
6106 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6107 {
6108     /*
6109      * For M-profile, minimal-RAS ESB can be a NOP.
6110      * Without RAS, we must implement this as NOP.
6111      */
6112     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6113         /*
6114          * QEMU does not have a source of physical SErrors,
6115          * so we are only concerned with virtual SErrors.
6116          * The pseudocode in the ARM for this case is
6117          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6118          *      AArch32.vESBOperation();
6119          * Most of the condition can be evaluated at translation time.
6120          * Test for EL2 present, and defer test for SEL2 to runtime.
6121          */
6122         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6123             gen_helper_vesb(tcg_env);
6124         }
6125     }
6126     return true;
6127 }
6128 
6129 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6130 {
6131     return true;
6132 }
6133 
6134 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6135 {
6136     uint32_t val = ror32(a->imm, a->rot * 2);
6137     uint32_t mask = msr_mask(s, a->mask, a->r);
6138 
6139     if (gen_set_psr_im(s, mask, a->r, val)) {
6140         unallocated_encoding(s);
6141     }
6142     return true;
6143 }
6144 
6145 /*
6146  * Cyclic Redundancy Check
6147  */
6148 
6149 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6150 {
6151     TCGv_i32 t1, t2, t3;
6152 
6153     if (!dc_isar_feature(aa32_crc32, s)) {
6154         return false;
6155     }
6156 
6157     t1 = load_reg(s, a->rn);
6158     t2 = load_reg(s, a->rm);
6159     switch (sz) {
6160     case MO_8:
6161         gen_uxtb(t2);
6162         break;
6163     case MO_16:
6164         gen_uxth(t2);
6165         break;
6166     case MO_32:
6167         break;
6168     default:
6169         g_assert_not_reached();
6170     }
6171     t3 = tcg_constant_i32(1 << sz);
6172     if (c) {
6173         gen_helper_crc32c(t1, t1, t2, t3);
6174     } else {
6175         gen_helper_crc32(t1, t1, t2, t3);
6176     }
6177     store_reg(s, a->rd, t1);
6178     return true;
6179 }
6180 
6181 #define DO_CRC32(NAME, c, sz) \
6182 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6183     { return op_crc32(s, a, c, sz); }
6184 
6185 DO_CRC32(CRC32B, false, MO_8)
6186 DO_CRC32(CRC32H, false, MO_16)
6187 DO_CRC32(CRC32W, false, MO_32)
6188 DO_CRC32(CRC32CB, true, MO_8)
6189 DO_CRC32(CRC32CH, true, MO_16)
6190 DO_CRC32(CRC32CW, true, MO_32)
6191 
6192 #undef DO_CRC32
6193 
6194 /*
6195  * Miscellaneous instructions
6196  */
6197 
6198 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6199 {
6200     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6201         return false;
6202     }
6203     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6204     return true;
6205 }
6206 
6207 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6208 {
6209     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6210         return false;
6211     }
6212     gen_msr_banked(s, a->r, a->sysm, a->rn);
6213     return true;
6214 }
6215 
6216 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6217 {
6218     TCGv_i32 tmp;
6219 
6220     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6221         return false;
6222     }
6223     if (a->r) {
6224         if (IS_USER(s)) {
6225             unallocated_encoding(s);
6226             return true;
6227         }
6228         tmp = load_cpu_field(spsr);
6229     } else {
6230         tmp = tcg_temp_new_i32();
6231         gen_helper_cpsr_read(tmp, tcg_env);
6232     }
6233     store_reg(s, a->rd, tmp);
6234     return true;
6235 }
6236 
6237 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6238 {
6239     TCGv_i32 tmp;
6240     uint32_t mask = msr_mask(s, a->mask, a->r);
6241 
6242     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6243         return false;
6244     }
6245     tmp = load_reg(s, a->rn);
6246     if (gen_set_psr(s, mask, a->r, tmp)) {
6247         unallocated_encoding(s);
6248     }
6249     return true;
6250 }
6251 
6252 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6253 {
6254     TCGv_i32 tmp;
6255 
6256     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6257         return false;
6258     }
6259     tmp = tcg_temp_new_i32();
6260     gen_helper_v7m_mrs(tmp, tcg_env, tcg_constant_i32(a->sysm));
6261     store_reg(s, a->rd, tmp);
6262     return true;
6263 }
6264 
6265 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6266 {
6267     TCGv_i32 addr, reg;
6268 
6269     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6270         return false;
6271     }
6272     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6273     reg = load_reg(s, a->rn);
6274     gen_helper_v7m_msr(tcg_env, addr, reg);
6275     /* If we wrote to CONTROL, the EL might have changed */
6276     gen_rebuild_hflags(s, true);
6277     gen_lookup_tb(s);
6278     return true;
6279 }
6280 
6281 static bool trans_BX(DisasContext *s, arg_BX *a)
6282 {
6283     if (!ENABLE_ARCH_4T) {
6284         return false;
6285     }
6286     gen_bx_excret(s, load_reg(s, a->rm));
6287     return true;
6288 }
6289 
6290 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6291 {
6292     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6293         return false;
6294     }
6295     /*
6296      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6297      * TBFLAGS bit on a basically-never-happens case, so call a helper
6298      * function to check for the trap and raise the exception if needed
6299      * (passing it the register number for the syndrome value).
6300      * v8A doesn't have this HSTR bit.
6301      */
6302     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6303         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6304         s->current_el < 2 && s->ns) {
6305         gen_helper_check_bxj_trap(tcg_env, tcg_constant_i32(a->rm));
6306     }
6307     /* Trivial implementation equivalent to bx.  */
6308     gen_bx(s, load_reg(s, a->rm));
6309     return true;
6310 }
6311 
6312 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6313 {
6314     TCGv_i32 tmp;
6315 
6316     if (!ENABLE_ARCH_5) {
6317         return false;
6318     }
6319     tmp = load_reg(s, a->rm);
6320     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6321     gen_bx(s, tmp);
6322     return true;
6323 }
6324 
6325 /*
6326  * BXNS/BLXNS: only exist for v8M with the security extensions,
6327  * and always UNDEF if NonSecure.  We don't implement these in
6328  * the user-only mode either (in theory you can use them from
6329  * Secure User mode but they are too tied in to system emulation).
6330  */
6331 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6332 {
6333     if (!s->v8m_secure || IS_USER_ONLY) {
6334         unallocated_encoding(s);
6335     } else {
6336         gen_bxns(s, a->rm);
6337     }
6338     return true;
6339 }
6340 
6341 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6342 {
6343     if (!s->v8m_secure || IS_USER_ONLY) {
6344         unallocated_encoding(s);
6345     } else {
6346         gen_blxns(s, a->rm);
6347     }
6348     return true;
6349 }
6350 
6351 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6352 {
6353     TCGv_i32 tmp;
6354 
6355     if (!ENABLE_ARCH_5) {
6356         return false;
6357     }
6358     tmp = load_reg(s, a->rm);
6359     tcg_gen_clzi_i32(tmp, tmp, 32);
6360     store_reg(s, a->rd, tmp);
6361     return true;
6362 }
6363 
6364 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6365 {
6366     TCGv_i32 tmp;
6367 
6368     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6369         return false;
6370     }
6371     if (IS_USER(s)) {
6372         unallocated_encoding(s);
6373         return true;
6374     }
6375     if (s->current_el == 2) {
6376         /* ERET from Hyp uses ELR_Hyp, not LR */
6377         tmp = load_cpu_field_low32(elr_el[2]);
6378     } else {
6379         tmp = load_reg(s, 14);
6380     }
6381     gen_exception_return(s, tmp);
6382     return true;
6383 }
6384 
6385 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6386 {
6387     gen_hlt(s, a->imm);
6388     return true;
6389 }
6390 
6391 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6392 {
6393     if (!ENABLE_ARCH_5) {
6394         return false;
6395     }
6396     /* BKPT is OK with ECI set and leaves it untouched */
6397     s->eci_handled = true;
6398     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6399         semihosting_enabled(s->current_el == 0) &&
6400         (a->imm == 0xab)) {
6401         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6402     } else {
6403         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6404     }
6405     return true;
6406 }
6407 
6408 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6409 {
6410     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6411         return false;
6412     }
6413     if (IS_USER(s)) {
6414         unallocated_encoding(s);
6415     } else {
6416         gen_hvc(s, a->imm);
6417     }
6418     return true;
6419 }
6420 
6421 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6422 {
6423     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6424         return false;
6425     }
6426     if (IS_USER(s)) {
6427         unallocated_encoding(s);
6428     } else {
6429         gen_smc(s);
6430     }
6431     return true;
6432 }
6433 
6434 static bool trans_SG(DisasContext *s, arg_SG *a)
6435 {
6436     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6437         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6438         return false;
6439     }
6440     /*
6441      * SG (v8M only)
6442      * The bulk of the behaviour for this instruction is implemented
6443      * in v7m_handle_execute_nsc(), which deals with the insn when
6444      * it is executed by a CPU in non-secure state from memory
6445      * which is Secure & NonSecure-Callable.
6446      * Here we only need to handle the remaining cases:
6447      *  * in NS memory (including the "security extension not
6448      *    implemented" case) : NOP
6449      *  * in S memory but CPU already secure (clear IT bits)
6450      * We know that the attribute for the memory this insn is
6451      * in must match the current CPU state, because otherwise
6452      * get_phys_addr_pmsav8 would have generated an exception.
6453      */
6454     if (s->v8m_secure) {
6455         /* Like the IT insn, we don't need to generate any code */
6456         s->condexec_cond = 0;
6457         s->condexec_mask = 0;
6458     }
6459     return true;
6460 }
6461 
6462 static bool trans_TT(DisasContext *s, arg_TT *a)
6463 {
6464     TCGv_i32 addr, tmp;
6465 
6466     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6467         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6468         return false;
6469     }
6470     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6471         /* We UNDEF for these UNPREDICTABLE cases */
6472         unallocated_encoding(s);
6473         return true;
6474     }
6475     if (a->A && !s->v8m_secure) {
6476         /* This case is UNDEFINED.  */
6477         unallocated_encoding(s);
6478         return true;
6479     }
6480 
6481     addr = load_reg(s, a->rn);
6482     tmp = tcg_temp_new_i32();
6483     gen_helper_v7m_tt(tmp, tcg_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6484     store_reg(s, a->rd, tmp);
6485     return true;
6486 }
6487 
6488 /*
6489  * Load/store register index
6490  */
6491 
6492 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6493 {
6494     ISSInfo ret;
6495 
6496     /* ISS not valid if writeback */
6497     if (p && !w) {
6498         ret = rd;
6499         if (curr_insn_len(s) == 2) {
6500             ret |= ISSIs16Bit;
6501         }
6502     } else {
6503         ret = ISSInvalid;
6504     }
6505     return ret;
6506 }
6507 
6508 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6509 {
6510     TCGv_i32 addr = load_reg(s, a->rn);
6511 
6512     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6513         gen_helper_v8m_stackcheck(tcg_env, addr);
6514     }
6515 
6516     if (a->p) {
6517         TCGv_i32 ofs = load_reg(s, a->rm);
6518         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6519         if (a->u) {
6520             tcg_gen_add_i32(addr, addr, ofs);
6521         } else {
6522             tcg_gen_sub_i32(addr, addr, ofs);
6523         }
6524     }
6525     return addr;
6526 }
6527 
6528 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6529                             TCGv_i32 addr, int address_offset)
6530 {
6531     if (!a->p) {
6532         TCGv_i32 ofs = load_reg(s, a->rm);
6533         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6534         if (a->u) {
6535             tcg_gen_add_i32(addr, addr, ofs);
6536         } else {
6537             tcg_gen_sub_i32(addr, addr, ofs);
6538         }
6539     } else if (!a->w) {
6540         return;
6541     }
6542     tcg_gen_addi_i32(addr, addr, address_offset);
6543     store_reg(s, a->rn, addr);
6544 }
6545 
6546 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6547                        MemOp mop, int mem_idx)
6548 {
6549     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6550     TCGv_i32 addr, tmp;
6551 
6552     addr = op_addr_rr_pre(s, a);
6553 
6554     tmp = tcg_temp_new_i32();
6555     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6556     disas_set_da_iss(s, mop, issinfo);
6557 
6558     /*
6559      * Perform base writeback before the loaded value to
6560      * ensure correct behavior with overlapping index registers.
6561      */
6562     op_addr_rr_post(s, a, addr, 0);
6563     store_reg_from_load(s, a->rt, tmp);
6564     return true;
6565 }
6566 
6567 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6568                         MemOp mop, int mem_idx)
6569 {
6570     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6571     TCGv_i32 addr, tmp;
6572 
6573     /*
6574      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6575      * is either UNPREDICTABLE or has defined behaviour
6576      */
6577     if (s->thumb && a->rn == 15) {
6578         return false;
6579     }
6580 
6581     addr = op_addr_rr_pre(s, a);
6582 
6583     tmp = load_reg(s, a->rt);
6584     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6585     disas_set_da_iss(s, mop, issinfo);
6586 
6587     op_addr_rr_post(s, a, addr, 0);
6588     return true;
6589 }
6590 
6591 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6592 {
6593     int mem_idx = get_mem_index(s);
6594     TCGv_i32 addr, tmp;
6595 
6596     if (!ENABLE_ARCH_5TE) {
6597         return false;
6598     }
6599     if (a->rt & 1) {
6600         unallocated_encoding(s);
6601         return true;
6602     }
6603     addr = op_addr_rr_pre(s, a);
6604 
6605     tmp = tcg_temp_new_i32();
6606     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6607     store_reg(s, a->rt, tmp);
6608 
6609     tcg_gen_addi_i32(addr, addr, 4);
6610 
6611     tmp = tcg_temp_new_i32();
6612     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6613     store_reg(s, a->rt + 1, tmp);
6614 
6615     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6616     op_addr_rr_post(s, a, addr, -4);
6617     return true;
6618 }
6619 
6620 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6621 {
6622     int mem_idx = get_mem_index(s);
6623     TCGv_i32 addr, tmp;
6624 
6625     if (!ENABLE_ARCH_5TE) {
6626         return false;
6627     }
6628     if (a->rt & 1) {
6629         unallocated_encoding(s);
6630         return true;
6631     }
6632     addr = op_addr_rr_pre(s, a);
6633 
6634     tmp = load_reg(s, a->rt);
6635     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6636 
6637     tcg_gen_addi_i32(addr, addr, 4);
6638 
6639     tmp = load_reg(s, a->rt + 1);
6640     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6641 
6642     op_addr_rr_post(s, a, addr, -4);
6643     return true;
6644 }
6645 
6646 /*
6647  * Load/store immediate index
6648  */
6649 
6650 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6651 {
6652     int ofs = a->imm;
6653 
6654     if (!a->u) {
6655         ofs = -ofs;
6656     }
6657 
6658     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6659         /*
6660          * Stackcheck. Here we know 'addr' is the current SP;
6661          * U is set if we're moving SP up, else down. It is
6662          * UNKNOWN whether the limit check triggers when SP starts
6663          * below the limit and ends up above it; we chose to do so.
6664          */
6665         if (!a->u) {
6666             TCGv_i32 newsp = tcg_temp_new_i32();
6667             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6668             gen_helper_v8m_stackcheck(tcg_env, newsp);
6669         } else {
6670             gen_helper_v8m_stackcheck(tcg_env, cpu_R[13]);
6671         }
6672     }
6673 
6674     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6675 }
6676 
6677 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6678                             TCGv_i32 addr, int address_offset)
6679 {
6680     if (!a->p) {
6681         if (a->u) {
6682             address_offset += a->imm;
6683         } else {
6684             address_offset -= a->imm;
6685         }
6686     } else if (!a->w) {
6687         return;
6688     }
6689     tcg_gen_addi_i32(addr, addr, address_offset);
6690     store_reg(s, a->rn, addr);
6691 }
6692 
6693 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6694                        MemOp mop, int mem_idx)
6695 {
6696     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6697     TCGv_i32 addr, tmp;
6698 
6699     addr = op_addr_ri_pre(s, a);
6700 
6701     tmp = tcg_temp_new_i32();
6702     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6703     disas_set_da_iss(s, mop, issinfo);
6704 
6705     /*
6706      * Perform base writeback before the loaded value to
6707      * ensure correct behavior with overlapping index registers.
6708      */
6709     op_addr_ri_post(s, a, addr, 0);
6710     store_reg_from_load(s, a->rt, tmp);
6711     return true;
6712 }
6713 
6714 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6715                         MemOp mop, int mem_idx)
6716 {
6717     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6718     TCGv_i32 addr, tmp;
6719 
6720     /*
6721      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6722      * is either UNPREDICTABLE or has defined behaviour
6723      */
6724     if (s->thumb && a->rn == 15) {
6725         return false;
6726     }
6727 
6728     addr = op_addr_ri_pre(s, a);
6729 
6730     tmp = load_reg(s, a->rt);
6731     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6732     disas_set_da_iss(s, mop, issinfo);
6733 
6734     op_addr_ri_post(s, a, addr, 0);
6735     return true;
6736 }
6737 
6738 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6739 {
6740     int mem_idx = get_mem_index(s);
6741     TCGv_i32 addr, tmp;
6742 
6743     addr = op_addr_ri_pre(s, a);
6744 
6745     tmp = tcg_temp_new_i32();
6746     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6747     store_reg(s, a->rt, tmp);
6748 
6749     tcg_gen_addi_i32(addr, addr, 4);
6750 
6751     tmp = tcg_temp_new_i32();
6752     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6753     store_reg(s, rt2, tmp);
6754 
6755     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6756     op_addr_ri_post(s, a, addr, -4);
6757     return true;
6758 }
6759 
6760 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6761 {
6762     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6763         return false;
6764     }
6765     return op_ldrd_ri(s, a, a->rt + 1);
6766 }
6767 
6768 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6769 {
6770     arg_ldst_ri b = {
6771         .u = a->u, .w = a->w, .p = a->p,
6772         .rn = a->rn, .rt = a->rt, .imm = a->imm
6773     };
6774     return op_ldrd_ri(s, &b, a->rt2);
6775 }
6776 
6777 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6778 {
6779     int mem_idx = get_mem_index(s);
6780     TCGv_i32 addr, tmp;
6781 
6782     addr = op_addr_ri_pre(s, a);
6783 
6784     tmp = load_reg(s, a->rt);
6785     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6786 
6787     tcg_gen_addi_i32(addr, addr, 4);
6788 
6789     tmp = load_reg(s, rt2);
6790     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6791 
6792     op_addr_ri_post(s, a, addr, -4);
6793     return true;
6794 }
6795 
6796 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6797 {
6798     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6799         return false;
6800     }
6801     return op_strd_ri(s, a, a->rt + 1);
6802 }
6803 
6804 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6805 {
6806     arg_ldst_ri b = {
6807         .u = a->u, .w = a->w, .p = a->p,
6808         .rn = a->rn, .rt = a->rt, .imm = a->imm
6809     };
6810     return op_strd_ri(s, &b, a->rt2);
6811 }
6812 
6813 #define DO_LDST(NAME, WHICH, MEMOP) \
6814 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6815 {                                                                     \
6816     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6817 }                                                                     \
6818 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6819 {                                                                     \
6820     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6821 }                                                                     \
6822 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6823 {                                                                     \
6824     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6825 }                                                                     \
6826 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6827 {                                                                     \
6828     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6829 }
6830 
6831 DO_LDST(LDR, load, MO_UL)
6832 DO_LDST(LDRB, load, MO_UB)
6833 DO_LDST(LDRH, load, MO_UW)
6834 DO_LDST(LDRSB, load, MO_SB)
6835 DO_LDST(LDRSH, load, MO_SW)
6836 
6837 DO_LDST(STR, store, MO_UL)
6838 DO_LDST(STRB, store, MO_UB)
6839 DO_LDST(STRH, store, MO_UW)
6840 
6841 #undef DO_LDST
6842 
6843 /*
6844  * Synchronization primitives
6845  */
6846 
6847 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6848 {
6849     TCGv_i32 addr, tmp;
6850     TCGv taddr;
6851 
6852     opc |= s->be_data;
6853     addr = load_reg(s, a->rn);
6854     taddr = gen_aa32_addr(s, addr, opc);
6855 
6856     tmp = load_reg(s, a->rt2);
6857     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6858 
6859     store_reg(s, a->rt, tmp);
6860     return true;
6861 }
6862 
6863 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6864 {
6865     return op_swp(s, a, MO_UL | MO_ALIGN);
6866 }
6867 
6868 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6869 {
6870     return op_swp(s, a, MO_UB);
6871 }
6872 
6873 /*
6874  * Load/Store Exclusive and Load-Acquire/Store-Release
6875  */
6876 
6877 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6878 {
6879     TCGv_i32 addr;
6880     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6881     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6882 
6883     /* We UNDEF for these UNPREDICTABLE cases.  */
6884     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6885         || a->rd == a->rn || a->rd == a->rt
6886         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6887         || (mop == MO_64
6888             && (a->rt2 == 15
6889                 || a->rd == a->rt2
6890                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6891         unallocated_encoding(s);
6892         return true;
6893     }
6894 
6895     if (rel) {
6896         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6897     }
6898 
6899     addr = tcg_temp_new_i32();
6900     load_reg_var(s, addr, a->rn);
6901     tcg_gen_addi_i32(addr, addr, a->imm);
6902 
6903     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6904     return true;
6905 }
6906 
6907 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6908 {
6909     if (!ENABLE_ARCH_6) {
6910         return false;
6911     }
6912     return op_strex(s, a, MO_32, false);
6913 }
6914 
6915 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6916 {
6917     if (!ENABLE_ARCH_6K) {
6918         return false;
6919     }
6920     /* We UNDEF for these UNPREDICTABLE cases.  */
6921     if (a->rt & 1) {
6922         unallocated_encoding(s);
6923         return true;
6924     }
6925     a->rt2 = a->rt + 1;
6926     return op_strex(s, a, MO_64, false);
6927 }
6928 
6929 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6930 {
6931     return op_strex(s, a, MO_64, false);
6932 }
6933 
6934 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6935 {
6936     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6937         return false;
6938     }
6939     return op_strex(s, a, MO_8, false);
6940 }
6941 
6942 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6943 {
6944     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6945         return false;
6946     }
6947     return op_strex(s, a, MO_16, false);
6948 }
6949 
6950 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6951 {
6952     if (!ENABLE_ARCH_8) {
6953         return false;
6954     }
6955     return op_strex(s, a, MO_32, true);
6956 }
6957 
6958 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6959 {
6960     if (!ENABLE_ARCH_8) {
6961         return false;
6962     }
6963     /* We UNDEF for these UNPREDICTABLE cases.  */
6964     if (a->rt & 1) {
6965         unallocated_encoding(s);
6966         return true;
6967     }
6968     a->rt2 = a->rt + 1;
6969     return op_strex(s, a, MO_64, true);
6970 }
6971 
6972 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6973 {
6974     if (!ENABLE_ARCH_8) {
6975         return false;
6976     }
6977     return op_strex(s, a, MO_64, true);
6978 }
6979 
6980 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6981 {
6982     if (!ENABLE_ARCH_8) {
6983         return false;
6984     }
6985     return op_strex(s, a, MO_8, true);
6986 }
6987 
6988 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6989 {
6990     if (!ENABLE_ARCH_8) {
6991         return false;
6992     }
6993     return op_strex(s, a, MO_16, true);
6994 }
6995 
6996 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
6997 {
6998     TCGv_i32 addr, tmp;
6999 
7000     if (!ENABLE_ARCH_8) {
7001         return false;
7002     }
7003     /* We UNDEF for these UNPREDICTABLE cases.  */
7004     if (a->rn == 15 || a->rt == 15) {
7005         unallocated_encoding(s);
7006         return true;
7007     }
7008 
7009     addr = load_reg(s, a->rn);
7010     tmp = load_reg(s, a->rt);
7011     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7012     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7013     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7014 
7015     return true;
7016 }
7017 
7018 static bool trans_STL(DisasContext *s, arg_STL *a)
7019 {
7020     return op_stl(s, a, MO_UL);
7021 }
7022 
7023 static bool trans_STLB(DisasContext *s, arg_STL *a)
7024 {
7025     return op_stl(s, a, MO_UB);
7026 }
7027 
7028 static bool trans_STLH(DisasContext *s, arg_STL *a)
7029 {
7030     return op_stl(s, a, MO_UW);
7031 }
7032 
7033 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7034 {
7035     TCGv_i32 addr;
7036     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7037     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7038 
7039     /* We UNDEF for these UNPREDICTABLE cases.  */
7040     if (a->rn == 15 || a->rt == 15
7041         || (!v8a && s->thumb && a->rt == 13)
7042         || (mop == MO_64
7043             && (a->rt2 == 15 || a->rt == a->rt2
7044                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7045         unallocated_encoding(s);
7046         return true;
7047     }
7048 
7049     addr = tcg_temp_new_i32();
7050     load_reg_var(s, addr, a->rn);
7051     tcg_gen_addi_i32(addr, addr, a->imm);
7052 
7053     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7054 
7055     if (acq) {
7056         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7057     }
7058     return true;
7059 }
7060 
7061 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7062 {
7063     if (!ENABLE_ARCH_6) {
7064         return false;
7065     }
7066     return op_ldrex(s, a, MO_32, false);
7067 }
7068 
7069 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7070 {
7071     if (!ENABLE_ARCH_6K) {
7072         return false;
7073     }
7074     /* We UNDEF for these UNPREDICTABLE cases.  */
7075     if (a->rt & 1) {
7076         unallocated_encoding(s);
7077         return true;
7078     }
7079     a->rt2 = a->rt + 1;
7080     return op_ldrex(s, a, MO_64, false);
7081 }
7082 
7083 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7084 {
7085     return op_ldrex(s, a, MO_64, false);
7086 }
7087 
7088 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7089 {
7090     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7091         return false;
7092     }
7093     return op_ldrex(s, a, MO_8, false);
7094 }
7095 
7096 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7097 {
7098     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7099         return false;
7100     }
7101     return op_ldrex(s, a, MO_16, false);
7102 }
7103 
7104 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7105 {
7106     if (!ENABLE_ARCH_8) {
7107         return false;
7108     }
7109     return op_ldrex(s, a, MO_32, true);
7110 }
7111 
7112 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7113 {
7114     if (!ENABLE_ARCH_8) {
7115         return false;
7116     }
7117     /* We UNDEF for these UNPREDICTABLE cases.  */
7118     if (a->rt & 1) {
7119         unallocated_encoding(s);
7120         return true;
7121     }
7122     a->rt2 = a->rt + 1;
7123     return op_ldrex(s, a, MO_64, true);
7124 }
7125 
7126 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7127 {
7128     if (!ENABLE_ARCH_8) {
7129         return false;
7130     }
7131     return op_ldrex(s, a, MO_64, true);
7132 }
7133 
7134 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7135 {
7136     if (!ENABLE_ARCH_8) {
7137         return false;
7138     }
7139     return op_ldrex(s, a, MO_8, true);
7140 }
7141 
7142 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7143 {
7144     if (!ENABLE_ARCH_8) {
7145         return false;
7146     }
7147     return op_ldrex(s, a, MO_16, true);
7148 }
7149 
7150 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7151 {
7152     TCGv_i32 addr, tmp;
7153 
7154     if (!ENABLE_ARCH_8) {
7155         return false;
7156     }
7157     /* We UNDEF for these UNPREDICTABLE cases.  */
7158     if (a->rn == 15 || a->rt == 15) {
7159         unallocated_encoding(s);
7160         return true;
7161     }
7162 
7163     addr = load_reg(s, a->rn);
7164     tmp = tcg_temp_new_i32();
7165     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7166     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7167 
7168     store_reg(s, a->rt, tmp);
7169     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7170     return true;
7171 }
7172 
7173 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7174 {
7175     return op_lda(s, a, MO_UL);
7176 }
7177 
7178 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7179 {
7180     return op_lda(s, a, MO_UB);
7181 }
7182 
7183 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7184 {
7185     return op_lda(s, a, MO_UW);
7186 }
7187 
7188 /*
7189  * Media instructions
7190  */
7191 
7192 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7193 {
7194     TCGv_i32 t1, t2;
7195 
7196     if (!ENABLE_ARCH_6) {
7197         return false;
7198     }
7199 
7200     t1 = load_reg(s, a->rn);
7201     t2 = load_reg(s, a->rm);
7202     gen_helper_usad8(t1, t1, t2);
7203     if (a->ra != 15) {
7204         t2 = load_reg(s, a->ra);
7205         tcg_gen_add_i32(t1, t1, t2);
7206     }
7207     store_reg(s, a->rd, t1);
7208     return true;
7209 }
7210 
7211 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7212 {
7213     TCGv_i32 tmp;
7214     int width = a->widthm1 + 1;
7215     int shift = a->lsb;
7216 
7217     if (!ENABLE_ARCH_6T2) {
7218         return false;
7219     }
7220     if (shift + width > 32) {
7221         /* UNPREDICTABLE; we choose to UNDEF */
7222         unallocated_encoding(s);
7223         return true;
7224     }
7225 
7226     tmp = load_reg(s, a->rn);
7227     if (u) {
7228         tcg_gen_extract_i32(tmp, tmp, shift, width);
7229     } else {
7230         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7231     }
7232     store_reg(s, a->rd, tmp);
7233     return true;
7234 }
7235 
7236 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7237 {
7238     return op_bfx(s, a, false);
7239 }
7240 
7241 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7242 {
7243     return op_bfx(s, a, true);
7244 }
7245 
7246 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7247 {
7248     int msb = a->msb, lsb = a->lsb;
7249     TCGv_i32 t_in, t_rd;
7250     int width;
7251 
7252     if (!ENABLE_ARCH_6T2) {
7253         return false;
7254     }
7255     if (msb < lsb) {
7256         /* UNPREDICTABLE; we choose to UNDEF */
7257         unallocated_encoding(s);
7258         return true;
7259     }
7260 
7261     width = msb + 1 - lsb;
7262     if (a->rn == 15) {
7263         /* BFC */
7264         t_in = tcg_constant_i32(0);
7265     } else {
7266         /* BFI */
7267         t_in = load_reg(s, a->rn);
7268     }
7269     t_rd = load_reg(s, a->rd);
7270     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7271     store_reg(s, a->rd, t_rd);
7272     return true;
7273 }
7274 
7275 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7276 {
7277     unallocated_encoding(s);
7278     return true;
7279 }
7280 
7281 /*
7282  * Parallel addition and subtraction
7283  */
7284 
7285 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7286                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7287 {
7288     TCGv_i32 t0, t1;
7289 
7290     if (s->thumb
7291         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7292         : !ENABLE_ARCH_6) {
7293         return false;
7294     }
7295 
7296     t0 = load_reg(s, a->rn);
7297     t1 = load_reg(s, a->rm);
7298 
7299     gen(t0, t0, t1);
7300 
7301     store_reg(s, a->rd, t0);
7302     return true;
7303 }
7304 
7305 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7306                              void (*gen)(TCGv_i32, TCGv_i32,
7307                                          TCGv_i32, TCGv_ptr))
7308 {
7309     TCGv_i32 t0, t1;
7310     TCGv_ptr ge;
7311 
7312     if (s->thumb
7313         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7314         : !ENABLE_ARCH_6) {
7315         return false;
7316     }
7317 
7318     t0 = load_reg(s, a->rn);
7319     t1 = load_reg(s, a->rm);
7320 
7321     ge = tcg_temp_new_ptr();
7322     tcg_gen_addi_ptr(ge, tcg_env, offsetof(CPUARMState, GE));
7323     gen(t0, t0, t1, ge);
7324 
7325     store_reg(s, a->rd, t0);
7326     return true;
7327 }
7328 
7329 #define DO_PAR_ADDSUB(NAME, helper) \
7330 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7331 {                                                       \
7332     return op_par_addsub(s, a, helper);                 \
7333 }
7334 
7335 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7336 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7337 {                                                       \
7338     return op_par_addsub_ge(s, a, helper);              \
7339 }
7340 
7341 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7342 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7343 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7344 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7345 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7346 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7347 
7348 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7349 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7350 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7351 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7352 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7353 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7354 
7355 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7356 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7357 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7358 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7359 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7360 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7361 
7362 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7363 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7364 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7365 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7366 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7367 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7368 
7369 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7370 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7371 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7372 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7373 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7374 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7375 
7376 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7377 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7378 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7379 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7380 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7381 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7382 
7383 #undef DO_PAR_ADDSUB
7384 #undef DO_PAR_ADDSUB_GE
7385 
7386 /*
7387  * Packing, unpacking, saturation, and reversal
7388  */
7389 
7390 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7391 {
7392     TCGv_i32 tn, tm;
7393     int shift = a->imm;
7394 
7395     if (s->thumb
7396         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7397         : !ENABLE_ARCH_6) {
7398         return false;
7399     }
7400 
7401     tn = load_reg(s, a->rn);
7402     tm = load_reg(s, a->rm);
7403     if (a->tb) {
7404         /* PKHTB */
7405         if (shift == 0) {
7406             shift = 31;
7407         }
7408         tcg_gen_sari_i32(tm, tm, shift);
7409         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7410     } else {
7411         /* PKHBT */
7412         tcg_gen_shli_i32(tm, tm, shift);
7413         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7414     }
7415     store_reg(s, a->rd, tn);
7416     return true;
7417 }
7418 
7419 static bool op_sat(DisasContext *s, arg_sat *a,
7420                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7421 {
7422     TCGv_i32 tmp;
7423     int shift = a->imm;
7424 
7425     if (!ENABLE_ARCH_6) {
7426         return false;
7427     }
7428 
7429     tmp = load_reg(s, a->rn);
7430     if (a->sh) {
7431         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7432     } else {
7433         tcg_gen_shli_i32(tmp, tmp, shift);
7434     }
7435 
7436     gen(tmp, tcg_env, tmp, tcg_constant_i32(a->satimm));
7437 
7438     store_reg(s, a->rd, tmp);
7439     return true;
7440 }
7441 
7442 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7443 {
7444     return op_sat(s, a, gen_helper_ssat);
7445 }
7446 
7447 static bool trans_USAT(DisasContext *s, arg_sat *a)
7448 {
7449     return op_sat(s, a, gen_helper_usat);
7450 }
7451 
7452 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7453 {
7454     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7455         return false;
7456     }
7457     return op_sat(s, a, gen_helper_ssat16);
7458 }
7459 
7460 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7461 {
7462     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7463         return false;
7464     }
7465     return op_sat(s, a, gen_helper_usat16);
7466 }
7467 
7468 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7469                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7470                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7471 {
7472     TCGv_i32 tmp;
7473 
7474     if (!ENABLE_ARCH_6) {
7475         return false;
7476     }
7477 
7478     tmp = load_reg(s, a->rm);
7479     /*
7480      * TODO: In many cases we could do a shift instead of a rotate.
7481      * Combined with a simple extend, that becomes an extract.
7482      */
7483     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7484     gen_extract(tmp, tmp);
7485 
7486     if (a->rn != 15) {
7487         TCGv_i32 tmp2 = load_reg(s, a->rn);
7488         gen_add(tmp, tmp, tmp2);
7489     }
7490     store_reg(s, a->rd, tmp);
7491     return true;
7492 }
7493 
7494 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7495 {
7496     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7497 }
7498 
7499 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7500 {
7501     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7502 }
7503 
7504 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7505 {
7506     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7507         return false;
7508     }
7509     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7510 }
7511 
7512 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7513 {
7514     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7515 }
7516 
7517 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7518 {
7519     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7520 }
7521 
7522 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7523 {
7524     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7525         return false;
7526     }
7527     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7528 }
7529 
7530 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7531 {
7532     TCGv_i32 t1, t2, t3;
7533 
7534     if (s->thumb
7535         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7536         : !ENABLE_ARCH_6) {
7537         return false;
7538     }
7539 
7540     t1 = load_reg(s, a->rn);
7541     t2 = load_reg(s, a->rm);
7542     t3 = tcg_temp_new_i32();
7543     tcg_gen_ld_i32(t3, tcg_env, offsetof(CPUARMState, GE));
7544     gen_helper_sel_flags(t1, t3, t1, t2);
7545     store_reg(s, a->rd, t1);
7546     return true;
7547 }
7548 
7549 static bool op_rr(DisasContext *s, arg_rr *a,
7550                   void (*gen)(TCGv_i32, TCGv_i32))
7551 {
7552     TCGv_i32 tmp;
7553 
7554     tmp = load_reg(s, a->rm);
7555     gen(tmp, tmp);
7556     store_reg(s, a->rd, tmp);
7557     return true;
7558 }
7559 
7560 static bool trans_REV(DisasContext *s, arg_rr *a)
7561 {
7562     if (!ENABLE_ARCH_6) {
7563         return false;
7564     }
7565     return op_rr(s, a, tcg_gen_bswap32_i32);
7566 }
7567 
7568 static bool trans_REV16(DisasContext *s, arg_rr *a)
7569 {
7570     if (!ENABLE_ARCH_6) {
7571         return false;
7572     }
7573     return op_rr(s, a, gen_rev16);
7574 }
7575 
7576 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7577 {
7578     if (!ENABLE_ARCH_6) {
7579         return false;
7580     }
7581     return op_rr(s, a, gen_revsh);
7582 }
7583 
7584 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7585 {
7586     if (!ENABLE_ARCH_6T2) {
7587         return false;
7588     }
7589     return op_rr(s, a, gen_helper_rbit);
7590 }
7591 
7592 /*
7593  * Signed multiply, signed and unsigned divide
7594  */
7595 
7596 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7597 {
7598     TCGv_i32 t1, t2;
7599 
7600     if (!ENABLE_ARCH_6) {
7601         return false;
7602     }
7603 
7604     t1 = load_reg(s, a->rn);
7605     t2 = load_reg(s, a->rm);
7606     if (m_swap) {
7607         gen_swap_half(t2, t2);
7608     }
7609     gen_smul_dual(t1, t2);
7610 
7611     if (sub) {
7612         /*
7613          * This subtraction cannot overflow, so we can do a simple
7614          * 32-bit subtraction and then a possible 32-bit saturating
7615          * addition of Ra.
7616          */
7617         tcg_gen_sub_i32(t1, t1, t2);
7618 
7619         if (a->ra != 15) {
7620             t2 = load_reg(s, a->ra);
7621             gen_helper_add_setq(t1, tcg_env, t1, t2);
7622         }
7623     } else if (a->ra == 15) {
7624         /* Single saturation-checking addition */
7625         gen_helper_add_setq(t1, tcg_env, t1, t2);
7626     } else {
7627         /*
7628          * We need to add the products and Ra together and then
7629          * determine whether the final result overflowed. Doing
7630          * this as two separate add-and-check-overflow steps incorrectly
7631          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7632          * Do all the arithmetic at 64-bits and then check for overflow.
7633          */
7634         TCGv_i64 p64, q64;
7635         TCGv_i32 t3, qf, one;
7636 
7637         p64 = tcg_temp_new_i64();
7638         q64 = tcg_temp_new_i64();
7639         tcg_gen_ext_i32_i64(p64, t1);
7640         tcg_gen_ext_i32_i64(q64, t2);
7641         tcg_gen_add_i64(p64, p64, q64);
7642         load_reg_var(s, t2, a->ra);
7643         tcg_gen_ext_i32_i64(q64, t2);
7644         tcg_gen_add_i64(p64, p64, q64);
7645 
7646         tcg_gen_extr_i64_i32(t1, t2, p64);
7647         /*
7648          * t1 is the low half of the result which goes into Rd.
7649          * We have overflow and must set Q if the high half (t2)
7650          * is different from the sign-extension of t1.
7651          */
7652         t3 = tcg_temp_new_i32();
7653         tcg_gen_sari_i32(t3, t1, 31);
7654         qf = load_cpu_field(QF);
7655         one = tcg_constant_i32(1);
7656         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7657         store_cpu_field(qf, QF);
7658     }
7659     store_reg(s, a->rd, t1);
7660     return true;
7661 }
7662 
7663 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7664 {
7665     return op_smlad(s, a, false, false);
7666 }
7667 
7668 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7669 {
7670     return op_smlad(s, a, true, false);
7671 }
7672 
7673 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7674 {
7675     return op_smlad(s, a, false, true);
7676 }
7677 
7678 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7679 {
7680     return op_smlad(s, a, true, true);
7681 }
7682 
7683 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7684 {
7685     TCGv_i32 t1, t2;
7686     TCGv_i64 l1, l2;
7687 
7688     if (!ENABLE_ARCH_6) {
7689         return false;
7690     }
7691 
7692     t1 = load_reg(s, a->rn);
7693     t2 = load_reg(s, a->rm);
7694     if (m_swap) {
7695         gen_swap_half(t2, t2);
7696     }
7697     gen_smul_dual(t1, t2);
7698 
7699     l1 = tcg_temp_new_i64();
7700     l2 = tcg_temp_new_i64();
7701     tcg_gen_ext_i32_i64(l1, t1);
7702     tcg_gen_ext_i32_i64(l2, t2);
7703 
7704     if (sub) {
7705         tcg_gen_sub_i64(l1, l1, l2);
7706     } else {
7707         tcg_gen_add_i64(l1, l1, l2);
7708     }
7709 
7710     gen_addq(s, l1, a->ra, a->rd);
7711     gen_storeq_reg(s, a->ra, a->rd, l1);
7712     return true;
7713 }
7714 
7715 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7716 {
7717     return op_smlald(s, a, false, false);
7718 }
7719 
7720 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7721 {
7722     return op_smlald(s, a, true, false);
7723 }
7724 
7725 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7726 {
7727     return op_smlald(s, a, false, true);
7728 }
7729 
7730 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7731 {
7732     return op_smlald(s, a, true, true);
7733 }
7734 
7735 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7736 {
7737     TCGv_i32 t1, t2;
7738 
7739     if (s->thumb
7740         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7741         : !ENABLE_ARCH_6) {
7742         return false;
7743     }
7744 
7745     t1 = load_reg(s, a->rn);
7746     t2 = load_reg(s, a->rm);
7747     tcg_gen_muls2_i32(t2, t1, t1, t2);
7748 
7749     if (a->ra != 15) {
7750         TCGv_i32 t3 = load_reg(s, a->ra);
7751         if (sub) {
7752             /*
7753              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
7754              * a non-zero multiplicand lowpart, and the correct result
7755              * lowpart for rounding.
7756              */
7757             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7758         } else {
7759             tcg_gen_add_i32(t1, t1, t3);
7760         }
7761     }
7762     if (round) {
7763         /*
7764          * Adding 0x80000000 to the 64-bit quantity means that we have
7765          * carry in to the high word when the low word has the msb set.
7766          */
7767         tcg_gen_shri_i32(t2, t2, 31);
7768         tcg_gen_add_i32(t1, t1, t2);
7769     }
7770     store_reg(s, a->rd, t1);
7771     return true;
7772 }
7773 
7774 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7775 {
7776     return op_smmla(s, a, false, false);
7777 }
7778 
7779 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7780 {
7781     return op_smmla(s, a, true, false);
7782 }
7783 
7784 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7785 {
7786     return op_smmla(s, a, false, true);
7787 }
7788 
7789 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7790 {
7791     return op_smmla(s, a, true, true);
7792 }
7793 
7794 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7795 {
7796     TCGv_i32 t1, t2;
7797 
7798     if (s->thumb
7799         ? !dc_isar_feature(aa32_thumb_div, s)
7800         : !dc_isar_feature(aa32_arm_div, s)) {
7801         return false;
7802     }
7803 
7804     t1 = load_reg(s, a->rn);
7805     t2 = load_reg(s, a->rm);
7806     if (u) {
7807         gen_helper_udiv(t1, tcg_env, t1, t2);
7808     } else {
7809         gen_helper_sdiv(t1, tcg_env, t1, t2);
7810     }
7811     store_reg(s, a->rd, t1);
7812     return true;
7813 }
7814 
7815 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7816 {
7817     return op_div(s, a, false);
7818 }
7819 
7820 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7821 {
7822     return op_div(s, a, true);
7823 }
7824 
7825 /*
7826  * Block data transfer
7827  */
7828 
7829 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7830 {
7831     TCGv_i32 addr = load_reg(s, a->rn);
7832 
7833     if (a->b) {
7834         if (a->i) {
7835             /* pre increment */
7836             tcg_gen_addi_i32(addr, addr, 4);
7837         } else {
7838             /* pre decrement */
7839             tcg_gen_addi_i32(addr, addr, -(n * 4));
7840         }
7841     } else if (!a->i && n != 1) {
7842         /* post decrement */
7843         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7844     }
7845 
7846     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7847         /*
7848          * If the writeback is incrementing SP rather than
7849          * decrementing it, and the initial SP is below the
7850          * stack limit but the final written-back SP would
7851          * be above, then we must not perform any memory
7852          * accesses, but it is IMPDEF whether we generate
7853          * an exception. We choose to do so in this case.
7854          * At this point 'addr' is the lowest address, so
7855          * either the original SP (if incrementing) or our
7856          * final SP (if decrementing), so that's what we check.
7857          */
7858         gen_helper_v8m_stackcheck(tcg_env, addr);
7859     }
7860 
7861     return addr;
7862 }
7863 
7864 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7865                                TCGv_i32 addr, int n)
7866 {
7867     if (a->w) {
7868         /* write back */
7869         if (!a->b) {
7870             if (a->i) {
7871                 /* post increment */
7872                 tcg_gen_addi_i32(addr, addr, 4);
7873             } else {
7874                 /* post decrement */
7875                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7876             }
7877         } else if (!a->i && n != 1) {
7878             /* pre decrement */
7879             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7880         }
7881         store_reg(s, a->rn, addr);
7882     }
7883 }
7884 
7885 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7886 {
7887     int i, j, n, list, mem_idx;
7888     bool user = a->u;
7889     TCGv_i32 addr, tmp;
7890 
7891     if (user) {
7892         /* STM (user) */
7893         if (IS_USER(s)) {
7894             /* Only usable in supervisor mode.  */
7895             unallocated_encoding(s);
7896             return true;
7897         }
7898     }
7899 
7900     list = a->list;
7901     n = ctpop16(list);
7902     if (n < min_n || a->rn == 15) {
7903         unallocated_encoding(s);
7904         return true;
7905     }
7906 
7907     s->eci_handled = true;
7908 
7909     addr = op_addr_block_pre(s, a, n);
7910     mem_idx = get_mem_index(s);
7911 
7912     for (i = j = 0; i < 16; i++) {
7913         if (!(list & (1 << i))) {
7914             continue;
7915         }
7916 
7917         if (user && i != 15) {
7918             tmp = tcg_temp_new_i32();
7919             gen_helper_get_user_reg(tmp, tcg_env, tcg_constant_i32(i));
7920         } else {
7921             tmp = load_reg(s, i);
7922         }
7923         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7924 
7925         /* No need to add after the last transfer.  */
7926         if (++j != n) {
7927             tcg_gen_addi_i32(addr, addr, 4);
7928         }
7929     }
7930 
7931     op_addr_block_post(s, a, addr, n);
7932     clear_eci_state(s);
7933     return true;
7934 }
7935 
7936 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7937 {
7938     /* BitCount(list) < 1 is UNPREDICTABLE */
7939     return op_stm(s, a, 1);
7940 }
7941 
7942 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7943 {
7944     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7945     if (a->w && (a->list & (1 << a->rn))) {
7946         unallocated_encoding(s);
7947         return true;
7948     }
7949     /* BitCount(list) < 2 is UNPREDICTABLE */
7950     return op_stm(s, a, 2);
7951 }
7952 
7953 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7954 {
7955     int i, j, n, list, mem_idx;
7956     bool loaded_base;
7957     bool user = a->u;
7958     bool exc_return = false;
7959     TCGv_i32 addr, tmp, loaded_var;
7960 
7961     if (user) {
7962         /* LDM (user), LDM (exception return) */
7963         if (IS_USER(s)) {
7964             /* Only usable in supervisor mode.  */
7965             unallocated_encoding(s);
7966             return true;
7967         }
7968         if (extract32(a->list, 15, 1)) {
7969             exc_return = true;
7970             user = false;
7971         } else {
7972             /* LDM (user) does not allow writeback.  */
7973             if (a->w) {
7974                 unallocated_encoding(s);
7975                 return true;
7976             }
7977         }
7978     }
7979 
7980     list = a->list;
7981     n = ctpop16(list);
7982     if (n < min_n || a->rn == 15) {
7983         unallocated_encoding(s);
7984         return true;
7985     }
7986 
7987     s->eci_handled = true;
7988 
7989     addr = op_addr_block_pre(s, a, n);
7990     mem_idx = get_mem_index(s);
7991     loaded_base = false;
7992     loaded_var = NULL;
7993 
7994     for (i = j = 0; i < 16; i++) {
7995         if (!(list & (1 << i))) {
7996             continue;
7997         }
7998 
7999         tmp = tcg_temp_new_i32();
8000         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8001         if (user) {
8002             gen_helper_set_user_reg(tcg_env, tcg_constant_i32(i), tmp);
8003         } else if (i == a->rn) {
8004             loaded_var = tmp;
8005             loaded_base = true;
8006         } else if (i == 15 && exc_return) {
8007             store_pc_exc_ret(s, tmp);
8008         } else {
8009             store_reg_from_load(s, i, tmp);
8010         }
8011 
8012         /* No need to add after the last transfer.  */
8013         if (++j != n) {
8014             tcg_gen_addi_i32(addr, addr, 4);
8015         }
8016     }
8017 
8018     op_addr_block_post(s, a, addr, n);
8019 
8020     if (loaded_base) {
8021         /* Note that we reject base == pc above.  */
8022         store_reg(s, a->rn, loaded_var);
8023     }
8024 
8025     if (exc_return) {
8026         /* Restore CPSR from SPSR.  */
8027         tmp = load_cpu_field(spsr);
8028         translator_io_start(&s->base);
8029         gen_helper_cpsr_write_eret(tcg_env, tmp);
8030         /* Must exit loop to check un-masked IRQs */
8031         s->base.is_jmp = DISAS_EXIT;
8032     }
8033     clear_eci_state(s);
8034     return true;
8035 }
8036 
8037 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8038 {
8039     /*
8040      * Writeback register in register list is UNPREDICTABLE
8041      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8042      * an UNKNOWN value to the base register.
8043      */
8044     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8045         unallocated_encoding(s);
8046         return true;
8047     }
8048     /* BitCount(list) < 1 is UNPREDICTABLE */
8049     return do_ldm(s, a, 1);
8050 }
8051 
8052 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8053 {
8054     /* Writeback register in register list is UNPREDICTABLE for T32. */
8055     if (a->w && (a->list & (1 << a->rn))) {
8056         unallocated_encoding(s);
8057         return true;
8058     }
8059     /* BitCount(list) < 2 is UNPREDICTABLE */
8060     return do_ldm(s, a, 2);
8061 }
8062 
8063 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8064 {
8065     /* Writeback is conditional on the base register not being loaded.  */
8066     a->w = !(a->list & (1 << a->rn));
8067     /* BitCount(list) < 1 is UNPREDICTABLE */
8068     return do_ldm(s, a, 1);
8069 }
8070 
8071 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8072 {
8073     int i;
8074     TCGv_i32 zero;
8075 
8076     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8077         return false;
8078     }
8079 
8080     if (extract32(a->list, 13, 1)) {
8081         return false;
8082     }
8083 
8084     if (!a->list) {
8085         /* UNPREDICTABLE; we choose to UNDEF */
8086         return false;
8087     }
8088 
8089     s->eci_handled = true;
8090 
8091     zero = tcg_constant_i32(0);
8092     for (i = 0; i < 15; i++) {
8093         if (extract32(a->list, i, 1)) {
8094             /* Clear R[i] */
8095             tcg_gen_mov_i32(cpu_R[i], zero);
8096         }
8097     }
8098     if (extract32(a->list, 15, 1)) {
8099         /*
8100          * Clear APSR (by calling the MSR helper with the same argument
8101          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8102          */
8103         gen_helper_v7m_msr(tcg_env, tcg_constant_i32(0xc00), zero);
8104     }
8105     clear_eci_state(s);
8106     return true;
8107 }
8108 
8109 /*
8110  * Branch, branch with link
8111  */
8112 
8113 static bool trans_B(DisasContext *s, arg_i *a)
8114 {
8115     gen_jmp(s, jmp_diff(s, a->imm));
8116     return true;
8117 }
8118 
8119 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8120 {
8121     /* This has cond from encoding, required to be outside IT block.  */
8122     if (a->cond >= 0xe) {
8123         return false;
8124     }
8125     if (s->condexec_mask) {
8126         unallocated_encoding(s);
8127         return true;
8128     }
8129     arm_skip_unless(s, a->cond);
8130     gen_jmp(s, jmp_diff(s, a->imm));
8131     return true;
8132 }
8133 
8134 static bool trans_BL(DisasContext *s, arg_i *a)
8135 {
8136     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8137     gen_jmp(s, jmp_diff(s, a->imm));
8138     return true;
8139 }
8140 
8141 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8142 {
8143     /*
8144      * BLX <imm> would be useless on M-profile; the encoding space
8145      * is used for other insns from v8.1M onward, and UNDEFs before that.
8146      */
8147     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8148         return false;
8149     }
8150 
8151     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8152     if (s->thumb && (a->imm & 2)) {
8153         return false;
8154     }
8155     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8156     store_cpu_field_constant(!s->thumb, thumb);
8157     /* This jump is computed from an aligned PC: subtract off the low bits. */
8158     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8159     return true;
8160 }
8161 
8162 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8163 {
8164     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8165     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8166     return true;
8167 }
8168 
8169 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8170 {
8171     TCGv_i32 tmp = tcg_temp_new_i32();
8172 
8173     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8174     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8175     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8176     gen_bx(s, tmp);
8177     return true;
8178 }
8179 
8180 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8181 {
8182     TCGv_i32 tmp;
8183 
8184     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8185     if (!ENABLE_ARCH_5) {
8186         return false;
8187     }
8188     tmp = tcg_temp_new_i32();
8189     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8190     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8191     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8192     gen_bx(s, tmp);
8193     return true;
8194 }
8195 
8196 static bool trans_BF(DisasContext *s, arg_BF *a)
8197 {
8198     /*
8199      * M-profile branch future insns. The architecture permits an
8200      * implementation to implement these as NOPs (equivalent to
8201      * discarding the LO_BRANCH_INFO cache immediately), and we
8202      * take that IMPDEF option because for QEMU a "real" implementation
8203      * would be complicated and wouldn't execute any faster.
8204      */
8205     if (!dc_isar_feature(aa32_lob, s)) {
8206         return false;
8207     }
8208     if (a->boff == 0) {
8209         /* SEE "Related encodings" (loop insns) */
8210         return false;
8211     }
8212     /* Handle as NOP */
8213     return true;
8214 }
8215 
8216 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8217 {
8218     /* M-profile low-overhead loop start */
8219     TCGv_i32 tmp;
8220 
8221     if (!dc_isar_feature(aa32_lob, s)) {
8222         return false;
8223     }
8224     if (a->rn == 13 || a->rn == 15) {
8225         /*
8226          * For DLSTP rn == 15 is a related encoding (LCTP); the
8227          * other cases caught by this condition are all
8228          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8229          */
8230         return false;
8231     }
8232 
8233     if (a->size != 4) {
8234         /* DLSTP */
8235         if (!dc_isar_feature(aa32_mve, s)) {
8236             return false;
8237         }
8238         if (!vfp_access_check(s)) {
8239             return true;
8240         }
8241     }
8242 
8243     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8244     tmp = load_reg(s, a->rn);
8245     store_reg(s, 14, tmp);
8246     if (a->size != 4) {
8247         /* DLSTP: set FPSCR.LTPSIZE */
8248         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8249         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8250     }
8251     return true;
8252 }
8253 
8254 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8255 {
8256     /* M-profile low-overhead while-loop start */
8257     TCGv_i32 tmp;
8258     DisasLabel nextlabel;
8259 
8260     if (!dc_isar_feature(aa32_lob, s)) {
8261         return false;
8262     }
8263     if (a->rn == 13 || a->rn == 15) {
8264         /*
8265          * For WLSTP rn == 15 is a related encoding (LE); the
8266          * other cases caught by this condition are all
8267          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8268          */
8269         return false;
8270     }
8271     if (s->condexec_mask) {
8272         /*
8273          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8274          * we choose to UNDEF, because otherwise our use of
8275          * gen_goto_tb(1) would clash with the use of TB exit 1
8276          * in the dc->condjmp condition-failed codepath in
8277          * arm_tr_tb_stop() and we'd get an assertion.
8278          */
8279         return false;
8280     }
8281     if (a->size != 4) {
8282         /* WLSTP */
8283         if (!dc_isar_feature(aa32_mve, s)) {
8284             return false;
8285         }
8286         /*
8287          * We need to check that the FPU is enabled here, but mustn't
8288          * call vfp_access_check() to do that because we don't want to
8289          * do the lazy state preservation in the "loop count is zero" case.
8290          * Do the check-and-raise-exception by hand.
8291          */
8292         if (s->fp_excp_el) {
8293             gen_exception_insn_el(s, 0, EXCP_NOCP,
8294                                   syn_uncategorized(), s->fp_excp_el);
8295             return true;
8296         }
8297     }
8298 
8299     nextlabel = gen_disas_label(s);
8300     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8301     tmp = load_reg(s, a->rn);
8302     store_reg(s, 14, tmp);
8303     if (a->size != 4) {
8304         /*
8305          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8306          * lazy state preservation, new FP context creation, etc,
8307          * that vfp_access_check() does. We know that the actual
8308          * access check will succeed (ie it won't generate code that
8309          * throws an exception) because we did that check by hand earlier.
8310          */
8311         bool ok = vfp_access_check(s);
8312         assert(ok);
8313         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8314         /*
8315          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8316          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8317          */
8318     }
8319     gen_jmp_tb(s, curr_insn_len(s), 1);
8320 
8321     set_disas_label(s, nextlabel);
8322     gen_jmp(s, jmp_diff(s, a->imm));
8323     return true;
8324 }
8325 
8326 static bool trans_LE(DisasContext *s, arg_LE *a)
8327 {
8328     /*
8329      * M-profile low-overhead loop end. The architecture permits an
8330      * implementation to discard the LO_BRANCH_INFO cache at any time,
8331      * and we take the IMPDEF option to never set it in the first place
8332      * (equivalent to always discarding it immediately), because for QEMU
8333      * a "real" implementation would be complicated and wouldn't execute
8334      * any faster.
8335      */
8336     TCGv_i32 tmp;
8337     DisasLabel loopend;
8338     bool fpu_active;
8339 
8340     if (!dc_isar_feature(aa32_lob, s)) {
8341         return false;
8342     }
8343     if (a->f && a->tp) {
8344         return false;
8345     }
8346     if (s->condexec_mask) {
8347         /*
8348          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8349          * we choose to UNDEF, because otherwise our use of
8350          * gen_goto_tb(1) would clash with the use of TB exit 1
8351          * in the dc->condjmp condition-failed codepath in
8352          * arm_tr_tb_stop() and we'd get an assertion.
8353          */
8354         return false;
8355     }
8356     if (a->tp) {
8357         /* LETP */
8358         if (!dc_isar_feature(aa32_mve, s)) {
8359             return false;
8360         }
8361         if (!vfp_access_check(s)) {
8362             s->eci_handled = true;
8363             return true;
8364         }
8365     }
8366 
8367     /* LE/LETP is OK with ECI set and leaves it untouched */
8368     s->eci_handled = true;
8369 
8370     /*
8371      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8372      * UsageFault exception for the LE insn in that case. Note that we
8373      * are not directly checking FPSCR.LTPSIZE but instead check the
8374      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8375      * not currently active (ie ActiveFPState() returns false). We
8376      * can identify not-active purely from our TB state flags, as the
8377      * FPU is active only if:
8378      *  the FPU is enabled
8379      *  AND lazy state preservation is not active
8380      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8381      *
8382      * Usually we don't need to care about this distinction between
8383      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8384      * will either take an exception or clear the conditions that make
8385      * the FPU not active. But LE is an unusual case of a non-FP insn
8386      * that looks at LTPSIZE.
8387      */
8388     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8389 
8390     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8391         /* Need to do a runtime check for LTPSIZE != 4 */
8392         DisasLabel skipexc = gen_disas_label(s);
8393         tmp = load_cpu_field(v7m.ltpsize);
8394         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8395         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8396         set_disas_label(s, skipexc);
8397     }
8398 
8399     if (a->f) {
8400         /* Loop-forever: just jump back to the loop start */
8401         gen_jmp(s, jmp_diff(s, -a->imm));
8402         return true;
8403     }
8404 
8405     /*
8406      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8407      * For LE, we know at this point that LTPSIZE must be 4 and the
8408      * loop decrement value is 1. For LETP we need to calculate the decrement
8409      * value from LTPSIZE.
8410      */
8411     loopend = gen_disas_label(s);
8412     if (!a->tp) {
8413         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8414         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8415     } else {
8416         /*
8417          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8418          * so that decr stays live after the brcondi.
8419          */
8420         TCGv_i32 decr = tcg_temp_new_i32();
8421         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8422         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8423         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8424 
8425         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8426 
8427         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8428     }
8429     /* Jump back to the loop start */
8430     gen_jmp(s, jmp_diff(s, -a->imm));
8431 
8432     set_disas_label(s, loopend);
8433     if (a->tp) {
8434         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8435         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8436     }
8437     /* End TB, continuing to following insn */
8438     gen_jmp_tb(s, curr_insn_len(s), 1);
8439     return true;
8440 }
8441 
8442 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8443 {
8444     /*
8445      * M-profile Loop Clear with Tail Predication. Since our implementation
8446      * doesn't cache branch information, all we need to do is reset
8447      * FPSCR.LTPSIZE to 4.
8448      */
8449 
8450     if (!dc_isar_feature(aa32_lob, s) ||
8451         !dc_isar_feature(aa32_mve, s)) {
8452         return false;
8453     }
8454 
8455     if (!vfp_access_check(s)) {
8456         return true;
8457     }
8458 
8459     store_cpu_field_constant(4, v7m.ltpsize);
8460     return true;
8461 }
8462 
8463 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8464 {
8465     /*
8466      * M-profile Create Vector Tail Predicate. This insn is itself
8467      * predicated and is subject to beatwise execution.
8468      */
8469     TCGv_i32 rn_shifted, masklen;
8470 
8471     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8472         return false;
8473     }
8474 
8475     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8476         return true;
8477     }
8478 
8479     /*
8480      * We pre-calculate the mask length here to avoid having
8481      * to have multiple helpers specialized for size.
8482      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8483      */
8484     rn_shifted = tcg_temp_new_i32();
8485     masklen = load_reg(s, a->rn);
8486     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8487     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8488                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8489                         rn_shifted, tcg_constant_i32(16));
8490     gen_helper_mve_vctp(tcg_env, masklen);
8491     /* This insn updates predication bits */
8492     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8493     mve_update_eci(s);
8494     return true;
8495 }
8496 
8497 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8498 {
8499     TCGv_i32 addr, tmp;
8500 
8501     tmp = load_reg(s, a->rm);
8502     if (half) {
8503         tcg_gen_add_i32(tmp, tmp, tmp);
8504     }
8505     addr = load_reg(s, a->rn);
8506     tcg_gen_add_i32(addr, addr, tmp);
8507 
8508     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8509 
8510     tcg_gen_add_i32(tmp, tmp, tmp);
8511     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8512     tcg_gen_add_i32(tmp, tmp, addr);
8513     store_reg(s, 15, tmp);
8514     return true;
8515 }
8516 
8517 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8518 {
8519     return op_tbranch(s, a, false);
8520 }
8521 
8522 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8523 {
8524     return op_tbranch(s, a, true);
8525 }
8526 
8527 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8528 {
8529     TCGv_i32 tmp = load_reg(s, a->rn);
8530 
8531     arm_gen_condlabel(s);
8532     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8533                         tmp, 0, s->condlabel.label);
8534     gen_jmp(s, jmp_diff(s, a->imm));
8535     return true;
8536 }
8537 
8538 /*
8539  * Supervisor call - both T32 & A32 come here so we need to check
8540  * which mode we are in when checking for semihosting.
8541  */
8542 
8543 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8544 {
8545     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8546 
8547     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8548         semihosting_enabled(s->current_el == 0) &&
8549         (a->imm == semihost_imm)) {
8550         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8551     } else {
8552         if (s->fgt_svc) {
8553             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8554             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8555         } else {
8556             gen_update_pc(s, curr_insn_len(s));
8557             s->svc_imm = a->imm;
8558             s->base.is_jmp = DISAS_SWI;
8559         }
8560     }
8561     return true;
8562 }
8563 
8564 /*
8565  * Unconditional system instructions
8566  */
8567 
8568 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8569 {
8570     static const int8_t pre_offset[4] = {
8571         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8572     };
8573     static const int8_t post_offset[4] = {
8574         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8575     };
8576     TCGv_i32 addr, t1, t2;
8577 
8578     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8579         return false;
8580     }
8581     if (IS_USER(s)) {
8582         unallocated_encoding(s);
8583         return true;
8584     }
8585 
8586     addr = load_reg(s, a->rn);
8587     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8588 
8589     /* Load PC into tmp and CPSR into tmp2.  */
8590     t1 = tcg_temp_new_i32();
8591     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8592     tcg_gen_addi_i32(addr, addr, 4);
8593     t2 = tcg_temp_new_i32();
8594     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8595 
8596     if (a->w) {
8597         /* Base writeback.  */
8598         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8599         store_reg(s, a->rn, addr);
8600     }
8601     gen_rfe(s, t1, t2);
8602     return true;
8603 }
8604 
8605 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8606 {
8607     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8608         return false;
8609     }
8610     gen_srs(s, a->mode, a->pu, a->w);
8611     return true;
8612 }
8613 
8614 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8615 {
8616     uint32_t mask, val;
8617 
8618     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8619         return false;
8620     }
8621     if (IS_USER(s)) {
8622         /* Implemented as NOP in user mode.  */
8623         return true;
8624     }
8625     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8626 
8627     mask = val = 0;
8628     if (a->imod & 2) {
8629         if (a->A) {
8630             mask |= CPSR_A;
8631         }
8632         if (a->I) {
8633             mask |= CPSR_I;
8634         }
8635         if (a->F) {
8636             mask |= CPSR_F;
8637         }
8638         if (a->imod & 1) {
8639             val |= mask;
8640         }
8641     }
8642     if (a->M) {
8643         mask |= CPSR_M;
8644         val |= a->mode;
8645     }
8646     if (mask) {
8647         gen_set_psr_im(s, mask, 0, val);
8648     }
8649     return true;
8650 }
8651 
8652 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8653 {
8654     TCGv_i32 tmp, addr;
8655 
8656     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8657         return false;
8658     }
8659     if (IS_USER(s)) {
8660         /* Implemented as NOP in user mode.  */
8661         return true;
8662     }
8663 
8664     tmp = tcg_constant_i32(a->im);
8665     /* FAULTMASK */
8666     if (a->F) {
8667         addr = tcg_constant_i32(19);
8668         gen_helper_v7m_msr(tcg_env, addr, tmp);
8669     }
8670     /* PRIMASK */
8671     if (a->I) {
8672         addr = tcg_constant_i32(16);
8673         gen_helper_v7m_msr(tcg_env, addr, tmp);
8674     }
8675     gen_rebuild_hflags(s, false);
8676     gen_lookup_tb(s);
8677     return true;
8678 }
8679 
8680 /*
8681  * Clear-Exclusive, Barriers
8682  */
8683 
8684 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8685 {
8686     if (s->thumb
8687         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8688         : !ENABLE_ARCH_6K) {
8689         return false;
8690     }
8691     gen_clrex(s);
8692     return true;
8693 }
8694 
8695 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8696 {
8697     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8698         return false;
8699     }
8700     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8701     return true;
8702 }
8703 
8704 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8705 {
8706     return trans_DSB(s, NULL);
8707 }
8708 
8709 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8710 {
8711     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8712         return false;
8713     }
8714     /*
8715      * We need to break the TB after this insn to execute
8716      * self-modifying code correctly and also to take
8717      * any pending interrupts immediately.
8718      */
8719     s->base.is_jmp = DISAS_TOO_MANY;
8720     return true;
8721 }
8722 
8723 static bool trans_SB(DisasContext *s, arg_SB *a)
8724 {
8725     if (!dc_isar_feature(aa32_sb, s)) {
8726         return false;
8727     }
8728     /*
8729      * TODO: There is no speculation barrier opcode
8730      * for TCG; MB and end the TB instead.
8731      */
8732     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8733     s->base.is_jmp = DISAS_TOO_MANY;
8734     return true;
8735 }
8736 
8737 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8738 {
8739     if (!ENABLE_ARCH_6) {
8740         return false;
8741     }
8742     if (a->E != (s->be_data == MO_BE)) {
8743         gen_helper_setend(tcg_env);
8744         s->base.is_jmp = DISAS_UPDATE_EXIT;
8745     }
8746     return true;
8747 }
8748 
8749 /*
8750  * Preload instructions
8751  * All are nops, contingent on the appropriate arch level.
8752  */
8753 
8754 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8755 {
8756     return ENABLE_ARCH_5TE;
8757 }
8758 
8759 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8760 {
8761     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8762 }
8763 
8764 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8765 {
8766     return ENABLE_ARCH_7;
8767 }
8768 
8769 /*
8770  * If-then
8771  */
8772 
8773 static bool trans_IT(DisasContext *s, arg_IT *a)
8774 {
8775     int cond_mask = a->cond_mask;
8776 
8777     /*
8778      * No actual code generated for this insn, just setup state.
8779      *
8780      * Combinations of firstcond and mask which set up an 0b1111
8781      * condition are UNPREDICTABLE; we take the CONSTRAINED
8782      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8783      * i.e. both meaning "execute always".
8784      */
8785     s->condexec_cond = (cond_mask >> 4) & 0xe;
8786     s->condexec_mask = cond_mask & 0x1f;
8787     return true;
8788 }
8789 
8790 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8791 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8792 {
8793     TCGv_i32 rn, rm;
8794     DisasCompare c;
8795 
8796     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8797         return false;
8798     }
8799 
8800     if (a->rm == 13) {
8801         /* SEE "Related encodings" (MVE shifts) */
8802         return false;
8803     }
8804 
8805     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8806         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8807         return false;
8808     }
8809 
8810     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8811     rn = tcg_temp_new_i32();
8812     rm = tcg_temp_new_i32();
8813     if (a->rn == 15) {
8814         tcg_gen_movi_i32(rn, 0);
8815     } else {
8816         load_reg_var(s, rn, a->rn);
8817     }
8818     if (a->rm == 15) {
8819         tcg_gen_movi_i32(rm, 0);
8820     } else {
8821         load_reg_var(s, rm, a->rm);
8822     }
8823 
8824     switch (a->op) {
8825     case 0: /* CSEL */
8826         break;
8827     case 1: /* CSINC */
8828         tcg_gen_addi_i32(rm, rm, 1);
8829         break;
8830     case 2: /* CSINV */
8831         tcg_gen_not_i32(rm, rm);
8832         break;
8833     case 3: /* CSNEG */
8834         tcg_gen_neg_i32(rm, rm);
8835         break;
8836     default:
8837         g_assert_not_reached();
8838     }
8839 
8840     arm_test_cc(&c, a->fcond);
8841     tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8842 
8843     store_reg(s, a->rd, rn);
8844     return true;
8845 }
8846 
8847 /*
8848  * Legacy decoder.
8849  */
8850 
8851 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8852 {
8853     unsigned int cond = insn >> 28;
8854 
8855     /* M variants do not implement ARM mode; this must raise the INVSTATE
8856      * UsageFault exception.
8857      */
8858     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8859         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8860         return;
8861     }
8862 
8863     if (s->pstate_il) {
8864         /*
8865          * Illegal execution state. This has priority over BTI
8866          * exceptions, but comes after instruction abort exceptions.
8867          */
8868         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8869         return;
8870     }
8871 
8872     if (cond == 0xf) {
8873         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8874          * choose to UNDEF. In ARMv5 and above the space is used
8875          * for miscellaneous unconditional instructions.
8876          */
8877         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8878             unallocated_encoding(s);
8879             return;
8880         }
8881 
8882         /* Unconditional instructions.  */
8883         /* TODO: Perhaps merge these into one decodetree output file.  */
8884         if (disas_a32_uncond(s, insn) ||
8885             disas_vfp_uncond(s, insn) ||
8886             disas_neon_dp(s, insn) ||
8887             disas_neon_ls(s, insn) ||
8888             disas_neon_shared(s, insn)) {
8889             return;
8890         }
8891         /* fall back to legacy decoder */
8892 
8893         if ((insn & 0x0e000f00) == 0x0c000100) {
8894             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8895                 /* iWMMXt register transfer.  */
8896                 if (extract32(s->c15_cpar, 1, 1)) {
8897                     if (!disas_iwmmxt_insn(s, insn)) {
8898                         return;
8899                     }
8900                 }
8901             }
8902         }
8903         goto illegal_op;
8904     }
8905     if (cond != 0xe) {
8906         /* if not always execute, we generate a conditional jump to
8907            next instruction */
8908         arm_skip_unless(s, cond);
8909     }
8910 
8911     /* TODO: Perhaps merge these into one decodetree output file.  */
8912     if (disas_a32(s, insn) ||
8913         disas_vfp(s, insn)) {
8914         return;
8915     }
8916     /* fall back to legacy decoder */
8917     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8918     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8919         if (((insn & 0x0c000e00) == 0x0c000000)
8920             && ((insn & 0x03000000) != 0x03000000)) {
8921             /* Coprocessor insn, coprocessor 0 or 1 */
8922             disas_xscale_insn(s, insn);
8923             return;
8924         }
8925     }
8926 
8927 illegal_op:
8928     unallocated_encoding(s);
8929 }
8930 
8931 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8932 {
8933     /*
8934      * Return true if this is a 16 bit instruction. We must be precise
8935      * about this (matching the decode).
8936      */
8937     if ((insn >> 11) < 0x1d) {
8938         /* Definitely a 16-bit instruction */
8939         return true;
8940     }
8941 
8942     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8943      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8944      * end up actually treating this as two 16-bit insns, though,
8945      * if it's half of a bl/blx pair that might span a page boundary.
8946      */
8947     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8948         arm_dc_feature(s, ARM_FEATURE_M)) {
8949         /* Thumb2 cores (including all M profile ones) always treat
8950          * 32-bit insns as 32-bit.
8951          */
8952         return false;
8953     }
8954 
8955     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8956         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8957          * is not on the next page; we merge this into a 32-bit
8958          * insn.
8959          */
8960         return false;
8961     }
8962     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8963      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8964      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8965      *  -- handle as single 16 bit insn
8966      */
8967     return true;
8968 }
8969 
8970 /* Translate a 32-bit thumb instruction. */
8971 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8972 {
8973     /*
8974      * ARMv6-M supports a limited subset of Thumb2 instructions.
8975      * Other Thumb1 architectures allow only 32-bit
8976      * combined BL/BLX prefix and suffix.
8977      */
8978     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8979         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8980         int i;
8981         bool found = false;
8982         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8983                                                0xf3b08040 /* dsb */,
8984                                                0xf3b08050 /* dmb */,
8985                                                0xf3b08060 /* isb */,
8986                                                0xf3e08000 /* mrs */,
8987                                                0xf000d000 /* bl */};
8988         static const uint32_t armv6m_mask[] = {0xffe0d000,
8989                                                0xfff0d0f0,
8990                                                0xfff0d0f0,
8991                                                0xfff0d0f0,
8992                                                0xffe0d000,
8993                                                0xf800d000};
8994 
8995         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
8996             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
8997                 found = true;
8998                 break;
8999             }
9000         }
9001         if (!found) {
9002             goto illegal_op;
9003         }
9004     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9005         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9006             unallocated_encoding(s);
9007             return;
9008         }
9009     }
9010 
9011     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9012         /*
9013          * NOCP takes precedence over any UNDEF for (almost) the
9014          * entire wide range of coprocessor-space encodings, so check
9015          * for it first before proceeding to actually decode eg VFP
9016          * insns. This decode also handles the few insns which are
9017          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9018          */
9019         if (disas_m_nocp(s, insn)) {
9020             return;
9021         }
9022     }
9023 
9024     if ((insn & 0xef000000) == 0xef000000) {
9025         /*
9026          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9027          * transform into
9028          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9029          */
9030         uint32_t a32_insn = (insn & 0xe2ffffff) |
9031             ((insn & (1 << 28)) >> 4) | (1 << 28);
9032 
9033         if (disas_neon_dp(s, a32_insn)) {
9034             return;
9035         }
9036     }
9037 
9038     if ((insn & 0xff100000) == 0xf9000000) {
9039         /*
9040          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9041          * transform into
9042          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9043          */
9044         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9045 
9046         if (disas_neon_ls(s, a32_insn)) {
9047             return;
9048         }
9049     }
9050 
9051     /*
9052      * TODO: Perhaps merge these into one decodetree output file.
9053      * Note disas_vfp is written for a32 with cond field in the
9054      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9055      */
9056     if (disas_t32(s, insn) ||
9057         disas_vfp_uncond(s, insn) ||
9058         disas_neon_shared(s, insn) ||
9059         disas_mve(s, insn) ||
9060         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9061         return;
9062     }
9063 
9064 illegal_op:
9065     unallocated_encoding(s);
9066 }
9067 
9068 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9069 {
9070     if (!disas_t16(s, insn)) {
9071         unallocated_encoding(s);
9072     }
9073 }
9074 
9075 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9076 {
9077     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9078      * (False positives are OK, false negatives are not.)
9079      * We know this is a Thumb insn, and our caller ensures we are
9080      * only called if dc->base.pc_next is less than 4 bytes from the page
9081      * boundary, so we cross the page if the first 16 bits indicate
9082      * that this is a 32 bit insn.
9083      */
9084     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9085 
9086     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9087 }
9088 
9089 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9090 {
9091     DisasContext *dc = container_of(dcbase, DisasContext, base);
9092     CPUARMState *env = cpu_env(cs);
9093     ARMCPU *cpu = env_archcpu(env);
9094     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9095     uint32_t condexec, core_mmu_idx;
9096 
9097     dc->isar = &cpu->isar;
9098     dc->condjmp = 0;
9099     dc->pc_save = dc->base.pc_first;
9100     dc->aarch64 = false;
9101     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9102     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9103     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9104     /*
9105      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9106      * is always the IT bits. On M-profile, some of the reserved encodings
9107      * of IT are used instead to indicate either ICI or ECI, which
9108      * indicate partial progress of a restartable insn that was interrupted
9109      * partway through by an exception:
9110      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9111      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9112      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9113      * insn, behave normally".
9114      */
9115     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9116     dc->eci_handled = false;
9117     if (condexec & 0xf) {
9118         dc->condexec_mask = (condexec & 0xf) << 1;
9119         dc->condexec_cond = condexec >> 4;
9120     } else {
9121         if (arm_feature(env, ARM_FEATURE_M)) {
9122             dc->eci = condexec >> 4;
9123         }
9124     }
9125 
9126     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9127     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9128     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9129 #if !defined(CONFIG_USER_ONLY)
9130     dc->user = (dc->current_el == 0);
9131 #endif
9132     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9133     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9134     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9135     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9136     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9137 
9138     if (arm_feature(env, ARM_FEATURE_M)) {
9139         dc->vfp_enabled = 1;
9140         dc->be_data = MO_TE;
9141         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9142         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9143         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9144         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9145         dc->v7m_new_fp_ctxt_needed =
9146             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9147         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9148         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9149     } else {
9150         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9151         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9152         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9153         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9154         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9155             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9156         } else {
9157             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9158             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9159         }
9160         dc->sme_trap_nonstreaming =
9161             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9162     }
9163     dc->lse2 = false; /* applies only to aarch64 */
9164     dc->cp_regs = cpu->cp_regs;
9165     dc->features = env->features;
9166 
9167     /* Single step state. The code-generation logic here is:
9168      *  SS_ACTIVE == 0:
9169      *   generate code with no special handling for single-stepping (except
9170      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9171      *   this happens anyway because those changes are all system register or
9172      *   PSTATE writes).
9173      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9174      *   emit code for one insn
9175      *   emit code to clear PSTATE.SS
9176      *   emit code to generate software step exception for completed step
9177      *   end TB (as usual for having generated an exception)
9178      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9179      *   emit code to generate a software step exception
9180      *   end the TB
9181      */
9182     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9183     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9184     dc->is_ldex = false;
9185 
9186     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9187 
9188     /* If architectural single step active, limit to 1.  */
9189     if (dc->ss_active) {
9190         dc->base.max_insns = 1;
9191     }
9192 
9193     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9194        to those left on the page.  */
9195     if (!dc->thumb) {
9196         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9197         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9198     }
9199 
9200     cpu_V0 = tcg_temp_new_i64();
9201     cpu_V1 = tcg_temp_new_i64();
9202     cpu_M0 = tcg_temp_new_i64();
9203 }
9204 
9205 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9206 {
9207     DisasContext *dc = container_of(dcbase, DisasContext, base);
9208 
9209     /* A note on handling of the condexec (IT) bits:
9210      *
9211      * We want to avoid the overhead of having to write the updated condexec
9212      * bits back to the CPUARMState for every instruction in an IT block. So:
9213      * (1) if the condexec bits are not already zero then we write
9214      * zero back into the CPUARMState now. This avoids complications trying
9215      * to do it at the end of the block. (For example if we don't do this
9216      * it's hard to identify whether we can safely skip writing condexec
9217      * at the end of the TB, which we definitely want to do for the case
9218      * where a TB doesn't do anything with the IT state at all.)
9219      * (2) if we are going to leave the TB then we call gen_set_condexec()
9220      * which will write the correct value into CPUARMState if zero is wrong.
9221      * This is done both for leaving the TB at the end, and for leaving
9222      * it because of an exception we know will happen, which is done in
9223      * gen_exception_insn(). The latter is necessary because we need to
9224      * leave the TB with the PC/IT state just prior to execution of the
9225      * instruction which caused the exception.
9226      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9227      * then the CPUARMState will be wrong and we need to reset it.
9228      * This is handled in the same way as restoration of the
9229      * PC in these situations; we save the value of the condexec bits
9230      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9231      * then uses this to restore them after an exception.
9232      *
9233      * Note that there are no instructions which can read the condexec
9234      * bits, and none which can write non-static values to them, so
9235      * we don't need to care about whether CPUARMState is correct in the
9236      * middle of a TB.
9237      */
9238 
9239     /* Reset the conditional execution bits immediately. This avoids
9240        complications trying to do it at the end of the block.  */
9241     if (dc->condexec_mask || dc->condexec_cond) {
9242         store_cpu_field_constant(0, condexec_bits);
9243     }
9244 }
9245 
9246 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9247 {
9248     DisasContext *dc = container_of(dcbase, DisasContext, base);
9249     /*
9250      * The ECI/ICI bits share PSR bits with the IT bits, so we
9251      * need to reconstitute the bits from the split-out DisasContext
9252      * fields here.
9253      */
9254     uint32_t condexec_bits;
9255     target_ulong pc_arg = dc->base.pc_next;
9256 
9257     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9258         pc_arg &= ~TARGET_PAGE_MASK;
9259     }
9260     if (dc->eci) {
9261         condexec_bits = dc->eci << 4;
9262     } else {
9263         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9264     }
9265     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9266     dc->insn_start = tcg_last_op();
9267 }
9268 
9269 static bool arm_check_kernelpage(DisasContext *dc)
9270 {
9271 #ifdef CONFIG_USER_ONLY
9272     /* Intercept jump to the magic kernel page.  */
9273     if (dc->base.pc_next >= 0xffff0000) {
9274         /* We always get here via a jump, so know we are not in a
9275            conditional execution block.  */
9276         gen_exception_internal(EXCP_KERNEL_TRAP);
9277         dc->base.is_jmp = DISAS_NORETURN;
9278         return true;
9279     }
9280 #endif
9281     return false;
9282 }
9283 
9284 static bool arm_check_ss_active(DisasContext *dc)
9285 {
9286     if (dc->ss_active && !dc->pstate_ss) {
9287         /* Singlestep state is Active-pending.
9288          * If we're in this state at the start of a TB then either
9289          *  a) we just took an exception to an EL which is being debugged
9290          *     and this is the first insn in the exception handler
9291          *  b) debug exceptions were masked and we just unmasked them
9292          *     without changing EL (eg by clearing PSTATE.D)
9293          * In either case we're going to take a swstep exception in the
9294          * "did not step an insn" case, and so the syndrome ISV and EX
9295          * bits should be zero.
9296          */
9297         assert(dc->base.num_insns == 1);
9298         gen_swstep_exception(dc, 0, 0);
9299         dc->base.is_jmp = DISAS_NORETURN;
9300         return true;
9301     }
9302 
9303     return false;
9304 }
9305 
9306 static void arm_post_translate_insn(DisasContext *dc)
9307 {
9308     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9309         if (dc->pc_save != dc->condlabel.pc_save) {
9310             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9311         }
9312         gen_set_label(dc->condlabel.label);
9313         dc->condjmp = 0;
9314     }
9315 }
9316 
9317 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9318 {
9319     DisasContext *dc = container_of(dcbase, DisasContext, base);
9320     CPUARMState *env = cpu_env(cpu);
9321     uint32_t pc = dc->base.pc_next;
9322     unsigned int insn;
9323 
9324     /* Singlestep exceptions have the highest priority. */
9325     if (arm_check_ss_active(dc)) {
9326         dc->base.pc_next = pc + 4;
9327         return;
9328     }
9329 
9330     if (pc & 3) {
9331         /*
9332          * PC alignment fault.  This has priority over the instruction abort
9333          * that we would receive from a translation fault via arm_ldl_code
9334          * (or the execution of the kernelpage entrypoint). This should only
9335          * be possible after an indirect branch, at the start of the TB.
9336          */
9337         assert(dc->base.num_insns == 1);
9338         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
9339         dc->base.is_jmp = DISAS_NORETURN;
9340         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9341         return;
9342     }
9343 
9344     if (arm_check_kernelpage(dc)) {
9345         dc->base.pc_next = pc + 4;
9346         return;
9347     }
9348 
9349     dc->pc_curr = pc;
9350     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9351     dc->insn = insn;
9352     dc->base.pc_next = pc + 4;
9353     disas_arm_insn(dc, insn);
9354 
9355     arm_post_translate_insn(dc);
9356 
9357     /* ARM is a fixed-length ISA.  We performed the cross-page check
9358        in init_disas_context by adjusting max_insns.  */
9359 }
9360 
9361 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9362 {
9363     /* Return true if this Thumb insn is always unconditional,
9364      * even inside an IT block. This is true of only a very few
9365      * instructions: BKPT, HLT, and SG.
9366      *
9367      * A larger class of instructions are UNPREDICTABLE if used
9368      * inside an IT block; we do not need to detect those here, because
9369      * what we do by default (perform the cc check and update the IT
9370      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9371      * choice for those situations.
9372      *
9373      * insn is either a 16-bit or a 32-bit instruction; the two are
9374      * distinguishable because for the 16-bit case the top 16 bits
9375      * are zeroes, and that isn't a valid 32-bit encoding.
9376      */
9377     if ((insn & 0xffffff00) == 0xbe00) {
9378         /* BKPT */
9379         return true;
9380     }
9381 
9382     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9383         !arm_dc_feature(s, ARM_FEATURE_M)) {
9384         /* HLT: v8A only. This is unconditional even when it is going to
9385          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9386          * For v7 cores this was a plain old undefined encoding and so
9387          * honours its cc check. (We might be using the encoding as
9388          * a semihosting trap, but we don't change the cc check behaviour
9389          * on that account, because a debugger connected to a real v7A
9390          * core and emulating semihosting traps by catching the UNDEF
9391          * exception would also only see cases where the cc check passed.
9392          * No guest code should be trying to do a HLT semihosting trap
9393          * in an IT block anyway.
9394          */
9395         return true;
9396     }
9397 
9398     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9399         arm_dc_feature(s, ARM_FEATURE_M)) {
9400         /* SG: v8M only */
9401         return true;
9402     }
9403 
9404     return false;
9405 }
9406 
9407 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9408 {
9409     DisasContext *dc = container_of(dcbase, DisasContext, base);
9410     CPUARMState *env = cpu_env(cpu);
9411     uint32_t pc = dc->base.pc_next;
9412     uint32_t insn;
9413     bool is_16bit;
9414     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9415     TCGOp *insn_eci_rewind = NULL;
9416     target_ulong insn_eci_pc_save = -1;
9417 
9418     /* Misaligned thumb PC is architecturally impossible. */
9419     assert((dc->base.pc_next & 1) == 0);
9420 
9421     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9422         dc->base.pc_next = pc + 2;
9423         return;
9424     }
9425 
9426     dc->pc_curr = pc;
9427     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9428     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9429     pc += 2;
9430     if (!is_16bit) {
9431         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9432         insn = insn << 16 | insn2;
9433         pc += 2;
9434     }
9435     dc->base.pc_next = pc;
9436     dc->insn = insn;
9437 
9438     if (dc->pstate_il) {
9439         /*
9440          * Illegal execution state. This has priority over BTI
9441          * exceptions, but comes after instruction abort exceptions.
9442          */
9443         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9444         return;
9445     }
9446 
9447     if (dc->eci) {
9448         /*
9449          * For M-profile continuable instructions, ECI/ICI handling
9450          * falls into these cases:
9451          *  - interrupt-continuable instructions
9452          *     These are the various load/store multiple insns (both
9453          *     integer and fp). The ICI bits indicate the register
9454          *     where the load/store can resume. We make the IMPDEF
9455          *     choice to always do "instruction restart", ie ignore
9456          *     the ICI value and always execute the ldm/stm from the
9457          *     start. So all we need to do is zero PSR.ICI if the
9458          *     insn executes.
9459          *  - MVE instructions subject to beat-wise execution
9460          *     Here the ECI bits indicate which beats have already been
9461          *     executed, and we must honour this. Each insn of this
9462          *     type will handle it correctly. We will update PSR.ECI
9463          *     in the helper function for the insn (some ECI values
9464          *     mean that the following insn also has been partially
9465          *     executed).
9466          *  - Special cases which don't advance ECI
9467          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9468          *     bits untouched.
9469          *  - all other insns (the common case)
9470          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9471          *     We place a rewind-marker here. Insns in the previous
9472          *     three categories will set a flag in the DisasContext.
9473          *     If the flag isn't set after we call disas_thumb_insn()
9474          *     or disas_thumb2_insn() then we know we have a "some other
9475          *     insn" case. We will rewind to the marker (ie throwing away
9476          *     all the generated code) and instead emit "take exception".
9477          */
9478         insn_eci_rewind = tcg_last_op();
9479         insn_eci_pc_save = dc->pc_save;
9480     }
9481 
9482     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9483         uint32_t cond = dc->condexec_cond;
9484 
9485         /*
9486          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9487          * "always"; 0xf is not "never".
9488          */
9489         if (cond < 0x0e) {
9490             arm_skip_unless(dc, cond);
9491         }
9492     }
9493 
9494     if (is_16bit) {
9495         disas_thumb_insn(dc, insn);
9496     } else {
9497         disas_thumb2_insn(dc, insn);
9498     }
9499 
9500     /* Advance the Thumb condexec condition.  */
9501     if (dc->condexec_mask) {
9502         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9503                              ((dc->condexec_mask >> 4) & 1));
9504         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9505         if (dc->condexec_mask == 0) {
9506             dc->condexec_cond = 0;
9507         }
9508     }
9509 
9510     if (dc->eci && !dc->eci_handled) {
9511         /*
9512          * Insn wasn't valid for ECI/ICI at all: undo what we
9513          * just generated and instead emit an exception
9514          */
9515         tcg_remove_ops_after(insn_eci_rewind);
9516         dc->pc_save = insn_eci_pc_save;
9517         dc->condjmp = 0;
9518         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9519     }
9520 
9521     arm_post_translate_insn(dc);
9522 
9523     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9524      * will touch a new page.  This ensures that prefetch aborts occur at
9525      * the right place.
9526      *
9527      * We want to stop the TB if the next insn starts in a new page,
9528      * or if it spans between this page and the next. This means that
9529      * if we're looking at the last halfword in the page we need to
9530      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9531      * or a 32-bit Thumb insn (which won't).
9532      * This is to avoid generating a silly TB with a single 16-bit insn
9533      * in it at the end of this page (which would execute correctly
9534      * but isn't very efficient).
9535      */
9536     if (dc->base.is_jmp == DISAS_NEXT
9537         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9538             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9539                 && insn_crosses_page(env, dc)))) {
9540         dc->base.is_jmp = DISAS_TOO_MANY;
9541     }
9542 }
9543 
9544 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9545 {
9546     DisasContext *dc = container_of(dcbase, DisasContext, base);
9547 
9548     /* At this stage dc->condjmp will only be set when the skipped
9549        instruction was a conditional branch or trap, and the PC has
9550        already been written.  */
9551     gen_set_condexec(dc);
9552     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9553         /* Exception return branches need some special case code at the
9554          * end of the TB, which is complex enough that it has to
9555          * handle the single-step vs not and the condition-failed
9556          * insn codepath itself.
9557          */
9558         gen_bx_excret_final_code(dc);
9559     } else if (unlikely(dc->ss_active)) {
9560         /* Unconditional and "condition passed" instruction codepath. */
9561         switch (dc->base.is_jmp) {
9562         case DISAS_SWI:
9563             gen_ss_advance(dc);
9564             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9565             break;
9566         case DISAS_HVC:
9567             gen_ss_advance(dc);
9568             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9569             break;
9570         case DISAS_SMC:
9571             gen_ss_advance(dc);
9572             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9573             break;
9574         case DISAS_NEXT:
9575         case DISAS_TOO_MANY:
9576         case DISAS_UPDATE_EXIT:
9577         case DISAS_UPDATE_NOCHAIN:
9578             gen_update_pc(dc, curr_insn_len(dc));
9579             /* fall through */
9580         default:
9581             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9582             gen_singlestep_exception(dc);
9583             break;
9584         case DISAS_NORETURN:
9585             break;
9586         }
9587     } else {
9588         /* While branches must always occur at the end of an IT block,
9589            there are a few other things that can cause us to terminate
9590            the TB in the middle of an IT block:
9591             - Exception generating instructions (bkpt, swi, undefined).
9592             - Page boundaries.
9593             - Hardware watchpoints.
9594            Hardware breakpoints have already been handled and skip this code.
9595          */
9596         switch (dc->base.is_jmp) {
9597         case DISAS_NEXT:
9598         case DISAS_TOO_MANY:
9599             gen_goto_tb(dc, 1, curr_insn_len(dc));
9600             break;
9601         case DISAS_UPDATE_NOCHAIN:
9602             gen_update_pc(dc, curr_insn_len(dc));
9603             /* fall through */
9604         case DISAS_JUMP:
9605             gen_goto_ptr();
9606             break;
9607         case DISAS_UPDATE_EXIT:
9608             gen_update_pc(dc, curr_insn_len(dc));
9609             /* fall through */
9610         default:
9611             /* indicate that the hash table must be used to find the next TB */
9612             tcg_gen_exit_tb(NULL, 0);
9613             break;
9614         case DISAS_NORETURN:
9615             /* nothing more to generate */
9616             break;
9617         case DISAS_WFI:
9618             gen_helper_wfi(tcg_env, tcg_constant_i32(curr_insn_len(dc)));
9619             /*
9620              * The helper doesn't necessarily throw an exception, but we
9621              * must go back to the main loop to check for interrupts anyway.
9622              */
9623             tcg_gen_exit_tb(NULL, 0);
9624             break;
9625         case DISAS_WFE:
9626             gen_helper_wfe(tcg_env);
9627             break;
9628         case DISAS_YIELD:
9629             gen_helper_yield(tcg_env);
9630             break;
9631         case DISAS_SWI:
9632             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9633             break;
9634         case DISAS_HVC:
9635             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9636             break;
9637         case DISAS_SMC:
9638             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9639             break;
9640         }
9641     }
9642 
9643     if (dc->condjmp) {
9644         /* "Condition failed" instruction codepath for the branch/trap insn */
9645         set_disas_label(dc, dc->condlabel);
9646         gen_set_condexec(dc);
9647         if (unlikely(dc->ss_active)) {
9648             gen_update_pc(dc, curr_insn_len(dc));
9649             gen_singlestep_exception(dc);
9650         } else {
9651             gen_goto_tb(dc, 1, curr_insn_len(dc));
9652         }
9653     }
9654 }
9655 
9656 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9657                              CPUState *cpu, FILE *logfile)
9658 {
9659     DisasContext *dc = container_of(dcbase, DisasContext, base);
9660 
9661     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9662     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9663 }
9664 
9665 static const TranslatorOps arm_translator_ops = {
9666     .init_disas_context = arm_tr_init_disas_context,
9667     .tb_start           = arm_tr_tb_start,
9668     .insn_start         = arm_tr_insn_start,
9669     .translate_insn     = arm_tr_translate_insn,
9670     .tb_stop            = arm_tr_tb_stop,
9671     .disas_log          = arm_tr_disas_log,
9672 };
9673 
9674 static const TranslatorOps thumb_translator_ops = {
9675     .init_disas_context = arm_tr_init_disas_context,
9676     .tb_start           = arm_tr_tb_start,
9677     .insn_start         = arm_tr_insn_start,
9678     .translate_insn     = thumb_tr_translate_insn,
9679     .tb_stop            = arm_tr_tb_stop,
9680     .disas_log          = arm_tr_disas_log,
9681 };
9682 
9683 /* generate intermediate code for basic block 'tb'.  */
9684 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9685                            target_ulong pc, void *host_pc)
9686 {
9687     DisasContext dc = { };
9688     const TranslatorOps *ops = &arm_translator_ops;
9689     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9690 
9691     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9692         ops = &thumb_translator_ops;
9693     }
9694 #ifdef TARGET_AARCH64
9695     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9696         ops = &aarch64_translator_ops;
9697     }
9698 #endif
9699 
9700     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9701 }
9702