1 /*
2  *  AArch64 translation
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "exec/exec-all.h"
22 #include "translate.h"
23 #include "translate-a64.h"
24 #include "qemu/log.h"
25 #include "arm_ldst.h"
26 #include "semihosting/semihost.h"
27 #include "cpregs.h"
28 
29 static TCGv_i64 cpu_X[32];
30 static TCGv_i64 cpu_pc;
31 
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high;
34 
35 static const char *regnames[] = {
36     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40 };
41 
42 enum a64_shift_type {
43     A64_SHIFT_TYPE_LSL = 0,
44     A64_SHIFT_TYPE_LSR = 1,
45     A64_SHIFT_TYPE_ASR = 2,
46     A64_SHIFT_TYPE_ROR = 3
47 };
48 
49 /*
50  * Helpers for extracting complex instruction fields
51  */
52 
53 /*
54  * For load/store with an unsigned 12 bit immediate scaled by the element
55  * size. The input has the immediate field in bits [14:3] and the element
56  * size in [2:0].
57  */
58 static int uimm_scaled(DisasContext *s, int x)
59 {
60     unsigned imm = x >> 3;
61     unsigned scale = extract32(x, 0, 3);
62     return imm << scale;
63 }
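
/*
 * Worked example (illustrative): for an encoded field with scale == 3
 * (8-byte elements) and imm == 4, the byte offset is 4 << 3 == 32.
 */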
64 
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66 static int scale_by_log2_tag_granule(DisasContext *s, int x)
67 {
68     return x << LOG2_TAG_GRANULE;
69 }
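
/*
 * Illustrative example, assuming the usual 16-byte MTE tag granule
 * (LOG2_TAG_GRANULE == 4): an encoded offset of 2 scales to 32 bytes.
 */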
70 
71 /*
72  * Include the generated decoders.
73  */
74 
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
77 
78 /* Table based decoder typedefs - used when the relevant bits for decode
79  * are too awkwardly scattered across the instruction (eg SIMD).
80  */
81 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
82 
83 typedef struct AArch64DecodeTable {
84     uint32_t pattern;
85     uint32_t mask;
86     AArch64DecodeFn *disas_fn;
87 } AArch64DecodeTable;
88 
89 /* initialize TCG globals.  */
90 void a64_translate_init(void)
91 {
92     int i;
93 
94     cpu_pc = tcg_global_mem_new_i64(tcg_env,
95                                     offsetof(CPUARMState, pc),
96                                     "pc");
97     for (i = 0; i < 32; i++) {
98         cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
99                                           offsetof(CPUARMState, xregs[i]),
100                                           regnames[i]);
101     }
102 
103     cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
104         offsetof(CPUARMState, exclusive_high), "exclusive_high");
105 }
106 
107 /*
108  * Return the core mmu_idx to use for A64 load/store insns which
109  * have a "unprivileged load/store" variant. Those insns access
110  * EL0 if executed from an EL which has control over EL0 (usually
111  * EL1) but behave like normal loads and stores if executed from
112  * elsewhere (eg EL3).
113  *
114  * @unpriv : true for the unprivileged encoding; false for the
115  *           normal encoding (in which case we will return the same
116  *           thing as get_mem_index()).
117  */
118 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
119 {
120     /*
121      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
122      * which is the usual mmu_idx for this cpu state.
123      */
124     ARMMMUIdx useridx = s->mmu_idx;
125 
126     if (unpriv && s->unpriv) {
127         /*
128          * We have pre-computed the condition for AccType_UNPRIV.
129          * Therefore we should never get here with a mmu_idx for
130          * which we do not know the corresponding user mmu_idx.
131          */
132         switch (useridx) {
133         case ARMMMUIdx_E10_1:
134         case ARMMMUIdx_E10_1_PAN:
135             useridx = ARMMMUIdx_E10_0;
136             break;
137         case ARMMMUIdx_E20_2:
138         case ARMMMUIdx_E20_2_PAN:
139             useridx = ARMMMUIdx_E20_0;
140             break;
141         default:
142             g_assert_not_reached();
143         }
144     }
145     return arm_to_core_mmu_idx(useridx);
146 }
147 
148 static void set_btype_raw(int val)
149 {
150     tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
151                    offsetof(CPUARMState, btype));
152 }
153 
154 static void set_btype(DisasContext *s, int val)
155 {
156     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
157     tcg_debug_assert(val >= 1 && val <= 3);
158     set_btype_raw(val);
159     s->btype = -1;
160 }
161 
162 static void reset_btype(DisasContext *s)
163 {
164     if (s->btype != 0) {
165         set_btype_raw(0);
166         s->btype = 0;
167     }
168 }
169 
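/*
 * Compute dest = PC-of-current-insn + diff.  With CF_PCREL, cpu_pc only
 * holds the value recorded at s->pc_save, so we add the delta from that
 * point; otherwise the absolute target is known at translation time and
 * can be loaded as an immediate.
 */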
170 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
171 {
172     assert(s->pc_save != -1);
173     if (tb_cflags(s->base.tb) & CF_PCREL) {
174         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
175     } else {
176         tcg_gen_movi_i64(dest, s->pc_curr + diff);
177     }
178 }
179 
180 void gen_a64_update_pc(DisasContext *s, target_long diff)
181 {
182     gen_pc_plus_diff(s, cpu_pc, diff);
183     s->pc_save = s->pc_curr + diff;
184 }
185 
186 /*
187  * Handle Top Byte Ignore (TBI) bits.
188  *
189  * If address tagging is enabled via the TCR TBI bits:
190  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
191  *    then the address is zero-extended, clearing bits [63:56]
192  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
193  *    and TBI1 controls addresses with bit 55 == 1.
194  *    If the appropriate TBI bit is set for the address then
195  *    the address is sign-extended from bit 55 into bits [63:56]
196  *
197  * Here we have concatenated TBI{1,0} into tbi.
198  */
199 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
200                                 TCGv_i64 src, int tbi)
201 {
202     if (tbi == 0) {
203         /* Load unmodified address */
204         tcg_gen_mov_i64(dst, src);
205     } else if (!regime_has_2_ranges(s->mmu_idx)) {
206         /* Force tag byte to all zero */
207         tcg_gen_extract_i64(dst, src, 0, 56);
208     } else {
209         /* Sign-extend from bit 55.  */
210         tcg_gen_sextract_i64(dst, src, 0, 56);
211 
212         switch (tbi) {
213         case 1:
214             /* tbi0 but !tbi1: only use the extension if positive */
215             tcg_gen_and_i64(dst, dst, src);
216             break;
217         case 2:
218             /* !tbi0 but tbi1: only use the extension if negative */
219             tcg_gen_or_i64(dst, dst, src);
220             break;
221         case 3:
222             /* tbi0 and tbi1: always use the extension */
223             break;
224         default:
225             g_assert_not_reached();
226         }
227     }
228 }
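
/*
 * Example (illustrative): with tbi == 1 (TBI0 set, TBI1 clear), an
 * address with bit 55 clear has its top byte cleared by the sign
 * extension, while an address with bit 55 set passes through
 * unmodified because the AND keeps the original top byte.
 */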
229 
230 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
231 {
232     /*
233      * If address tagging is enabled for instructions via the TCR TBI bits,
234      * then loading an address into the PC will clear out any tag.
235      */
236     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
237     s->pc_save = -1;
238 }
239 
240 /*
241  * Handle MTE and/or TBI.
242  *
243  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
244  * for the tag to be present in the FAR_ELx register.  But for user-only
245  * mode we do not have a TLB with which to implement this, so we must
246  * remove the top byte now.
247  *
248  * Always return a fresh temporary that we can increment independently
249  * of the write-back address.
250  */
251 
252 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
253 {
254     TCGv_i64 clean = tcg_temp_new_i64();
255 #ifdef CONFIG_USER_ONLY
256     gen_top_byte_ignore(s, clean, addr, s->tbid);
257 #else
258     tcg_gen_mov_i64(clean, addr);
259 #endif
260     return clean;
261 }
262 
263 /* Insert a zero tag into src, with the result at dst. */
264 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
265 {
266     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
267 }
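
/*
 * The mask used above clears bits [59:56], i.e. the 4-bit allocation
 * tag field within the top byte of the address.
 */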
268 
269 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
270                              MMUAccessType acc, int log2_size)
271 {
272     gen_helper_probe_access(tcg_env, ptr,
273                             tcg_constant_i32(acc),
274                             tcg_constant_i32(get_mem_index(s)),
275                             tcg_constant_i32(1 << log2_size));
276 }
277 
278 /*
279  * For MTE, check a single logical or atomic access.  This probes a single
280  * address, the exact one specified.  The size and alignment of the access
281  * is not relevant to MTE, per se, but watchpoints do require the size,
282  * and we want to recognize those before making any other changes to state.
283  */
284 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
285                                       bool is_write, bool tag_checked,
286                                       MemOp memop, bool is_unpriv,
287                                       int core_idx)
288 {
289     if (tag_checked && s->mte_active[is_unpriv]) {
290         TCGv_i64 ret;
291         int desc = 0;
292 
293         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
294         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
295         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
296         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
297         desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
298         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
299 
300         ret = tcg_temp_new_i64();
301         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
302 
303         return ret;
304     }
305     return clean_data_tbi(s, addr);
306 }
307 
308 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
309                         bool tag_checked, MemOp memop)
310 {
311     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
312                                  false, get_mem_index(s));
313 }
314 
315 /*
316  * For MTE, check multiple logical sequential accesses.
317  */
318 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
319                         bool tag_checked, int total_size, MemOp single_mop)
320 {
321     if (tag_checked && s->mte_active[0]) {
322         TCGv_i64 ret;
323         int desc = 0;
324 
325         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
326         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
327         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
328         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
329         desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
330         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
331 
332         ret = tcg_temp_new_i64();
333         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
334 
335         return ret;
336     }
337     return clean_data_tbi(s, addr);
338 }
339 
340 /*
341  * Generate the special alignment check that applies to AccType_ATOMIC
342  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
343  * naturally aligned, but it must not cross a 16-byte boundary.
344  * See AArch64.CheckAlignment().
345  */
346 static void check_lse2_align(DisasContext *s, int rn, int imm,
347                              bool is_write, MemOp mop)
348 {
349     TCGv_i32 tmp;
350     TCGv_i64 addr;
351     TCGLabel *over_label;
352     MMUAccessType type;
353     int mmu_idx;
354 
355     tmp = tcg_temp_new_i32();
356     tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
357     tcg_gen_addi_i32(tmp, tmp, imm & 15);
358     tcg_gen_andi_i32(tmp, tmp, 15);
359     tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
360 
361     over_label = gen_new_label();
362     tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
363 
364     addr = tcg_temp_new_i64();
365     tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
366 
367     type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
368     mmu_idx = get_mem_index(s);
369     gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
370                                 tcg_constant_i32(mmu_idx));
371 
372     gen_set_label(over_label);
374 }
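
/*
 * Worked example (illustrative): an 8-byte access whose address is 12
 * bytes into a 16-byte granule gives (12 & 15) + 8 == 20 > 16, so the
 * access crosses the boundary and the unaligned-access helper is called.
 */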
375 
376 /* Handle the alignment check for AccType_ATOMIC instructions. */
377 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
378 {
379     MemOp size = mop & MO_SIZE;
380 
381     if (size == MO_8) {
382         return mop;
383     }
384 
385     /*
386      * If size == MO_128, this is a LDXP, and the operation is single-copy
387      * atomic for each doubleword, not the entire quadword; it still must
388      * be quadword aligned.
389      */
390     if (size == MO_128) {
391         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
392                                    MO_ATOM_IFALIGN_PAIR);
393     }
394     if (dc_isar_feature(aa64_lse2, s)) {
395         check_lse2_align(s, rn, 0, true, mop);
396     } else {
397         mop |= MO_ALIGN;
398     }
399     return finalize_memop(s, mop);
400 }
401 
402 /* Handle the alignment check for AccType_ORDERED instructions. */
403 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
404                                  bool is_write, MemOp mop)
405 {
406     MemOp size = mop & MO_SIZE;
407 
408     if (size == MO_8) {
409         return mop;
410     }
411     if (size == MO_128) {
412         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
413                                    MO_ATOM_IFALIGN_PAIR);
414     }
415     if (!dc_isar_feature(aa64_lse2, s)) {
416         mop |= MO_ALIGN;
417     } else if (!s->naa) {
418         check_lse2_align(s, rn, imm, is_write, mop);
419     }
420     return finalize_memop(s, mop);
421 }
422 
423 typedef struct DisasCompare64 {
424     TCGCond cond;
425     TCGv_i64 value;
426 } DisasCompare64;
427 
428 static void a64_test_cc(DisasCompare64 *c64, int cc)
429 {
430     DisasCompare c32;
431 
432     arm_test_cc(&c32, cc);
433 
434     /*
435      * Sign-extend the 32-bit value so that the GE/LT comparisons work
436      * properly.  The NE/EQ comparisons are also fine with this choice.
437      */
438     c64->cond = c32.cond;
439     c64->value = tcg_temp_new_i64();
440     tcg_gen_ext_i32_i64(c64->value, c32.value);
441 }
442 
443 static void gen_rebuild_hflags(DisasContext *s)
444 {
445     gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
446 }
447 
448 static void gen_exception_internal(int excp)
449 {
450     assert(excp_is_internal(excp));
451     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
452 }
453 
454 static void gen_exception_internal_insn(DisasContext *s, int excp)
455 {
456     gen_a64_update_pc(s, 0);
457     gen_exception_internal(excp);
458     s->base.is_jmp = DISAS_NORETURN;
459 }
460 
461 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
462 {
463     gen_a64_update_pc(s, 0);
464     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
465     s->base.is_jmp = DISAS_NORETURN;
466 }
467 
468 static void gen_step_complete_exception(DisasContext *s)
469 {
470     /* We just completed step of an insn. Move from Active-not-pending
471      * to Active-pending, and then also take the swstep exception.
472      * This corresponds to making the (IMPDEF) choice to prioritize
473      * swstep exceptions over asynchronous exceptions taken to an exception
474      * level where debug is disabled. This choice has the advantage that
475      * we do not need to maintain internal state corresponding to the
476      * ISV/EX syndrome bits between completion of the step and generation
477      * of the exception, and our syndrome information is always correct.
478      */
479     gen_ss_advance(s);
480     gen_swstep_exception(s, 1, s->is_ldex);
481     s->base.is_jmp = DISAS_NORETURN;
482 }
483 
484 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
485 {
486     if (s->ss_active) {
487         return false;
488     }
489     return translator_use_goto_tb(&s->base, dest);
490 }
491 
492 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
493 {
494     if (use_goto_tb(s, s->pc_curr + diff)) {
495         /*
496          * For pcrel, the pc must always be up-to-date on entry to
497          * the linked TB, so that it can use simple additions for all
498          * further adjustments.  For !pcrel, the linked TB is compiled
499          * to know its full virtual address, so we can delay the
500          * update to pc to the unlinked path.  A long chain of links
501          * can thus avoid many updates to the PC.
502          */
503         if (tb_cflags(s->base.tb) & CF_PCREL) {
504             gen_a64_update_pc(s, diff);
505             tcg_gen_goto_tb(n);
506         } else {
507             tcg_gen_goto_tb(n);
508             gen_a64_update_pc(s, diff);
509         }
510         tcg_gen_exit_tb(s->base.tb, n);
511         s->base.is_jmp = DISAS_NORETURN;
512     } else {
513         gen_a64_update_pc(s, diff);
514         if (s->ss_active) {
515             gen_step_complete_exception(s);
516         } else {
517             tcg_gen_lookup_and_goto_ptr();
518             s->base.is_jmp = DISAS_NORETURN;
519         }
520     }
521 }
522 
523 /*
524  * Register access functions
525  *
526  * These functions are used for directly accessing a register in cases where
527  * changes to the final register value are likely to be made. If you
528  * need to use a register for temporary calculation (e.g. index type
529  * operations) use the read_* form.
530  *
531  * B1.2.1 Register mappings
532  *
533  * In instruction register encoding 31 can refer to ZR (zero register) or
534  * the SP (stack pointer) depending on context. In QEMU's case we map SP
535  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
536  * This is the point of the _sp forms.
537  */
538 TCGv_i64 cpu_reg(DisasContext *s, int reg)
539 {
540     if (reg == 31) {
541         TCGv_i64 t = tcg_temp_new_i64();
542         tcg_gen_movi_i64(t, 0);
543         return t;
544     } else {
545         return cpu_X[reg];
546     }
547 }
548 
549 /* register access for when 31 == SP */
550 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
551 {
552     return cpu_X[reg];
553 }
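
/*
 * For example (illustrative): data-processing instructions generally
 * treat register 31 as XZR and go through cpu_reg(), whereas loads,
 * stores and address arithmetic that use register 31 as a base treat
 * it as SP and go through cpu_reg_sp().
 */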
554 
555 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
556  * representing the register contents. This TCGv is an auto-freed
557  * temporary so it need not be explicitly freed, and may be modified.
558  */
559 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
560 {
561     TCGv_i64 v = tcg_temp_new_i64();
562     if (reg != 31) {
563         if (sf) {
564             tcg_gen_mov_i64(v, cpu_X[reg]);
565         } else {
566             tcg_gen_ext32u_i64(v, cpu_X[reg]);
567         }
568     } else {
569         tcg_gen_movi_i64(v, 0);
570     }
571     return v;
572 }
573 
574 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
575 {
576     TCGv_i64 v = tcg_temp_new_i64();
577     if (sf) {
578         tcg_gen_mov_i64(v, cpu_X[reg]);
579     } else {
580         tcg_gen_ext32u_i64(v, cpu_X[reg]);
581     }
582     return v;
583 }
584 
585 /* Return the offset into CPUARMState of a slice (from
586  * the least significant end) of FP register Qn (ie
587  * Dn, Sn, Hn or Bn).
588  * (Note that this is not the same mapping as for A32; see cpu.h)
589  */
590 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
591 {
592     return vec_reg_offset(s, regno, 0, size);
593 }
594 
595 /* Offset of the high half of the 128 bit vector Qn */
596 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
597 {
598     return vec_reg_offset(s, regno, 1, MO_64);
599 }
600 
601 /* Convenience accessors for reading and writing single and double
602  * FP registers. Writing clears the upper parts of the associated
603  * 128 bit vector register, as required by the architecture.
604  * Note that unlike the GP register accessors, the read functions
605  * return a fresh temporary rather than the global register variable.
606  */
607 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
608 {
609     TCGv_i64 v = tcg_temp_new_i64();
610 
611     tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
612     return v;
613 }
614 
615 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
616 {
617     TCGv_i32 v = tcg_temp_new_i32();
618 
619     tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
620     return v;
621 }
622 
623 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
624 {
625     TCGv_i32 v = tcg_temp_new_i32();
626 
627     tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
628     return v;
629 }
630 
631 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
632  * If SVE is not enabled, then there are only 128 bits in the vector.
633  */
634 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
635 {
636     unsigned ofs = fp_reg_offset(s, rd, MO_64);
637     unsigned vsz = vec_full_reg_size(s);
638 
639     /* Nop move, with side effect of clearing the tail. */
640     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
641 }
642 
643 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
644 {
645     unsigned ofs = fp_reg_offset(s, reg, MO_64);
646 
647     tcg_gen_st_i64(v, tcg_env, ofs);
648     clear_vec_high(s, false, reg);
649 }
650 
651 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
652 {
653     TCGv_i64 tmp = tcg_temp_new_i64();
654 
655     tcg_gen_extu_i32_i64(tmp, v);
656     write_fp_dreg(s, reg, tmp);
657 }
658 
659 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
660 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
661                          GVecGen2Fn *gvec_fn, int vece)
662 {
663     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
664             is_q ? 16 : 8, vec_full_reg_size(s));
665 }
666 
667 /* Expand a 2-operand + immediate AdvSIMD vector operation using
668  * an expander function.
669  */
670 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
671                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
672 {
673     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
674             imm, is_q ? 16 : 8, vec_full_reg_size(s));
675 }
676 
677 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
678 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
679                          GVecGen3Fn *gvec_fn, int vece)
680 {
681     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
682             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
683 }
684 
685 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
686 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
687                          int rx, GVecGen4Fn *gvec_fn, int vece)
688 {
689     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
690             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
691             is_q ? 16 : 8, vec_full_reg_size(s));
692 }
693 
694 /* Expand a 2-operand operation using an out-of-line helper.  */
695 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
696                              int rn, int data, gen_helper_gvec_2 *fn)
697 {
698     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
699                        vec_full_reg_offset(s, rn),
700                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
701 }
702 
703 /* Expand a 3-operand operation using an out-of-line helper.  */
704 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
705                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
706 {
707     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
708                        vec_full_reg_offset(s, rn),
709                        vec_full_reg_offset(s, rm),
710                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
711 }
712 
713 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
714  * an out-of-line helper.
715  */
716 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
717                               int rm, bool is_fp16, int data,
718                               gen_helper_gvec_3_ptr *fn)
719 {
720     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
721     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
722                        vec_full_reg_offset(s, rn),
723                        vec_full_reg_offset(s, rm), fpst,
724                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
725 }
726 
727 /* Expand a 4-operand operation using an out-of-line helper.  */
728 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
729                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
730 {
731     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
732                        vec_full_reg_offset(s, rn),
733                        vec_full_reg_offset(s, rm),
734                        vec_full_reg_offset(s, ra),
735                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
736 }
737 
738 /*
739  * Expand a 4-operand + fpstatus pointer + simd data value operation using
740  * an out-of-line helper.
741  */
742 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
743                               int rm, int ra, bool is_fp16, int data,
744                               gen_helper_gvec_4_ptr *fn)
745 {
746     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
747     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
748                        vec_full_reg_offset(s, rn),
749                        vec_full_reg_offset(s, rm),
750                        vec_full_reg_offset(s, ra), fpst,
751                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
752 }
753 
754 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
755  * than the 32 bit equivalent.
756  */
757 static inline void gen_set_NZ64(TCGv_i64 result)
758 {
759     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
760     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
761 }
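
/*
 * Note on the representation used throughout target/arm: cpu_ZF is
 * "zero iff Z is set", so OR-ing the two 32-bit halves makes it zero
 * exactly when the 64-bit result is zero, and cpu_NF carries the sign
 * in its bit 31 because it holds the high half of the result.
 */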
762 
763 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
764 static inline void gen_logic_CC(int sf, TCGv_i64 result)
765 {
766     if (sf) {
767         gen_set_NZ64(result);
768     } else {
769         tcg_gen_extrl_i64_i32(cpu_ZF, result);
770         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
771     }
772     tcg_gen_movi_i32(cpu_CF, 0);
773     tcg_gen_movi_i32(cpu_VF, 0);
774 }
775 
776 /* dest = T0 + T1; compute C, N, V and Z flags */
777 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
778 {
779     TCGv_i64 result, flag, tmp;
780     result = tcg_temp_new_i64();
781     flag = tcg_temp_new_i64();
782     tmp = tcg_temp_new_i64();
783 
784     tcg_gen_movi_i64(tmp, 0);
785     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
786 
787     tcg_gen_extrl_i64_i32(cpu_CF, flag);
788 
789     gen_set_NZ64(result);
790 
791     tcg_gen_xor_i64(flag, result, t0);
792     tcg_gen_xor_i64(tmp, t0, t1);
793     tcg_gen_andc_i64(flag, flag, tmp);
794     tcg_gen_extrh_i64_i32(cpu_VF, flag);
795 
796     tcg_gen_mov_i64(dest, result);
797 }
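
/*
 * The overflow computation above is the usual identity for addition:
 * V = (result ^ t0) & ~(t0 ^ t1), i.e. the operands had the same sign
 * and the result's sign differs; bit 63 of that value ends up as
 * bit 31 of cpu_VF, which is where the V flag lives.
 */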
798 
799 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
800 {
801     TCGv_i32 t0_32 = tcg_temp_new_i32();
802     TCGv_i32 t1_32 = tcg_temp_new_i32();
803     TCGv_i32 tmp = tcg_temp_new_i32();
804 
805     tcg_gen_movi_i32(tmp, 0);
806     tcg_gen_extrl_i64_i32(t0_32, t0);
807     tcg_gen_extrl_i64_i32(t1_32, t1);
808     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
809     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
810     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
811     tcg_gen_xor_i32(tmp, t0_32, t1_32);
812     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
813     tcg_gen_extu_i32_i64(dest, cpu_NF);
814 }
815 
816 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
817 {
818     if (sf) {
819         gen_add64_CC(dest, t0, t1);
820     } else {
821         gen_add32_CC(dest, t0, t1);
822     }
823 }
824 
825 /* dest = T0 - T1; compute C, N, V and Z flags */
826 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
827 {
828     /* 64 bit arithmetic */
829     TCGv_i64 result, flag, tmp;
830 
831     result = tcg_temp_new_i64();
832     flag = tcg_temp_new_i64();
833     tcg_gen_sub_i64(result, t0, t1);
834 
835     gen_set_NZ64(result);
836 
837     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
838     tcg_gen_extrl_i64_i32(cpu_CF, flag);
839 
840     tcg_gen_xor_i64(flag, result, t0);
841     tmp = tcg_temp_new_i64();
842     tcg_gen_xor_i64(tmp, t0, t1);
843     tcg_gen_and_i64(flag, flag, tmp);
844     tcg_gen_extrh_i64_i32(cpu_VF, flag);
845     tcg_gen_mov_i64(dest, result);
846 }
847 
848 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
849 {
850     /* 32 bit arithmetic */
851     TCGv_i32 t0_32 = tcg_temp_new_i32();
852     TCGv_i32 t1_32 = tcg_temp_new_i32();
853     TCGv_i32 tmp;
854 
855     tcg_gen_extrl_i64_i32(t0_32, t0);
856     tcg_gen_extrl_i64_i32(t1_32, t1);
857     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
858     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
859     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
860     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
861     tmp = tcg_temp_new_i32();
862     tcg_gen_xor_i32(tmp, t0_32, t1_32);
863     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
864     tcg_gen_extu_i32_i64(dest, cpu_NF);
865 }
866 
867 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
868 {
869     if (sf) {
870         gen_sub64_CC(dest, t0, t1);
871     } else {
872         gen_sub32_CC(dest, t0, t1);
873     }
874 }
875 
876 /* dest = T0 + T1 + CF; do not compute flags. */
877 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
878 {
879     TCGv_i64 flag = tcg_temp_new_i64();
880     tcg_gen_extu_i32_i64(flag, cpu_CF);
881     tcg_gen_add_i64(dest, t0, t1);
882     tcg_gen_add_i64(dest, dest, flag);
883 
884     if (!sf) {
885         tcg_gen_ext32u_i64(dest, dest);
886     }
887 }
888 
889 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
890 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
891 {
892     if (sf) {
893         TCGv_i64 result = tcg_temp_new_i64();
894         TCGv_i64 cf_64 = tcg_temp_new_i64();
895         TCGv_i64 vf_64 = tcg_temp_new_i64();
896         TCGv_i64 tmp = tcg_temp_new_i64();
897         TCGv_i64 zero = tcg_constant_i64(0);
898 
899         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
900         tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
901         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
902         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
903         gen_set_NZ64(result);
904 
905         tcg_gen_xor_i64(vf_64, result, t0);
906         tcg_gen_xor_i64(tmp, t0, t1);
907         tcg_gen_andc_i64(vf_64, vf_64, tmp);
908         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
909 
910         tcg_gen_mov_i64(dest, result);
911     } else {
912         TCGv_i32 t0_32 = tcg_temp_new_i32();
913         TCGv_i32 t1_32 = tcg_temp_new_i32();
914         TCGv_i32 tmp = tcg_temp_new_i32();
915         TCGv_i32 zero = tcg_constant_i32(0);
916 
917         tcg_gen_extrl_i64_i32(t0_32, t0);
918         tcg_gen_extrl_i64_i32(t1_32, t1);
919         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
920         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
921 
922         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
923         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
924         tcg_gen_xor_i32(tmp, t0_32, t1_32);
925         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
926         tcg_gen_extu_i32_i64(dest, cpu_NF);
927     }
928 }
929 
930 /*
931  * Load/Store generators
932  */
933 
934 /*
935  * Store from GPR register to memory.
936  */
937 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
938                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
939                              bool iss_valid,
940                              unsigned int iss_srt,
941                              bool iss_sf, bool iss_ar)
942 {
943     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
944 
945     if (iss_valid) {
946         uint32_t syn;
947 
948         syn = syn_data_abort_with_iss(0,
949                                       (memop & MO_SIZE),
950                                       false,
951                                       iss_srt,
952                                       iss_sf,
953                                       iss_ar,
954                                       0, 0, 0, 0, 0, false);
955         disas_set_insn_syndrome(s, syn);
956     }
957 }
958 
959 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
960                       TCGv_i64 tcg_addr, MemOp memop,
961                       bool iss_valid,
962                       unsigned int iss_srt,
963                       bool iss_sf, bool iss_ar)
964 {
965     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
966                      iss_valid, iss_srt, iss_sf, iss_ar);
967 }
968 
969 /*
970  * Load from memory to GPR register
971  */
972 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
973                              MemOp memop, bool extend, int memidx,
974                              bool iss_valid, unsigned int iss_srt,
975                              bool iss_sf, bool iss_ar)
976 {
977     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
978 
979     if (extend && (memop & MO_SIGN)) {
980         g_assert((memop & MO_SIZE) <= MO_32);
981         tcg_gen_ext32u_i64(dest, dest);
982     }
983 
984     if (iss_valid) {
985         uint32_t syn;
986 
987         syn = syn_data_abort_with_iss(0,
988                                       (memop & MO_SIZE),
989                                       (memop & MO_SIGN) != 0,
990                                       iss_srt,
991                                       iss_sf,
992                                       iss_ar,
993                                       0, 0, 0, 0, 0, false);
994         disas_set_insn_syndrome(s, syn);
995     }
996 }
997 
998 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
999                       MemOp memop, bool extend,
1000                       bool iss_valid, unsigned int iss_srt,
1001                       bool iss_sf, bool iss_ar)
1002 {
1003     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1004                      iss_valid, iss_srt, iss_sf, iss_ar);
1005 }
1006 
1007 /*
1008  * Store from FP register to memory
1009  */
1010 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1011 {
1012     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1013     TCGv_i64 tmplo = tcg_temp_new_i64();
1014 
1015     tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1016 
1017     if ((mop & MO_SIZE) < MO_128) {
1018         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1019     } else {
1020         TCGv_i64 tmphi = tcg_temp_new_i64();
1021         TCGv_i128 t16 = tcg_temp_new_i128();
1022 
1023         tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1024         tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1025 
1026         tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1027     }
1028 }
1029 
1030 /*
1031  * Load from memory to FP register
1032  */
1033 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1034 {
1035     /* This always zero-extends and writes to a full 128 bit wide vector */
1036     TCGv_i64 tmplo = tcg_temp_new_i64();
1037     TCGv_i64 tmphi = NULL;
1038 
1039     if ((mop & MO_SIZE) < MO_128) {
1040         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1041     } else {
1042         TCGv_i128 t16 = tcg_temp_new_i128();
1043 
1044         tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1045 
1046         tmphi = tcg_temp_new_i64();
1047         tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1048     }
1049 
1050     tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1051 
1052     if (tmphi) {
1053         tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1054     }
1055     clear_vec_high(s, tmphi != NULL, destidx);
1056 }
1057 
1058 /*
1059  * Vector load/store helpers.
1060  *
1061  * The principal difference between this and a FP load is that we don't
1062  * zero extend as we are filling a partial chunk of the vector register.
1063  * These functions don't support 128 bit loads/stores, which would be
1064  * normal load/store operations.
1065  *
1066  * The _i32 versions are useful when operating on 32 bit quantities
1067  * (eg for floating point single or using Neon helper functions).
1068  */
1069 
1070 /* Get value of an element within a vector register */
1071 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1072                              int element, MemOp memop)
1073 {
1074     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1075     switch ((unsigned)memop) {
1076     case MO_8:
1077         tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1078         break;
1079     case MO_16:
1080         tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1081         break;
1082     case MO_32:
1083         tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1084         break;
1085     case MO_8|MO_SIGN:
1086         tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1087         break;
1088     case MO_16|MO_SIGN:
1089         tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1090         break;
1091     case MO_32|MO_SIGN:
1092         tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1093         break;
1094     case MO_64:
1095     case MO_64|MO_SIGN:
1096         tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1097         break;
1098     default:
1099         g_assert_not_reached();
1100     }
1101 }
1102 
1103 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1104                                  int element, MemOp memop)
1105 {
1106     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1107     switch (memop) {
1108     case MO_8:
1109         tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1110         break;
1111     case MO_16:
1112         tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1113         break;
1114     case MO_8|MO_SIGN:
1115         tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1116         break;
1117     case MO_16|MO_SIGN:
1118         tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1119         break;
1120     case MO_32:
1121     case MO_32|MO_SIGN:
1122         tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1123         break;
1124     default:
1125         g_assert_not_reached();
1126     }
1127 }
1128 
1129 /* Set value of an element within a vector register */
1130 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1131                               int element, MemOp memop)
1132 {
1133     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1134     switch (memop) {
1135     case MO_8:
1136         tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1137         break;
1138     case MO_16:
1139         tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1140         break;
1141     case MO_32:
1142         tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1143         break;
1144     case MO_64:
1145         tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1146         break;
1147     default:
1148         g_assert_not_reached();
1149     }
1150 }
1151 
1152 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1153                                   int destidx, int element, MemOp memop)
1154 {
1155     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1156     switch (memop) {
1157     case MO_8:
1158         tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1159         break;
1160     case MO_16:
1161         tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1162         break;
1163     case MO_32:
1164         tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1165         break;
1166     default:
1167         g_assert_not_reached();
1168     }
1169 }
1170 
1171 /* Store from vector register to memory */
1172 static void do_vec_st(DisasContext *s, int srcidx, int element,
1173                       TCGv_i64 tcg_addr, MemOp mop)
1174 {
1175     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1176 
1177     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1178     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1179 }
1180 
1181 /* Load from memory to vector register */
1182 static void do_vec_ld(DisasContext *s, int destidx, int element,
1183                       TCGv_i64 tcg_addr, MemOp mop)
1184 {
1185     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1186 
1187     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1188     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1189 }
1190 
1191 /* Check that FP/Neon access is enabled. If it is, return
1192  * true. If not, emit code to generate an appropriate exception,
1193  * and return false; the caller should not emit any code for
1194  * the instruction. Note that this check must happen after all
1195  * unallocated-encoding checks (otherwise the syndrome information
1196  * for the resulting exception will be incorrect).
1197  */
1198 static bool fp_access_check_only(DisasContext *s)
1199 {
1200     if (s->fp_excp_el) {
1201         assert(!s->fp_access_checked);
1202         s->fp_access_checked = true;
1203 
1204         gen_exception_insn_el(s, 0, EXCP_UDEF,
1205                               syn_fp_access_trap(1, 0xe, false, 0),
1206                               s->fp_excp_el);
1207         return false;
1208     }
1209     s->fp_access_checked = true;
1210     return true;
1211 }
1212 
1213 static bool fp_access_check(DisasContext *s)
1214 {
1215     if (!fp_access_check_only(s)) {
1216         return false;
1217     }
1218     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1219         gen_exception_insn(s, 0, EXCP_UDEF,
1220                            syn_smetrap(SME_ET_Streaming, false));
1221         return false;
1222     }
1223     return true;
1224 }
1225 
1226 /*
1227  * Check that SVE access is enabled.  If it is, return true.
1228  * If not, emit code to generate an appropriate exception and return false.
1229  * This function corresponds to CheckSVEEnabled().
1230  */
1231 bool sve_access_check(DisasContext *s)
1232 {
1233     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1234         assert(dc_isar_feature(aa64_sme, s));
1235         if (!sme_sm_enabled_check(s)) {
1236             goto fail_exit;
1237         }
1238     } else if (s->sve_excp_el) {
1239         gen_exception_insn_el(s, 0, EXCP_UDEF,
1240                               syn_sve_access_trap(), s->sve_excp_el);
1241         goto fail_exit;
1242     }
1243     s->sve_access_checked = true;
1244     return fp_access_check(s);
1245 
1246  fail_exit:
1247     /* Assert that we only raise one exception per instruction. */
1248     assert(!s->sve_access_checked);
1249     s->sve_access_checked = true;
1250     return false;
1251 }
1252 
1253 /*
1254  * Check that SME access is enabled, raise an exception if not.
1255  * Note that this function corresponds to CheckSMEAccess and is
1256  * only used directly for cpregs.
1257  */
1258 static bool sme_access_check(DisasContext *s)
1259 {
1260     if (s->sme_excp_el) {
1261         gen_exception_insn_el(s, 0, EXCP_UDEF,
1262                               syn_smetrap(SME_ET_AccessTrap, false),
1263                               s->sme_excp_el);
1264         return false;
1265     }
1266     return true;
1267 }
1268 
1269 /* This function corresponds to CheckSMEEnabled. */
1270 bool sme_enabled_check(DisasContext *s)
1271 {
1272     /*
1273      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1274      * to be zero when fp_excp_el has priority.  This is because we need
1275      * sme_excp_el by itself for cpregs access checks.
1276      */
1277     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1278         s->fp_access_checked = true;
1279         return sme_access_check(s);
1280     }
1281     return fp_access_check_only(s);
1282 }
1283 
1284 /* Common subroutine for CheckSMEAnd*Enabled. */
1285 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1286 {
1287     if (!sme_enabled_check(s)) {
1288         return false;
1289     }
1290     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1291         gen_exception_insn(s, 0, EXCP_UDEF,
1292                            syn_smetrap(SME_ET_NotStreaming, false));
1293         return false;
1294     }
1295     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1296         gen_exception_insn(s, 0, EXCP_UDEF,
1297                            syn_smetrap(SME_ET_InactiveZA, false));
1298         return false;
1299     }
1300     return true;
1301 }
1302 
1303 /*
1304  * Expanders for AdvSIMD translation functions.
1305  */
1306 
1307 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1308                             gen_helper_gvec_2 *fn)
1309 {
1310     if (!a->q && a->esz == MO_64) {
1311         return false;
1312     }
1313     if (fp_access_check(s)) {
1314         gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1315     }
1316     return true;
1317 }
1318 
1319 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1320                             gen_helper_gvec_3 *fn)
1321 {
1322     if (!a->q && a->esz == MO_64) {
1323         return false;
1324     }
1325     if (fp_access_check(s)) {
1326         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1327     }
1328     return true;
1329 }
1330 
1331 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1332 {
1333     if (!a->q && a->esz == MO_64) {
1334         return false;
1335     }
1336     if (fp_access_check(s)) {
1337         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1338     }
1339     return true;
1340 }
1341 
1342 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1343 {
1344     if (a->esz == MO_64) {
1345         return false;
1346     }
1347     if (fp_access_check(s)) {
1348         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1349     }
1350     return true;
1351 }
1352 
1353 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1354 {
1355     if (a->esz == MO_8) {
1356         return false;
1357     }
1358     return do_gvec_fn3_no64(s, a, fn);
1359 }
1360 
1361 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1362 {
1363     if (!a->q && a->esz == MO_64) {
1364         return false;
1365     }
1366     if (fp_access_check(s)) {
1367         gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1368     }
1369     return true;
1370 }
1371 
1372 /*
1373  * This utility function is for doing register extension with an
1374  * optional shift. You will likely want to pass a temporary for the
1375  * destination register. See DecodeRegExtend() in the ARM ARM.
1376  */
1377 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1378                               int option, unsigned int shift)
1379 {
1380     int extsize = extract32(option, 0, 2);
1381     bool is_signed = extract32(option, 2, 1);
1382 
1383     tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1384     tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1385 }
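
/*
 * Example (illustrative): option == 0b010 (UXTW) with shift == 2
 * zero-extends the low 32 bits of tcg_in and multiplies by 4, as used
 * by ADD (extended register) and the register-offset addressing modes.
 */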
1386 
1387 static inline void gen_check_sp_alignment(DisasContext *s)
1388 {
1389     /* The AArch64 architecture mandates that (if enabled via PSTATE
1390      * or SCTLR bits) there is a check that SP is 16-aligned on every
1391      * SP-relative load or store (with an exception generated if it is not).
1392      * In line with general QEMU practice regarding misaligned accesses,
1393      * we omit these checks for the sake of guest program performance.
1394      * This function is provided as a hook so we can more easily add these
1395      * checks in future (possibly as a "favour catching guest program bugs
1396      * over speed" user selectable option).
1397      */
1398 }
1399 
1400 /*
1401  * This provides a simple table based table lookup decoder. It is
1402  * intended to be used when the relevant bits for decode are too
1403  * awkwardly placed and switch/if based logic would be confusing and
1404  * deeply nested. Since it's a linear search through the table, tables
1405  * should be kept small.
1406  *
1407  * It returns the first handler where insn & mask == pattern, or
1408  * NULL if there is no match.
1409  * The table is terminated by an empty mask (i.e. 0)
1410  */
1411 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1412                                                uint32_t insn)
1413 {
1414     const AArch64DecodeTable *tptr = table;
1415 
1416     while (tptr->mask) {
1417         if ((insn & tptr->mask) == tptr->pattern) {
1418             return tptr->disas_fn;
1419         }
1420         tptr++;
1421     }
1422     return NULL;
1423 }
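
/*
 * Usage sketch (illustrative only; the pattern/mask values and handler
 * name below are made up, not taken from a real table in this file):
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f200400, disas_example_group },
 *       { 0x00000000, 0x00000000, NULL }
 *   };
 *   AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *   if (fn) {
 *       fn(s, insn);
 *   }
 */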
1424 
1425 /*
1426  * The instruction disassembly implemented here matches
1427  * the instruction encoding classifications in chapter C4
1428  * of the ARM Architecture Reference Manual (DDI0487B_a);
1429  * classification names and decode diagrams here should generally
1430  * match up with those in the manual.
1431  */
1432 
1433 static bool trans_B(DisasContext *s, arg_i *a)
1434 {
1435     reset_btype(s);
1436     gen_goto_tb(s, 0, a->imm);
1437     return true;
1438 }
1439 
1440 static bool trans_BL(DisasContext *s, arg_i *a)
1441 {
1442     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1443     reset_btype(s);
1444     gen_goto_tb(s, 0, a->imm);
1445     return true;
1446 }
1447 
1449 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1450 {
1451     DisasLabel match;
1452     TCGv_i64 tcg_cmp;
1453 
1454     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1455     reset_btype(s);
1456 
1457     match = gen_disas_label(s);
1458     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1459                         tcg_cmp, 0, match.label);
1460     gen_goto_tb(s, 0, 4);
1461     set_disas_label(s, match);
1462     gen_goto_tb(s, 1, a->imm);
1463     return true;
1464 }
1465 
1466 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1467 {
1468     DisasLabel match;
1469     TCGv_i64 tcg_cmp;
1470 
1471     tcg_cmp = tcg_temp_new_i64();
1472     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1473 
1474     reset_btype(s);
1475 
1476     match = gen_disas_label(s);
1477     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1478                         tcg_cmp, 0, match.label);
1479     gen_goto_tb(s, 0, 4);
1480     set_disas_label(s, match);
1481     gen_goto_tb(s, 1, a->imm);
1482     return true;
1483 }
1484 
1485 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1486 {
1487     /* BC.cond is only present with FEAT_HBC */
1488     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1489         return false;
1490     }
1491     reset_btype(s);
1492     if (a->cond < 0x0e) {
1493         /* genuinely conditional branches */
1494         DisasLabel match = gen_disas_label(s);
1495         arm_gen_test_cc(a->cond, match.label);
1496         gen_goto_tb(s, 0, 4);
1497         set_disas_label(s, match);
1498         gen_goto_tb(s, 1, a->imm);
1499     } else {
1500         /* 0xe and 0xf are both "always" conditions */
1501         gen_goto_tb(s, 0, a->imm);
1502     }
1503     return true;
1504 }
1505 
1506 static void set_btype_for_br(DisasContext *s, int rn)
1507 {
1508     if (dc_isar_feature(aa64_bti, s)) {
1509         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1510         if (rn == 16 || rn == 17) {
1511             set_btype(s, 1);
1512         } else {
1513             TCGv_i64 pc = tcg_temp_new_i64();
1514             gen_pc_plus_diff(s, pc, 0);
1515             gen_helper_guarded_page_br(tcg_env, pc);
1516             s->btype = -1;
1517         }
1518     }
1519 }
1520 
1521 static void set_btype_for_blr(DisasContext *s)
1522 {
1523     if (dc_isar_feature(aa64_bti, s)) {
1524         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1525         set_btype(s, 2);
1526     }
1527 }
1528 
1529 static bool trans_BR(DisasContext *s, arg_r *a)
1530 {
1531     set_btype_for_br(s, a->rn);
1532     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1533     s->base.is_jmp = DISAS_JUMP;
1534     return true;
1535 }
1536 
1537 static bool trans_BLR(DisasContext *s, arg_r *a)
1538 {
1539     TCGv_i64 dst = cpu_reg(s, a->rn);
1540     TCGv_i64 lr = cpu_reg(s, 30);
1541     if (dst == lr) {
1542         TCGv_i64 tmp = tcg_temp_new_i64();
1543         tcg_gen_mov_i64(tmp, dst);
1544         dst = tmp;
1545     }
1546     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1547     gen_a64_set_pc(s, dst);
1548     set_btype_for_blr(s);
1549     s->base.is_jmp = DISAS_JUMP;
1550     return true;
1551 }
1552 
1553 static bool trans_RET(DisasContext *s, arg_r *a)
1554 {
1555     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1556     s->base.is_jmp = DISAS_JUMP;
1557     return true;
1558 }
1559 
1560 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1561                                    TCGv_i64 modifier, bool use_key_a)
1562 {
1563     TCGv_i64 truedst;
1564     /*
1565      * Return the branch target for a BRAA/RETA/etc, which is either
1566      * just the destination dst, or that value with the pauth check
1567      * done and the code removed from the high bits.
1568      */
1569     if (!s->pauth_active) {
1570         return dst;
1571     }
1572 
1573     truedst = tcg_temp_new_i64();
1574     if (use_key_a) {
1575         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1576     } else {
1577         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1578     }
1579     return truedst;
1580 }
1581 
1582 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1583 {
1584     TCGv_i64 dst;
1585 
1586     if (!dc_isar_feature(aa64_pauth, s)) {
1587         return false;
1588     }
1589 
1590     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1591     set_btype_for_br(s, a->rn);
1592     gen_a64_set_pc(s, dst);
1593     s->base.is_jmp = DISAS_JUMP;
1594     return true;
1595 }
1596 
1597 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1598 {
1599     TCGv_i64 dst, lr;
1600 
1601     if (!dc_isar_feature(aa64_pauth, s)) {
1602         return false;
1603     }
1604 
1605     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1606     lr = cpu_reg(s, 30);
1607     if (dst == lr) {
1608         TCGv_i64 tmp = tcg_temp_new_i64();
1609         tcg_gen_mov_i64(tmp, dst);
1610         dst = tmp;
1611     }
1612     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1613     gen_a64_set_pc(s, dst);
1614     set_btype_for_blr(s);
1615     s->base.is_jmp = DISAS_JUMP;
1616     return true;
1617 }
1618 
1619 static bool trans_RETA(DisasContext *s, arg_reta *a)
1620 {
1621     TCGv_i64 dst;
1622 
1623     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1624     gen_a64_set_pc(s, dst);
1625     s->base.is_jmp = DISAS_JUMP;
1626     return true;
1627 }
1628 
1629 static bool trans_BRA(DisasContext *s, arg_bra *a)
1630 {
1631     TCGv_i64 dst;
1632 
1633     if (!dc_isar_feature(aa64_pauth, s)) {
1634         return false;
1635     }
1636     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1637     gen_a64_set_pc(s, dst);
1638     set_btype_for_br(s, a->rn);
1639     s->base.is_jmp = DISAS_JUMP;
1640     return true;
1641 }
1642 
1643 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1644 {
1645     TCGv_i64 dst, lr;
1646 
1647     if (!dc_isar_feature(aa64_pauth, s)) {
1648         return false;
1649     }
1650     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1651     lr = cpu_reg(s, 30);
1652     if (dst == lr) {
1653         TCGv_i64 tmp = tcg_temp_new_i64();
1654         tcg_gen_mov_i64(tmp, dst);
1655         dst = tmp;
1656     }
1657     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1658     gen_a64_set_pc(s, dst);
1659     set_btype_for_blr(s);
1660     s->base.is_jmp = DISAS_JUMP;
1661     return true;
1662 }
1663 
1664 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1665 {
1666     TCGv_i64 dst;
1667 
1668     if (s->current_el == 0) {
1669         return false;
1670     }
1671     if (s->trap_eret) {
1672         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1673         return true;
1674     }
1675     dst = tcg_temp_new_i64();
1676     tcg_gen_ld_i64(dst, tcg_env,
1677                    offsetof(CPUARMState, elr_el[s->current_el]));
1678 
1679     translator_io_start(&s->base);
1680 
1681     gen_helper_exception_return(tcg_env, dst);
1682     /* Must exit loop to check unmasked IRQs */
1683     s->base.is_jmp = DISAS_EXIT;
1684     return true;
1685 }
1686 
1687 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1688 {
1689     TCGv_i64 dst;
1690 
1691     if (!dc_isar_feature(aa64_pauth, s)) {
1692         return false;
1693     }
1694     if (s->current_el == 0) {
1695         return false;
1696     }
1697     /* The FGT trap takes precedence over an auth trap. */
1698     if (s->trap_eret) {
1699         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1700         return true;
1701     }
1702     dst = tcg_temp_new_i64();
1703     tcg_gen_ld_i64(dst, tcg_env,
1704                    offsetof(CPUARMState, elr_el[s->current_el]));
1705 
1706     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1707 
1708     translator_io_start(&s->base);
1709 
1710     gen_helper_exception_return(tcg_env, dst);
1711     /* Must exit loop to check unmasked IRQs */
1712     s->base.is_jmp = DISAS_EXIT;
1713     return true;
1714 }
1715 
1716 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1717 {
1718     return true;
1719 }
1720 
1721 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1722 {
1723     /*
1724      * When running in MTTCG we don't generate jumps to the yield and
1725      * WFE helpers as it won't affect the scheduling of other vCPUs.
1726      * If we wanted to more completely model WFE/SEV so we don't busy
1727      * spin unnecessarily we would need to do something more involved.
1728      */
1729     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1730         s->base.is_jmp = DISAS_YIELD;
1731     }
1732     return true;
1733 }
1734 
1735 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1736 {
1737     s->base.is_jmp = DISAS_WFI;
1738     return true;
1739 }
1740 
1741 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1742 {
1743     /*
1744      * When running in MTTCG we don't generate jumps to the yield and
1745      * WFE helpers as it won't affect the scheduling of other vCPUs.
1746      * If we wanted to more completely model WFE/SEV so we don't busy
1747      * spin unnecessarily we would need to do something more involved.
1748      */
1749     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1750         s->base.is_jmp = DISAS_WFE;
1751     }
1752     return true;
1753 }
1754 
1755 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1756 {
1757     if (!dc_isar_feature(aa64_wfxt, s)) {
1758         return false;
1759     }
1760 
1761     /*
1762      * Because we need to pass the register value to the helper,
1763      * it's easier to emit the code now, unlike trans_WFI which
1764      * defers it to aarch64_tr_tb_stop(). That means we need to
1765      * check ss_active so that single-stepping a WFIT doesn't halt.
1766      */
1767     if (s->ss_active) {
1768         /* Act like a NOP under architectural singlestep */
1769         return true;
1770     }
1771 
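    /*
     * Advance the PC past this insn before calling the helper: if the
     * helper halts the CPU, execution must resume at the following
     * instruction when it is woken.
     */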
1772     gen_a64_update_pc(s, 4);
1773     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1774     /* Go back to the main loop to check for interrupts */
1775     s->base.is_jmp = DISAS_EXIT;
1776     return true;
1777 }
1778 
1779 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1780 {
1781     if (!dc_isar_feature(aa64_wfxt, s)) {
1782         return false;
1783     }
1784 
1785     /*
1786      * We rely here on our WFE implementation being a NOP, so we
1787      * don't need to do anything different to handle the WFET timeout
1788      * from what trans_WFE does.
1789      */
1790     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1791         s->base.is_jmp = DISAS_WFE;
1792     }
1793     return true;
1794 }
1795 
1796 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1797 {
1798     if (s->pauth_active) {
1799         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1800     }
1801     return true;
1802 }
1803 
1804 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1805 {
1806     if (s->pauth_active) {
1807         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1808     }
1809     return true;
1810 }
1811 
1812 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1813 {
1814     if (s->pauth_active) {
1815         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1816     }
1817     return true;
1818 }
1819 
1820 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1821 {
1822     if (s->pauth_active) {
1823         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1824     }
1825     return true;
1826 }
1827 
1828 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1829 {
1830     if (s->pauth_active) {
1831         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1832     }
1833     return true;
1834 }
1835 
1836 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1837 {
1838     /* Without RAS, we must implement this as NOP. */
1839     if (dc_isar_feature(aa64_ras, s)) {
1840         /*
1841          * QEMU does not have a source of physical SErrors,
1842          * so we are only concerned with virtual SErrors.
1843          * The pseudocode in the ARM for this case is
1844          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1845          *      AArch64.vESBOperation();
1846          * Most of the condition can be evaluated at translation time.
1847          * Test for EL2 present, and defer test for SEL2 to runtime.
1848          */
1849         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1850             gen_helper_vesb(tcg_env);
1851         }
1852     }
1853     return true;
1854 }
1855 
1856 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1857 {
1858     if (s->pauth_active) {
1859         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1860     }
1861     return true;
1862 }
1863 
1864 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1865 {
1866     if (s->pauth_active) {
1867         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1868     }
1869     return true;
1870 }
1871 
1872 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1873 {
1874     if (s->pauth_active) {
1875         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1876     }
1877     return true;
1878 }
1879 
1880 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1881 {
1882     if (s->pauth_active) {
1883         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1884     }
1885     return true;
1886 }
1887 
1888 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1889 {
1890     if (s->pauth_active) {
1891         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1892     }
1893     return true;
1894 }
1895 
1896 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1897 {
1898     if (s->pauth_active) {
1899         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1900     }
1901     return true;
1902 }
1903 
1904 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1905 {
1906     if (s->pauth_active) {
1907         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1908     }
1909     return true;
1910 }
1911 
1912 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1913 {
1914     if (s->pauth_active) {
1915         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1916     }
1917     return true;
1918 }
1919 
1920 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1921 {
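    /* An exclusive_addr of -1 is the "no open exclusive monitor" marker. */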
1922     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1923     return true;
1924 }
1925 
1926 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1927 {
1928     /* We handle DSB and DMB the same way */
1929     TCGBar bar;
1930 
1931     switch (a->types) {
1932     case 1: /* MBReqTypes_Reads */
1933         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1934         break;
1935     case 2: /* MBReqTypes_Writes */
1936         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1937         break;
1938     default: /* MBReqTypes_All */
1939         bar = TCG_BAR_SC | TCG_MO_ALL;
1940         break;
1941     }
1942     tcg_gen_mb(bar);
1943     return true;
1944 }
1945 
1946 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1947 {
1948     /*
1949      * We need to break the TB after this insn to execute
1950      * self-modifying code correctly and also to take
1951      * any pending interrupts immediately.
1952      */
1953     reset_btype(s);
1954     gen_goto_tb(s, 0, 4);
1955     return true;
1956 }
1957 
1958 static bool trans_SB(DisasContext *s, arg_SB *a)
1959 {
1960     if (!dc_isar_feature(aa64_sb, s)) {
1961         return false;
1962     }
1963     /*
1964      * TODO: There is no speculation barrier opcode for TCG;
1965      * MB and end the TB instead.
1966      */
1967     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1968     gen_goto_tb(s, 0, 4);
1969     return true;
1970 }
1971 
1972 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
1973 {
1974     if (!dc_isar_feature(aa64_condm_4, s)) {
1975         return false;
1976     }
1977     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1978     return true;
1979 }
1980 
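/*
 * XAFLAG (FEAT_FlagM2) converts a comparison result from the "external"
 * flag format back to the ARM NZCV format:
 *   N = !C & !Z,  Z = Z & C,  C = C | Z,  V = !C & Z
 * The TCG below computes these in QEMU's flag representation: NF and VF
 * hold their flag in bit 31, ZF is "set" when the value is zero, and CF
 * holds 0 or 1.
 */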
1981 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
1982 {
1983     TCGv_i32 z;
1984 
1985     if (!dc_isar_feature(aa64_condm_5, s)) {
1986         return false;
1987     }
1988 
1989     z = tcg_temp_new_i32();
1990 
1991     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1992 
1993     /*
1994      * (!C & !Z) << 31
1995      * (!(C | Z)) << 31
1996      * ~((C | Z) << 31)
1997      * ~-(C | Z)
1998      * (C | Z) - 1
1999      */
2000     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2001     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2002 
2003     /* !(Z & C) */
2004     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2005     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2006 
2007     /* (!C & Z) << 31 -> -(Z & ~C) */
2008     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2009     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2010 
2011     /* C | Z */
2012     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2013 
2014     return true;
2015 }
2016 
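/*
 * AXFLAG (FEAT_FlagM2) converts ARM NZCV flags to the "external" format:
 *   N = 0,  Z = Z | V,  C = C & !V,  V = 0
 */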
2017 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2018 {
2019     if (!dc_isar_feature(aa64_condm_5, s)) {
2020         return false;
2021     }
2022 
2023     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2024     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2025 
2026     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2027     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2028 
2029     tcg_gen_movi_i32(cpu_NF, 0);
2030     tcg_gen_movi_i32(cpu_VF, 0);
2031 
2032     return true;
2033 }
2034 
2035 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2036 {
2037     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2038         return false;
2039     }
2040     if (a->imm & 1) {
2041         set_pstate_bits(PSTATE_UAO);
2042     } else {
2043         clear_pstate_bits(PSTATE_UAO);
2044     }
2045     gen_rebuild_hflags(s);
2046     s->base.is_jmp = DISAS_TOO_MANY;
2047     return true;
2048 }
2049 
2050 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2051 {
2052     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2053         return false;
2054     }
2055     if (a->imm & 1) {
2056         set_pstate_bits(PSTATE_PAN);
2057     } else {
2058         clear_pstate_bits(PSTATE_PAN);
2059     }
2060     gen_rebuild_hflags(s);
2061     s->base.is_jmp = DISAS_TOO_MANY;
2062     return true;
2063 }
2064 
2065 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2066 {
2067     if (s->current_el == 0) {
2068         return false;
2069     }
2070     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2071     s->base.is_jmp = DISAS_TOO_MANY;
2072     return true;
2073 }
2074 
2075 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2076 {
2077     if (!dc_isar_feature(aa64_ssbs, s)) {
2078         return false;
2079     }
2080     if (a->imm & 1) {
2081         set_pstate_bits(PSTATE_SSBS);
2082     } else {
2083         clear_pstate_bits(PSTATE_SSBS);
2084     }
2085     /* Don't need to rebuild hflags since SSBS is a nop */
2086     s->base.is_jmp = DISAS_TOO_MANY;
2087     return true;
2088 }
2089 
2090 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2091 {
2092     if (!dc_isar_feature(aa64_dit, s)) {
2093         return false;
2094     }
2095     if (a->imm & 1) {
2096         set_pstate_bits(PSTATE_DIT);
2097     } else {
2098         clear_pstate_bits(PSTATE_DIT);
2099     }
2100     /* There's no need to rebuild hflags because DIT is a nop */
2101     s->base.is_jmp = DISAS_TOO_MANY;
2102     return true;
2103 }
2104 
2105 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2106 {
2107     if (dc_isar_feature(aa64_mte, s)) {
2108         /* Full MTE is enabled -- set the TCO bit as directed. */
2109         if (a->imm & 1) {
2110             set_pstate_bits(PSTATE_TCO);
2111         } else {
2112             clear_pstate_bits(PSTATE_TCO);
2113         }
2114         gen_rebuild_hflags(s);
2115         /* Many factors, including TCO, go into MTE_ACTIVE. */
2116         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2117         return true;
2118     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2119         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2120         return true;
2121     } else {
2122         /* Insn not present */
2123         return false;
2124     }
2125 }
2126 
2127 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2128 {
2129     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2130     s->base.is_jmp = DISAS_TOO_MANY;
2131     return true;
2132 }
2133 
2134 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2135 {
2136     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2137     /* Exit the cpu loop to re-evaluate pending IRQs. */
2138     s->base.is_jmp = DISAS_UPDATE_EXIT;
2139     return true;
2140 }
2141 
2142 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2143 {
2144     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2145         return false;
2146     }
2147 
2148     if (a->imm == 0) {
2149         clear_pstate_bits(PSTATE_ALLINT);
2150     } else if (s->current_el > 1) {
2151         set_pstate_bits(PSTATE_ALLINT);
2152     } else {
2153         gen_helper_msr_set_allint_el1(tcg_env);
2154     }
2155 
2156     /* Exit the cpu loop to re-evaluate pending IRQs. */
2157     s->base.is_jmp = DISAS_UPDATE_EXIT;
2158     return true;
2159 }
2160 
2161 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2162 {
2163     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2164         return false;
2165     }
2166     if (sme_access_check(s)) {
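        /*
         * PSTATE.SM lives in bit 0 and PSTATE.ZA in bit 1 of old/new.
         * The immediate is a single bit, so "imm * 3" replicates it into
         * both positions; a->mask selects which of SM/ZA are written.
         */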
2167         int old = s->pstate_sm | (s->pstate_za << 1);
2168         int new = a->imm * 3;
2169 
2170         if ((old ^ new) & a->mask) {
2171             /* At least one bit changes. */
2172             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2173                                 tcg_constant_i32(a->mask));
2174             s->base.is_jmp = DISAS_TOO_MANY;
2175         }
2176     }
2177     return true;
2178 }
2179 
2180 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2181 {
2182     TCGv_i32 tmp = tcg_temp_new_i32();
2183     TCGv_i32 nzcv = tcg_temp_new_i32();
2184 
2185     /* build bit 31, N */
2186     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2187     /* build bit 30, Z */
2188     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2189     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2190     /* build bit 29, C */
2191     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2192     /* build bit 28, V */
2193     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2194     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2195     /* generate result */
2196     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2197 }
2198 
2199 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2200 {
2201     TCGv_i32 nzcv = tcg_temp_new_i32();
2202 
2203     /* take NZCV from R[t] */
2204     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2205 
2206     /* bit 31, N */
2207     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2208     /* bit 30, Z */
2209     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2210     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2211     /* bit 29, C */
2212     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2213     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2214     /* bit 28, V */
2215     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2216     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2217 }
2218 
2219 static void gen_sysreg_undef(DisasContext *s, bool isread,
2220                              uint8_t op0, uint8_t op1, uint8_t op2,
2221                              uint8_t crn, uint8_t crm, uint8_t rt)
2222 {
2223     /*
2224      * Generate code to emit an UNDEF with correct syndrome
2225      * information for a failed system register access.
2226      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2227      * but if FEAT_IDST is implemented then read accesses to registers
2228      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2229      * syndrome.
2230      */
2231     uint32_t syndrome;
2232 
2233     if (isread && dc_isar_feature(aa64_ids, s) &&
2234         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2235         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2236     } else {
2237         syndrome = syn_uncategorized();
2238     }
2239     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2240 }
2241 
2242 /* MRS - move from system register
2243  * MSR (register) - move to system register
2244  * SYS
2245  * SYSL
2246  * These are all essentially the same insn in 'read' and 'write'
2247  * versions, with varying op0 fields.
2248  */
2249 static void handle_sys(DisasContext *s, bool isread,
2250                        unsigned int op0, unsigned int op1, unsigned int op2,
2251                        unsigned int crn, unsigned int crm, unsigned int rt)
2252 {
2253     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2254                                       crn, crm, op0, op1, op2);
2255     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2256     bool need_exit_tb = false;
2257     bool nv_trap_to_el2 = false;
2258     bool nv_redirect_reg = false;
2259     bool skip_fp_access_checks = false;
2260     bool nv2_mem_redirect = false;
2261     TCGv_ptr tcg_ri = NULL;
2262     TCGv_i64 tcg_rt;
2263     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2264 
2265     if (crn == 11 || crn == 15) {
2266         /*
2267          * Check for TIDCP trap, which must take precedence over
2268          * the UNDEF for "no such register" etc.
2269          */
2270         switch (s->current_el) {
2271         case 0:
2272             if (dc_isar_feature(aa64_tidcp1, s)) {
2273                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2274             }
2275             break;
2276         case 1:
2277             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2278             break;
2279         }
2280     }
2281 
2282     if (!ri) {
2283         /* Unknown register; this might be a guest error or a QEMU
2284          * unimplemented feature.
2285          */
2286         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2287                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2288                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2289         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2290         return;
2291     }
2292 
2293     if (s->nv2 && ri->nv2_redirect_offset) {
2294         /*
2295          * Some registers always redirect to memory; some only do so if
2296          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2297          * pairs which share an offset; see the table in R_CSRPQ).
2298          */
2299         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2300             nv2_mem_redirect = s->nv1;
2301         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2302             nv2_mem_redirect = !s->nv1;
2303         } else {
2304             nv2_mem_redirect = true;
2305         }
2306     }
2307 
2308     /* Check access permissions */
2309     if (!cp_access_ok(s->current_el, ri, isread)) {
2310         /*
2311          * FEAT_NV/NV2 handling does not do the usual FP access checks
2312          * for registers only accessible at EL2 (though it *does* do them
2313          * for registers accessible at EL1).
2314          */
2315         skip_fp_access_checks = true;
2316         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2317             /*
2318              * This is one of the few EL2 registers which should redirect
2319              * to the equivalent EL1 register. We do that after running
2320              * the EL2 register's accessfn.
2321              */
2322             nv_redirect_reg = true;
2323             assert(!nv2_mem_redirect);
2324         } else if (nv2_mem_redirect) {
2325             /*
2326              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2327              * UNDEF to EL1.
2328              */
2329         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2330             /*
2331              * This register / instruction exists and is an EL2 register, so
2332              * we must trap to EL2 if accessed in nested virtualization EL1
2333              * instead of UNDEFing. We'll do that after the usual access checks.
2334              * (This makes a difference only for a couple of registers like
2335              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2336              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2337              * an accessfn which does nothing when called from EL1, because
2338              * the trap-to-EL3 controls which would apply to that register
2339              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2340              */
2341             nv_trap_to_el2 = true;
2342         } else {
2343             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2344             return;
2345         }
2346     }
2347 
2348     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2349         /* Emit code to perform further access permissions checks at
2350          * runtime; this may result in an exception.
2351          */
2352         gen_a64_update_pc(s, 0);
2353         tcg_ri = tcg_temp_new_ptr();
2354         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2355                                        tcg_constant_i32(key),
2356                                        tcg_constant_i32(syndrome),
2357                                        tcg_constant_i32(isread));
2358     } else if (ri->type & ARM_CP_RAISES_EXC) {
2359         /*
2360          * The readfn or writefn might raise an exception;
2361          * synchronize the CPU state in case it does.
2362          */
2363         gen_a64_update_pc(s, 0);
2364     }
2365 
2366     if (!skip_fp_access_checks) {
2367         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2368             return;
2369         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2370             return;
2371         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2372             return;
2373         }
2374     }
2375 
2376     if (nv_trap_to_el2) {
2377         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2378         return;
2379     }
2380 
2381     if (nv_redirect_reg) {
2382         /*
2383          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2384          * Conveniently in all cases the encoding of the EL1 register is
2385          * identical to the EL2 register except that opc1 is 0.
2386          * Get the reginfo for the EL1 register to use for the actual access.
2387          * We don't use the EL1 register's access function, and
2388          * fine-grained-traps on EL1 also do not apply here.
2389          */
2390         key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2391                                  crn, crm, op0, 0, op2);
2392         ri = get_arm_cp_reginfo(s->cp_regs, key);
2393         assert(ri);
2394         assert(cp_access_ok(s->current_el, ri, isread));
2395         /*
2396          * We might not have done an update_pc earlier, so check we don't
2397          * need it. We could support this in future if necessary.
2398          */
2399         assert(!(ri->type & ARM_CP_RAISES_EXC));
2400     }
2401 
2402     if (nv2_mem_redirect) {
2403         /*
2404          * This system register is being redirected into an EL2 memory access.
2405          * This means it is not an IO operation, doesn't change hflags,
2406          * and need not end the TB, because it has no side effects.
2407          *
2408          * The access is 64-bit single copy atomic, guaranteed aligned because
2409          * of the definition of VCNR_EL2. Its endianness depends on
2410          * SCTLR_EL2.EE, not on the data endianness of EL1.
2411          * It is done under either the EL2 translation regime or the EL2&0
2412          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2413          * PSTATE.PAN is 0.
2414          */
2415         TCGv_i64 ptr = tcg_temp_new_i64();
2416         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2417         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2418         int memidx = arm_to_core_mmu_idx(armmemidx);
2419         uint32_t syn;
2420 
2421         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2422 
2423         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
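        /*
         * The low bits of nv2_redirect_offset are flag bits (NV1/NO_NV1);
         * mask them off to get the byte offset from VNCR_EL2.
         */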
2424         tcg_gen_addi_i64(ptr, ptr,
2425                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2426         tcg_rt = cpu_reg(s, rt);
2427 
2428         syn = syn_data_abort_vncr(0, !isread, 0);
2429         disas_set_insn_syndrome(s, syn);
2430         if (isread) {
2431             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2432         } else {
2433             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2434         }
2435         return;
2436     }
2437 
2438     /* Handle special cases first */
2439     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2440     case 0:
2441         break;
2442     case ARM_CP_NOP:
2443         return;
2444     case ARM_CP_NZCV:
2445         tcg_rt = cpu_reg(s, rt);
2446         if (isread) {
2447             gen_get_nzcv(tcg_rt);
2448         } else {
2449             gen_set_nzcv(tcg_rt);
2450         }
2451         return;
2452     case ARM_CP_CURRENTEL:
2453     {
2454         /*
2455          * Reads as current EL value from pstate, which is
2456          * guaranteed to be constant by the tb flags.
2457          * For nested virt we should report EL2.
2458          */
2459         int el = s->nv ? 2 : s->current_el;
2460         tcg_rt = cpu_reg(s, rt);
2461         tcg_gen_movi_i64(tcg_rt, el << 2);
2462         return;
2463     }
2464     case ARM_CP_DC_ZVA:
2465         /* Writes clear the aligned block of memory which rt points into. */
2466         if (s->mte_active[0]) {
2467             int desc = 0;
2468 
2469             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2470             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2471             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2472 
2473             tcg_rt = tcg_temp_new_i64();
2474             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2475                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2476         } else {
2477             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2478         }
2479         gen_helper_dc_zva(tcg_env, tcg_rt);
2480         return;
2481     case ARM_CP_DC_GVA:
2482         {
2483             TCGv_i64 clean_addr, tag;
2484 
2485             /*
2486              * DC_GVA, like DC_ZVA, requires that we supply the original
2487              * pointer for an invalid page.  Probe that address first.
2488              */
2489             tcg_rt = cpu_reg(s, rt);
2490             clean_addr = clean_data_tbi(s, tcg_rt);
2491             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2492 
2493             if (s->ata[0]) {
2494                 /* Extract the tag from the register to match STZGM.  */
2495                 tag = tcg_temp_new_i64();
2496                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2497                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2498             }
2499         }
2500         return;
2501     case ARM_CP_DC_GZVA:
2502         {
2503             TCGv_i64 clean_addr, tag;
2504 
2505             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2506             tcg_rt = cpu_reg(s, rt);
2507             clean_addr = clean_data_tbi(s, tcg_rt);
2508             gen_helper_dc_zva(tcg_env, clean_addr);
2509 
2510             if (s->ata[0]) {
2511                 /* Extract the tag from the register to match STZGM.  */
2512                 tag = tcg_temp_new_i64();
2513                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2514                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2515             }
2516         }
2517         return;
2518     default:
2519         g_assert_not_reached();
2520     }
2521 
2522     if (ri->type & ARM_CP_IO) {
2523         /* I/O operations must end the TB here (whether read or write) */
2524         need_exit_tb = translator_io_start(&s->base);
2525     }
2526 
2527     tcg_rt = cpu_reg(s, rt);
2528 
2529     if (isread) {
2530         if (ri->type & ARM_CP_CONST) {
2531             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2532         } else if (ri->readfn) {
2533             if (!tcg_ri) {
2534                 tcg_ri = gen_lookup_cp_reg(key);
2535             }
2536             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2537         } else {
2538             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2539         }
2540     } else {
2541         if (ri->type & ARM_CP_CONST) {
2542             /* If not forbidden by access permissions, treat as WI */
2543             return;
2544         } else if (ri->writefn) {
2545             if (!tcg_ri) {
2546                 tcg_ri = gen_lookup_cp_reg(key);
2547             }
2548             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2549         } else {
2550             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2551         }
2552     }
2553 
2554     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2555         /*
2556          * A write to any coprocessor register that ends a TB
2557          * must rebuild the hflags for the next TB.
2558          */
2559         gen_rebuild_hflags(s);
2560         /*
2561          * We default to ending the TB on a coprocessor register write,
2562          * but allow this to be suppressed by the register definition
2563          * (usually only necessary to work around guest bugs).
2564          */
2565         need_exit_tb = true;
2566     }
2567     if (need_exit_tb) {
2568         s->base.is_jmp = DISAS_UPDATE_EXIT;
2569     }
2570 }
2571 
2572 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2573 {
2574     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2575     return true;
2576 }
2577 
2578 static bool trans_SVC(DisasContext *s, arg_i *a)
2579 {
2580     /*
2581      * For SVC, HVC and SMC we advance the single-step state
2582      * machine before taking the exception. This is architecturally
2583      * mandated, to ensure that single-stepping a system call
2584      * instruction works properly.
2585      */
2586     uint32_t syndrome = syn_aa64_svc(a->imm);
2587     if (s->fgt_svc) {
2588         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2589         return true;
2590     }
2591     gen_ss_advance(s);
2592     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2593     return true;
2594 }
2595 
2596 static bool trans_HVC(DisasContext *s, arg_i *a)
2597 {
2598     int target_el = s->current_el == 3 ? 3 : 2;
2599 
2600     if (s->current_el == 0) {
2601         unallocated_encoding(s);
2602         return true;
2603     }
2604     /*
2605      * The pre HVC helper handles cases when HVC gets trapped
2606      * as an undefined insn by runtime configuration.
2607      */
2608     gen_a64_update_pc(s, 0);
2609     gen_helper_pre_hvc(tcg_env);
2610     /* Architecture requires ss advance before we do the actual work */
2611     gen_ss_advance(s);
2612     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2613     return true;
2614 }
2615 
2616 static bool trans_SMC(DisasContext *s, arg_i *a)
2617 {
2618     if (s->current_el == 0) {
2619         unallocated_encoding(s);
2620         return true;
2621     }
2622     gen_a64_update_pc(s, 0);
2623     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2624     /* Architecture requires ss advance before we do the actual work */
2625     gen_ss_advance(s);
2626     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2627     return true;
2628 }
2629 
2630 static bool trans_BRK(DisasContext *s, arg_i *a)
2631 {
2632     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2633     return true;
2634 }
2635 
2636 static bool trans_HLT(DisasContext *s, arg_i *a)
2637 {
2638     /*
2639      * HLT. This has two purposes.
2640      * Architecturally, it is an external halting debug instruction.
2641      * Since QEMU doesn't implement external debug, we treat this as
2642      * the architecture requires when halting debug is disabled: it will UNDEF.
2643      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2644      */
2645     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2646         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2647     } else {
2648         unallocated_encoding(s);
2649     }
2650     return true;
2651 }
2652 
2653 /*
2654  * Load/Store exclusive instructions are implemented by remembering
2655  * the value/address loaded, and seeing if these are the same
2656  * when the store is performed. This is not actually the architecturally
2657  * mandated semantics, but it works for typical guest code sequences
2658  * and avoids having to monitor regular stores.
2659  *
2660  * The store exclusive uses the atomic cmpxchg primitives to avoid
2661  * races in multi-threaded linux-user and when MTTCG softmmu is
2662  * enabled.
2663  */
2664 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2665                                int size, bool is_pair)
2666 {
2667     int idx = get_mem_index(s);
2668     TCGv_i64 dirty_addr, clean_addr;
2669     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2670 
2671     s->is_ldex = true;
2672     dirty_addr = cpu_reg_sp(s, rn);
2673     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2674 
2675     g_assert(size <= 3);
2676     if (is_pair) {
2677         g_assert(size >= 2);
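        /*
         * A 32-bit pair (size == 2) packs both halves into
         * cpu_exclusive_val with a single 64-bit load; a 64-bit pair
         * (size == 3) uses a 128-bit load and also fills
         * cpu_exclusive_high.
         */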
2678         if (size == 2) {
2679             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2680             if (s->be_data == MO_LE) {
2681                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2682                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2683             } else {
2684                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2685                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2686             }
2687         } else {
2688             TCGv_i128 t16 = tcg_temp_new_i128();
2689 
2690             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2691 
2692             if (s->be_data == MO_LE) {
2693                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2694                                       cpu_exclusive_high, t16);
2695             } else {
2696                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2697                                       cpu_exclusive_val, t16);
2698             }
2699             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2700             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2701         }
2702     } else {
2703         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2704         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2705     }
2706     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2707 }
2708 
2709 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2710                                 int rn, int size, int is_pair)
2711 {
2712     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2713      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2714      *     [addr] = {Rt};
2715      *     if (is_pair) {
2716      *         [addr + datasize] = {Rt2};
2717      *     }
2718      *     {Rd} = 0;
2719      * } else {
2720      *     {Rd} = 1;
2721      * }
2722      * env->exclusive_addr = -1;
2723      */
2724     TCGLabel *fail_label = gen_new_label();
2725     TCGLabel *done_label = gen_new_label();
2726     TCGv_i64 tmp, clean_addr;
2727     MemOp memop;
2728 
2729     /*
2730      * FIXME: We are out of spec here.  We have recorded only the address
2731      * from load_exclusive, not the entire range, and we assume that the
2732      * size of the access on both sides match.  The architecture allows the
2733      * store to be smaller than the load, so long as the stored bytes are
2734      * within the range recorded by the load.
2735      */
2736 
2737     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2738     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2739     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2740 
2741     /*
2742      * The write, and any associated faults, only happen if the virtual
2743      * and physical addresses pass the exclusive monitor check.  These
2744      * faults are exceedingly unlikely, because normally the guest uses
2745      * the exact same address register for the load_exclusive, and we
2746      * would have recognized these faults there.
2747      *
2748      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2749      * unaligned 4-byte write within the range of an aligned 8-byte load.
2750      * With LSE2, the store would need to cross a 16-byte boundary when the
2751      * load did not, which would mean the store is outside the range
2752      * recorded for the monitor, which would have failed a corrected monitor
2753      * check above.  For now, we assume no size change and retain the
2754      * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2755      *
2756      * It is possible to trigger an MTE fault, by performing the load with
2757      * a virtual address with a valid tag and performing the store with the
2758      * same virtual address and a different invalid tag.
2759      */
2760     memop = size + is_pair;
2761     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2762         memop |= MO_ALIGN;
2763     }
2764     memop = finalize_memop(s, memop);
2765     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2766 
2767     tmp = tcg_temp_new_i64();
2768     if (is_pair) {
2769         if (size == 2) {
2770             if (s->be_data == MO_LE) {
2771                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2772             } else {
2773                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2774             }
2775             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2776                                        cpu_exclusive_val, tmp,
2777                                        get_mem_index(s), memop);
2778             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2779         } else {
2780             TCGv_i128 t16 = tcg_temp_new_i128();
2781             TCGv_i128 c16 = tcg_temp_new_i128();
2782             TCGv_i64 a, b;
2783 
2784             if (s->be_data == MO_LE) {
2785                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2786                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2787                                         cpu_exclusive_high);
2788             } else {
2789                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2790                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2791                                         cpu_exclusive_val);
2792             }
2793 
2794             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2795                                         get_mem_index(s), memop);
2796 
2797             a = tcg_temp_new_i64();
2798             b = tcg_temp_new_i64();
2799             if (s->be_data == MO_LE) {
2800                 tcg_gen_extr_i128_i64(a, b, t16);
2801             } else {
2802                 tcg_gen_extr_i128_i64(b, a, t16);
2803             }
2804 
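            /*
             * The store succeeded iff the value loaded back equals the
             * expected pair; fold the two 64-bit comparisons into a
             * single result (0 on success, 1 on failure).
             */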
2805             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2806             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2807             tcg_gen_or_i64(tmp, a, b);
2808 
2809             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2810         }
2811     } else {
2812         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2813                                    cpu_reg(s, rt), get_mem_index(s), memop);
2814         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2815     }
2816     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2817     tcg_gen_br(done_label);
2818 
2819     gen_set_label(fail_label);
2820     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2821     gen_set_label(done_label);
2822     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2823 }
2824 
2825 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2826                                  int rn, int size)
2827 {
2828     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2829     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2830     int memidx = get_mem_index(s);
2831     TCGv_i64 clean_addr;
2832     MemOp memop;
2833 
2834     if (rn == 31) {
2835         gen_check_sp_alignment(s);
2836     }
2837     memop = check_atomic_align(s, rn, size);
2838     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2839     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2840                                memidx, memop);
2841 }
2842 
2843 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2844                                       int rn, int size)
2845 {
2846     TCGv_i64 s1 = cpu_reg(s, rs);
2847     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2848     TCGv_i64 t1 = cpu_reg(s, rt);
2849     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2850     TCGv_i64 clean_addr;
2851     int memidx = get_mem_index(s);
2852     MemOp memop;
2853 
2854     if (rn == 31) {
2855         gen_check_sp_alignment(s);
2856     }
2857 
2858     /* This is a single atomic access, despite the "pair". */
2859     memop = check_atomic_align(s, rn, size + 1);
2860     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2861 
2862     if (size == 2) {
2863         TCGv_i64 cmp = tcg_temp_new_i64();
2864         TCGv_i64 val = tcg_temp_new_i64();
2865 
2866         if (s->be_data == MO_LE) {
2867             tcg_gen_concat32_i64(val, t1, t2);
2868             tcg_gen_concat32_i64(cmp, s1, s2);
2869         } else {
2870             tcg_gen_concat32_i64(val, t2, t1);
2871             tcg_gen_concat32_i64(cmp, s2, s1);
2872         }
2873 
2874         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2875 
2876         if (s->be_data == MO_LE) {
2877             tcg_gen_extr32_i64(s1, s2, cmp);
2878         } else {
2879             tcg_gen_extr32_i64(s2, s1, cmp);
2880         }
2881     } else {
2882         TCGv_i128 cmp = tcg_temp_new_i128();
2883         TCGv_i128 val = tcg_temp_new_i128();
2884 
2885         if (s->be_data == MO_LE) {
2886             tcg_gen_concat_i64_i128(val, t1, t2);
2887             tcg_gen_concat_i64_i128(cmp, s1, s2);
2888         } else {
2889             tcg_gen_concat_i64_i128(val, t2, t1);
2890             tcg_gen_concat_i64_i128(cmp, s2, s1);
2891         }
2892 
2893         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2894 
2895         if (s->be_data == MO_LE) {
2896             tcg_gen_extr_i128_i64(s1, s2, cmp);
2897         } else {
2898             tcg_gen_extr_i128_i64(s2, s1, cmp);
2899         }
2900     }
2901 }
2902 
2903 /*
2904  * Compute the ISS.SF bit for syndrome information if an exception
2905  * is taken on a load or store. This indicates whether the instruction
2906  * is accessing a 32-bit or 64-bit register. This logic is derived
2907  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2908  */
2909 static bool ldst_iss_sf(int size, bool sign, bool ext)
2910 {
2911 
2912     if (sign) {
2913         /*
2914          * Signed loads are 64 bit results if we are not going to
2915          * do a zero-extend from 32 to 64 after the load.
2916          * (For a store, sign and ext are always false.)
2917          */
2918         return !ext;
2919     } else {
2920         /* Unsigned loads/stores work at the specified size */
2921         return size == MO_64;
2922     }
2923 }
2924 
2925 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2926 {
2927     if (a->rn == 31) {
2928         gen_check_sp_alignment(s);
2929     }
2930     if (a->lasr) {
2931         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2932     }
2933     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2934     return true;
2935 }
2936 
2937 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2938 {
2939     if (a->rn == 31) {
2940         gen_check_sp_alignment(s);
2941     }
2942     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2943     if (a->lasr) {
2944         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2945     }
2946     return true;
2947 }
2948 
2949 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2950 {
2951     TCGv_i64 clean_addr;
2952     MemOp memop;
2953     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2954 
2955     /*
2956      * StoreLORelease is the same as Store-Release for QEMU, but
2957      * needs the feature-test.
2958      */
2959     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2960         return false;
2961     }
2962     /* Generate ISS for non-exclusive accesses including LASR.  */
2963     if (a->rn == 31) {
2964         gen_check_sp_alignment(s);
2965     }
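    /*
     * Store-release: the barrier goes before the store; the matching
     * acquire case (trans_LDAR) places its barrier after the load.
     */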
2966     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2967     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
2968     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2969                                 true, a->rn != 31, memop);
2970     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
2971               iss_sf, a->lasr);
2972     return true;
2973 }
2974 
2975 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
2976 {
2977     TCGv_i64 clean_addr;
2978     MemOp memop;
2979     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2980 
2981     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2982     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2983         return false;
2984     }
2985     /* Generate ISS for non-exclusive accesses including LASR.  */
2986     if (a->rn == 31) {
2987         gen_check_sp_alignment(s);
2988     }
2989     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
2990     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2991                                 false, a->rn != 31, memop);
2992     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
2993               a->rt, iss_sf, a->lasr);
2994     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2995     return true;
2996 }
2997 
2998 static bool trans_STXP(DisasContext *s, arg_stxr *a)
2999 {
3000     if (a->rn == 31) {
3001         gen_check_sp_alignment(s);
3002     }
3003     if (a->lasr) {
3004         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3005     }
3006     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3007     return true;
3008 }
3009 
3010 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3011 {
3012     if (a->rn == 31) {
3013         gen_check_sp_alignment(s);
3014     }
3015     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3016     if (a->lasr) {
3017         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3018     }
3019     return true;
3020 }
3021 
3022 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3023 {
3024     if (!dc_isar_feature(aa64_atomics, s)) {
3025         return false;
3026     }
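    /* Rs and Rt must each name an even-numbered register pair. */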
3027     if (((a->rt | a->rs) & 1) != 0) {
3028         return false;
3029     }
3030 
3031     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3032     return true;
3033 }
3034 
3035 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3036 {
3037     if (!dc_isar_feature(aa64_atomics, s)) {
3038         return false;
3039     }
3040     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3041     return true;
3042 }
3043 
3044 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3045 {
3046     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3047     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3048     TCGv_i64 clean_addr = tcg_temp_new_i64();
3049     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3050 
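    /*
     * Literal form: the address is computed from the PC, so there is
     * no base register to read and no SP alignment check.
     */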
3051     gen_pc_plus_diff(s, clean_addr, a->imm);
3052     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3053               false, true, a->rt, iss_sf, false);
3054     return true;
3055 }
3056 
3057 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3058 {
3059     /* Load register (literal), vector version */
3060     TCGv_i64 clean_addr;
3061     MemOp memop;
3062 
3063     if (!fp_access_check(s)) {
3064         return true;
3065     }
3066     memop = finalize_memop_asimd(s, a->sz);
3067     clean_addr = tcg_temp_new_i64();
3068     gen_pc_plus_diff(s, clean_addr, a->imm);
3069     do_fp_ld(s, a->rt, clean_addr, memop);
3070     return true;
3071 }
3072 
3073 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3074                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3075                                  uint64_t offset, bool is_store, MemOp mop)
3076 {
3077     if (a->rn == 31) {
3078         gen_check_sp_alignment(s);
3079     }
3080 
3081     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
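    /*
     * For signed-offset and pre-indexed forms (!a->p) the offset is
     * applied before the access; post-indexed forms apply it afterwards
     * in op_addr_ldstpair_post().
     */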
3082     if (!a->p) {
3083         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3084     }
3085 
3086     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3087                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3088 }
3089 
3090 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3091                                   TCGv_i64 dirty_addr, uint64_t offset)
3092 {
3093     if (a->w) {
3094         if (a->p) {
3095             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3096         }
3097         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3098     }
3099 }
3100 
3101 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3102 {
3103     uint64_t offset = a->imm << a->sz;
3104     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3105     MemOp mop = finalize_memop(s, a->sz);
3106 
3107     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3108     tcg_rt = cpu_reg(s, a->rt);
3109     tcg_rt2 = cpu_reg(s, a->rt2);
3110     /*
3111      * We built mop above for the single logical access -- rebuild it
3112      * now for the paired operation.
3113      *
3114      * With LSE2, non-sign-extending pairs are treated atomically if
3115      * aligned, and if unaligned one of the pair will be completely
3116      * within a 16-byte block and that element will be atomic.
3117      * Otherwise each element is separately atomic.
3118      * In all cases, issue one operation with the correct atomicity.
3119      */
3120     mop = a->sz + 1;
3121     if (s->align_mem) {
3122         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3123     }
3124     mop = finalize_memop_pair(s, mop);
3125     if (a->sz == 2) {
3126         TCGv_i64 tmp = tcg_temp_new_i64();
3127 
3128         if (s->be_data == MO_LE) {
3129             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3130         } else {
3131             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3132         }
3133         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3134     } else {
3135         TCGv_i128 tmp = tcg_temp_new_i128();
3136 
3137         if (s->be_data == MO_LE) {
3138             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3139         } else {
3140             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3141         }
3142         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3143     }
3144     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3145     return true;
3146 }
3147 
3148 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3149 {
3150     uint64_t offset = a->imm << a->sz;
3151     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3152     MemOp mop = finalize_memop(s, a->sz);
3153 
3154     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3155     tcg_rt = cpu_reg(s, a->rt);
3156     tcg_rt2 = cpu_reg(s, a->rt2);
3157 
3158     /*
3159      * We built mop above for the single logical access -- rebuild it
3160      * now for the paired operation.
3161      *
3162      * With LSE2, non-sign-extending pairs are treated atomically if
3163      * aligned, and if unaligned one of the pair will be completely
3164      * within a 16-byte block and that element will be atomic.
3165      * Otherwise each element is separately atomic.
3166      * In all cases, issue one operation with the correct atomicity.
3167      *
3168      * This treats sign-extending loads like zero-extending loads,
3169      * since that reuses the most code below.
3170      */
3171     mop = a->sz + 1;
3172     if (s->align_mem) {
3173         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3174     }
3175     mop = finalize_memop_pair(s, mop);
3176     if (a->sz == 2) {
3177         int o2 = s->be_data == MO_LE ? 32 : 0;
3178         int o1 = o2 ^ 32;
3179 
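        /*
         * A single 64-bit load fetches both 32-bit halves; o1/o2 select
         * which half goes to rt and which to rt2 according to endianness.
         */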
3180         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3181         if (a->sign) {
3182             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3183             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3184         } else {
3185             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3186             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3187         }
3188     } else {
3189         TCGv_i128 tmp = tcg_temp_new_i128();
3190 
3191         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3192         if (s->be_data == MO_LE) {
3193             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3194         } else {
3195             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3196         }
3197     }
3198     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3199     return true;
3200 }
3201 
3202 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3203 {
3204     uint64_t offset = a->imm << a->sz;
3205     TCGv_i64 clean_addr, dirty_addr;
3206     MemOp mop;
3207 
3208     if (!fp_access_check(s)) {
3209         return true;
3210     }
3211 
3212     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3213     mop = finalize_memop_asimd(s, a->sz);
3214     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3215     do_fp_st(s, a->rt, clean_addr, mop);
3216     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3217     do_fp_st(s, a->rt2, clean_addr, mop);
3218     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3219     return true;
3220 }
3221 
3222 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3223 {
3224     uint64_t offset = a->imm << a->sz;
3225     TCGv_i64 clean_addr, dirty_addr;
3226     MemOp mop;
3227 
3228     if (!fp_access_check(s)) {
3229         return true;
3230     }
3231 
3232     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3233     mop = finalize_memop_asimd(s, a->sz);
3234     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3235     do_fp_ld(s, a->rt, clean_addr, mop);
3236     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3237     do_fp_ld(s, a->rt2, clean_addr, mop);
3238     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3239     return true;
3240 }
3241 
3242 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3243 {
3244     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3245     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3246     MemOp mop;
3247     TCGv_i128 tmp;
3248 
3249     /* STGP only comes in one size. */
3250     tcg_debug_assert(a->sz == MO_64);
3251 
3252     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3253         return false;
3254     }
3255 
3256     if (a->rn == 31) {
3257         gen_check_sp_alignment(s);
3258     }
3259 
3260     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3261     if (!a->p) {
3262         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3263     }
3264 
3265     clean_addr = clean_data_tbi(s, dirty_addr);
3266     tcg_rt = cpu_reg(s, a->rt);
3267     tcg_rt2 = cpu_reg(s, a->rt2);
3268 
3269     /*
3270      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3271      * and one tag operation.  We implement it as one single aligned 16-byte
3272      * memory operation for convenience.  Note that the alignment ensures
3273      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3274      */
3275     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3276 
3277     tmp = tcg_temp_new_i128();
3278     if (s->be_data == MO_LE) {
3279         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3280     } else {
3281         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3282     }
3283     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3284 
3285     /* Perform the tag store, if tag access enabled. */
3286     if (s->ata[0]) {
3287         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3288             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3289         } else {
3290             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3291         }
3292     }
3293 
3294     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3295     return true;
3296 }
3297 
3298 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3299                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3300                                  uint64_t offset, bool is_store, MemOp mop)
3301 {
3302     int memidx;
3303 
3304     if (a->rn == 31) {
3305         gen_check_sp_alignment(s);
3306     }
3307 
3308     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3309     if (!a->p) {
3310         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3311     }
3312     memidx = get_a64_user_mem_index(s, a->unpriv);
3313     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3314                                         a->w || a->rn != 31,
3315                                         mop, a->unpriv, memidx);
3316 }
3317 
3318 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3319                                   TCGv_i64 dirty_addr, uint64_t offset)
3320 {
3321     if (a->w) {
3322         if (a->p) {
3323             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3324         }
3325         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3326     }
3327 }
3328 
3329 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3330 {
3331     bool iss_sf, iss_valid = !a->w;
3332     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3333     int memidx = get_a64_user_mem_index(s, a->unpriv);
3334     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3335 
3336     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3337 
3338     tcg_rt = cpu_reg(s, a->rt);
3339     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3340 
3341     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3342                      iss_valid, a->rt, iss_sf, false);
3343     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3344     return true;
3345 }
3346 
3347 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3348 {
3349     bool iss_sf, iss_valid = !a->w;
3350     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3351     int memidx = get_a64_user_mem_index(s, a->unpriv);
3352     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3353 
3354     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3355 
3356     tcg_rt = cpu_reg(s, a->rt);
3357     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3358 
3359     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3360                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3361     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3362     return true;
3363 }
3364 
3365 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3366 {
3367     TCGv_i64 clean_addr, dirty_addr;
3368     MemOp mop;
3369 
3370     if (!fp_access_check(s)) {
3371         return true;
3372     }
3373     mop = finalize_memop_asimd(s, a->sz);
3374     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3375     do_fp_st(s, a->rt, clean_addr, mop);
3376     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3377     return true;
3378 }
3379 
3380 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3381 {
3382     TCGv_i64 clean_addr, dirty_addr;
3383     MemOp mop;
3384 
3385     if (!fp_access_check(s)) {
3386         return true;
3387     }
3388     mop = finalize_memop_asimd(s, a->sz);
3389     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3390     do_fp_ld(s, a->rt, clean_addr, mop);
3391     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3392     return true;
3393 }
3394 
3395 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3396                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3397                              bool is_store, MemOp memop)
3398 {
3399     TCGv_i64 tcg_rm;
3400 
3401     if (a->rn == 31) {
3402         gen_check_sp_alignment(s);
3403     }
3404     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3405 
3406     tcg_rm = read_cpu_reg(s, a->rm, 1);
3407     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3408 
3409     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3410     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3411 }
3412 
3413 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3414 {
3415     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3416     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3417     MemOp memop;
3418 
3419     if (extract32(a->opt, 1, 1) == 0) {
3420         return false;
3421     }
3422 
3423     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3424     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3425     tcg_rt = cpu_reg(s, a->rt);
3426     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3427               a->ext, true, a->rt, iss_sf, false);
3428     return true;
3429 }
3430 
3431 static bool trans_STR(DisasContext *s, arg_ldst *a)
3432 {
3433     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3434     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3435     MemOp memop;
3436 
3437     if (extract32(a->opt, 1, 1) == 0) {
3438         return false;
3439     }
3440 
3441     memop = finalize_memop(s, a->sz);
3442     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3443     tcg_rt = cpu_reg(s, a->rt);
3444     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3445     return true;
3446 }
3447 
3448 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3449 {
3450     TCGv_i64 clean_addr, dirty_addr;
3451     MemOp memop;
3452 
3453     if (extract32(a->opt, 1, 1) == 0) {
3454         return false;
3455     }
3456 
3457     if (!fp_access_check(s)) {
3458         return true;
3459     }
3460 
3461     memop = finalize_memop_asimd(s, a->sz);
3462     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3463     do_fp_ld(s, a->rt, clean_addr, memop);
3464     return true;
3465 }
3466 
3467 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3468 {
3469     TCGv_i64 clean_addr, dirty_addr;
3470     MemOp memop;
3471 
3472     if (extract32(a->opt, 1, 1) == 0) {
3473         return false;
3474     }
3475 
3476     if (!fp_access_check(s)) {
3477         return true;
3478     }
3479 
3480     memop = finalize_memop_asimd(s, a->sz);
3481     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3482     do_fp_st(s, a->rt, clean_addr, memop);
3483     return true;
3484 }
3485 
3487 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3488                          int sign, bool invert)
3489 {
3490     MemOp mop = a->sz | sign;
3491     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3492 
3493     if (a->rn == 31) {
3494         gen_check_sp_alignment(s);
3495     }
3496     mop = check_atomic_align(s, a->rn, mop);
3497     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3498                                 a->rn != 31, mop);
3499     tcg_rs = read_cpu_reg(s, a->rs, true);
3500     tcg_rt = cpu_reg(s, a->rt);
3501     if (invert) {
3502         tcg_gen_not_i64(tcg_rs, tcg_rs);
3503     }
3504     /*
3505      * The tcg atomic primitives are all full barriers.  Therefore we
3506      * can ignore the Acquire and Release bits of this instruction.
3507      */
3508     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3509 
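         /*
          * For the sign-extending ops the tcg atomic op returns the old
          * value sign-extended per the memop; architecturally Rt holds
          * the zero-extended memory value, so convert back here.
          */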
3510     if (mop & MO_SIGN) {
3511         switch (a->sz) {
3512         case MO_8:
3513             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3514             break;
3515         case MO_16:
3516             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3517             break;
3518         case MO_32:
3519             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3520             break;
3521         case MO_64:
3522             break;
3523         default:
3524             g_assert_not_reached();
3525         }
3526     }
3527     return true;
3528 }
3529 
3530 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3531 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3532 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3533 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3534 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3535 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3536 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3537 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3538 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3539 
3540 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3541 {
3542     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3543     TCGv_i64 clean_addr;
3544     MemOp mop;
3545 
3546     if (!dc_isar_feature(aa64_atomics, s) ||
3547         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3548         return false;
3549     }
3550     if (a->rn == 31) {
3551         gen_check_sp_alignment(s);
3552     }
3553     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3554     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3555                                 a->rn != 31, mop);
3556     /*
3557      * LDAPR* are a special case because they are a simple load, not a
3558      * fetch-and-do-something op.
3559      * The architectural consistency requirements here are weaker than
3560      * full load-acquire (we only need "load-acquire processor consistent"),
3561      * but we choose to implement them as full LDAQ.
3562      */
3563     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3564               true, a->rt, iss_sf, true);
3565     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3566     return true;
3567 }
3568 
3569 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3570 {
3571     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3572     MemOp memop;
3573 
3574     /* Load with pointer authentication */
3575     if (!dc_isar_feature(aa64_pauth, s)) {
3576         return false;
3577     }
3578 
3579     if (a->rn == 31) {
3580         gen_check_sp_alignment(s);
3581     }
3582     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3583 
3584     if (s->pauth_active) {
3585         if (!a->m) {
3586             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3587                                       tcg_constant_i64(0));
3588         } else {
3589             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3590                                       tcg_constant_i64(0));
3591         }
3592     }
3593 
3594     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3595 
3596     memop = finalize_memop(s, MO_64);
3597 
3598     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3599     clean_addr = gen_mte_check1(s, dirty_addr, false,
3600                                 a->w || a->rn != 31, memop);
3601 
3602     tcg_rt = cpu_reg(s, a->rt);
3603     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3604               /* extend */ false, /* iss_valid */ !a->w,
3605               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3606 
3607     if (a->w) {
3608         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3609     }
3610     return true;
3611 }
3612 
3613 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3614 {
3615     TCGv_i64 clean_addr, dirty_addr;
3616     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3617     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3618 
3619     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3620         return false;
3621     }
3622 
3623     if (a->rn == 31) {
3624         gen_check_sp_alignment(s);
3625     }
3626 
3627     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3628     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3629     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3630     clean_addr = clean_data_tbi(s, dirty_addr);
3631 
3632     /*
3633      * Load-AcquirePC semantics; we implement as the slightly more
3634      * restrictive Load-Acquire.
3635      */
3636     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3637               a->rt, iss_sf, true);
3638     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3639     return true;
3640 }
3641 
3642 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3643 {
3644     TCGv_i64 clean_addr, dirty_addr;
3645     MemOp mop = a->sz;
3646     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3647 
3648     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3649         return false;
3650     }
3651 
3652     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3653 
3654     if (a->rn == 31) {
3655         gen_check_sp_alignment(s);
3656     }
3657 
3658     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3659     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3660     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3661     clean_addr = clean_data_tbi(s, dirty_addr);
3662 
3663     /* Store-Release semantics */
3664     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3665     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3666     return true;
3667 }
3668 
3669 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3670 {
3671     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3672     MemOp endian, align, mop;
3673 
3674     int total;    /* total bytes */
3675     int elements; /* elements per vector */
3676     int r;
3677     int size = a->sz;
3678 
3679     if (!a->p && a->rm != 0) {
3680         /* For non-postindexed accesses the Rm field must be 0 */
3681         return false;
3682     }
3683     if (size == 3 && !a->q && a->selem != 1) {
3684         return false;
3685     }
3686     if (!fp_access_check(s)) {
3687         return true;
3688     }
3689 
3690     if (a->rn == 31) {
3691         gen_check_sp_alignment(s);
3692     }
3693 
3694     /* For our purposes, bytes are always little-endian.  */
3695     endian = s->be_data;
3696     if (size == 0) {
3697         endian = MO_LE;
3698     }
3699 
3700     total = a->rpt * a->selem * (a->q ? 16 : 8);
3701     tcg_rn = cpu_reg_sp(s, a->rn);
3702 
3703     /*
3704      * Issue the MTE check vs the logical repeat count, before we
3705      * promote consecutive little-endian elements below.
3706      */
3707     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3708                                 finalize_memop_asimd(s, size));
3709 
3710     /*
3711      * Consecutive little-endian elements from a single register
3712      * can be promoted to a larger little-endian operation.
3713      */
3714     align = MO_ALIGN;
3715     if (a->selem == 1 && endian == MO_LE) {
3716         align = pow2_align(size);
3717         size = 3;
3718     }
3719     if (!s->align_mem) {
3720         align = 0;
3721     }
3722     mop = endian | size | align;
3723 
3724     elements = (a->q ? 16 : 8) >> size;
3725     tcg_ebytes = tcg_constant_i64(1 << size);
3726     for (r = 0; r < a->rpt; r++) {
3727         int e;
3728         for (e = 0; e < elements; e++) {
3729             int xs;
3730             for (xs = 0; xs < a->selem; xs++) {
3731                 int tt = (a->rt + r + xs) % 32;
3732                 do_vec_ld(s, tt, e, clean_addr, mop);
3733                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3734             }
3735         }
3736     }
3737 
3738     /*
3739      * For non-quad operations, setting a slice of the low 64 bits of
3740      * the register clears the high 64 bits (in the ARM ARM pseudocode
3741      * this is implicit in the fact that 'rval' is a 64 bit wide
3742      * variable).  For quad operations, we might still need to zero
3743      * the high bits of the SVE register.
3744      */
3745     for (r = 0; r < a->rpt * a->selem; r++) {
3746         int tt = (a->rt + r) % 32;
3747         clear_vec_high(s, a->q, tt);
3748     }
3749 
3750     if (a->p) {
3751         if (a->rm == 31) {
3752             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3753         } else {
3754             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3755         }
3756     }
3757     return true;
3758 }
3759 
3760 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3761 {
3762     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3763     MemOp endian, align, mop;
3764 
3765     int total;    /* total bytes */
3766     int elements; /* elements per vector */
3767     int r;
3768     int size = a->sz;
3769 
3770     if (!a->p && a->rm != 0) {
3771         /* For non-postindexed accesses the Rm field must be 0 */
3772         return false;
3773     }
3774     if (size == 3 && !a->q && a->selem != 1) {
3775         return false;
3776     }
3777     if (!fp_access_check(s)) {
3778         return true;
3779     }
3780 
3781     if (a->rn == 31) {
3782         gen_check_sp_alignment(s);
3783     }
3784 
3785     /* For our purposes, bytes are always little-endian.  */
3786     endian = s->be_data;
3787     if (size == 0) {
3788         endian = MO_LE;
3789     }
3790 
3791     total = a->rpt * a->selem * (a->q ? 16 : 8);
3792     tcg_rn = cpu_reg_sp(s, a->rn);
3793 
3794     /*
3795      * Issue the MTE check vs the logical repeat count, before we
3796      * promote consecutive little-endian elements below.
3797      */
3798     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3799                                 finalize_memop_asimd(s, size));
3800 
3801     /*
3802      * Consecutive little-endian elements from a single register
3803      * can be promoted to a larger little-endian operation.
3804      */
3805     align = MO_ALIGN;
3806     if (a->selem == 1 && endian == MO_LE) {
3807         align = pow2_align(size);
3808         size = 3;
3809     }
3810     if (!s->align_mem) {
3811         align = 0;
3812     }
3813     mop = endian | size | align;
3814 
3815     elements = (a->q ? 16 : 8) >> size;
3816     tcg_ebytes = tcg_constant_i64(1 << size);
3817     for (r = 0; r < a->rpt; r++) {
3818         int e;
3819         for (e = 0; e < elements; e++) {
3820             int xs;
3821             for (xs = 0; xs < a->selem; xs++) {
3822                 int tt = (a->rt + r + xs) % 32;
3823                 do_vec_st(s, tt, e, clean_addr, mop);
3824                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3825             }
3826         }
3827     }
3828 
3829     if (a->p) {
3830         if (a->rm == 31) {
3831             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3832         } else {
3833             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3834         }
3835     }
3836     return true;
3837 }
3838 
3839 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3840 {
3841     int xs, total, rt;
3842     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3843     MemOp mop;
3844 
3845     if (!a->p && a->rm != 0) {
3846         return false;
3847     }
3848     if (!fp_access_check(s)) {
3849         return true;
3850     }
3851 
3852     if (a->rn == 31) {
3853         gen_check_sp_alignment(s);
3854     }
3855 
3856     total = a->selem << a->scale;
3857     tcg_rn = cpu_reg_sp(s, a->rn);
3858 
3859     mop = finalize_memop_asimd(s, a->scale);
3860     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3861                                 total, mop);
3862 
3863     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3864     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3865         do_vec_st(s, rt, a->index, clean_addr, mop);
3866         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3867     }
3868 
3869     if (a->p) {
3870         if (a->rm == 31) {
3871             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3872         } else {
3873             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3874         }
3875     }
3876     return true;
3877 }
3878 
3879 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3880 {
3881     int xs, total, rt;
3882     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3883     MemOp mop;
3884 
3885     if (!a->p && a->rm != 0) {
3886         return false;
3887     }
3888     if (!fp_access_check(s)) {
3889         return true;
3890     }
3891 
3892     if (a->rn == 31) {
3893         gen_check_sp_alignment(s);
3894     }
3895 
3896     total = a->selem << a->scale;
3897     tcg_rn = cpu_reg_sp(s, a->rn);
3898 
3899     mop = finalize_memop_asimd(s, a->scale);
3900     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3901                                 total, mop);
3902 
3903     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3904     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3905         do_vec_ld(s, rt, a->index, clean_addr, mop);
3906         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3907     }
3908 
3909     if (a->p) {
3910         if (a->rm == 31) {
3911             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3912         } else {
3913             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3914         }
3915     }
3916     return true;
3917 }
3918 
3919 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3920 {
3921     int xs, total, rt;
3922     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3923     MemOp mop;
3924 
3925     if (!a->p && a->rm != 0) {
3926         return false;
3927     }
3928     if (!fp_access_check(s)) {
3929         return true;
3930     }
3931 
3932     if (a->rn == 31) {
3933         gen_check_sp_alignment(s);
3934     }
3935 
3936     total = a->selem << a->scale;
3937     tcg_rn = cpu_reg_sp(s, a->rn);
3938 
3939     mop = finalize_memop_asimd(s, a->scale);
3940     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3941                                 total, mop);
3942 
3943     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3944     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3945         /* Load and replicate to all elements */
3946         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3947 
3948         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3949         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3950                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3951         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3952     }
3953 
3954     if (a->p) {
3955         if (a->rm == 31) {
3956             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3957         } else {
3958             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3959         }
3960     }
3961     return true;
3962 }
3963 
3964 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
3965 {
3966     TCGv_i64 addr, clean_addr, tcg_rt;
3967     int size = 4 << s->dcz_blocksize;
3968 
3969     if (!dc_isar_feature(aa64_mte, s)) {
3970         return false;
3971     }
3972     if (s->current_el == 0) {
3973         return false;
3974     }
3975 
3976     if (a->rn == 31) {
3977         gen_check_sp_alignment(s);
3978     }
3979 
3980     addr = read_cpu_reg_sp(s, a->rn, true);
3981     tcg_gen_addi_i64(addr, addr, a->imm);
3982     tcg_rt = cpu_reg(s, a->rt);
3983 
3984     if (s->ata[0]) {
3985         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
3986     }
3987     /*
3988      * The non-tags portion of STZGM is mostly like DC_ZVA,
3989      * except the alignment happens before the access.
3990      */
3991     clean_addr = clean_data_tbi(s, addr);
3992     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3993     gen_helper_dc_zva(tcg_env, clean_addr);
3994     return true;
3995 }
3996 
3997 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
3998 {
3999     TCGv_i64 addr, clean_addr, tcg_rt;
4000 
4001     if (!dc_isar_feature(aa64_mte, s)) {
4002         return false;
4003     }
4004     if (s->current_el == 0) {
4005         return false;
4006     }
4007 
4008     if (a->rn == 31) {
4009         gen_check_sp_alignment(s);
4010     }
4011 
4012     addr = read_cpu_reg_sp(s, a->rn, true);
4013     tcg_gen_addi_i64(addr, addr, a->imm);
4014     tcg_rt = cpu_reg(s, a->rt);
4015 
4016     if (s->ata[0]) {
4017         gen_helper_stgm(tcg_env, addr, tcg_rt);
4018     } else {
4019         MMUAccessType acc = MMU_DATA_STORE;
4020         int size = 4 << s->gm_blocksize;
4021 
4022         clean_addr = clean_data_tbi(s, addr);
4023         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4024         gen_probe_access(s, clean_addr, acc, size);
4025     }
4026     return true;
4027 }
4028 
4029 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4030 {
4031     TCGv_i64 addr, clean_addr, tcg_rt;
4032 
4033     if (!dc_isar_feature(aa64_mte, s)) {
4034         return false;
4035     }
4036     if (s->current_el == 0) {
4037         return false;
4038     }
4039 
4040     if (a->rn == 31) {
4041         gen_check_sp_alignment(s);
4042     }
4043 
4044     addr = read_cpu_reg_sp(s, a->rn, true);
4045     tcg_gen_addi_i64(addr, addr, a->imm);
4046     tcg_rt = cpu_reg(s, a->rt);
4047 
4048     if (s->ata[0]) {
4049         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4050     } else {
4051         MMUAccessType acc = MMU_DATA_LOAD;
4052         int size = 4 << s->gm_blocksize;
4053 
4054         clean_addr = clean_data_tbi(s, addr);
4055         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4056         gen_probe_access(s, clean_addr, acc, size);
4057         /* The result tags are zeros.  */
4058         tcg_gen_movi_i64(tcg_rt, 0);
4059     }
4060     return true;
4061 }
4062 
4063 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4064 {
4065     TCGv_i64 addr, clean_addr, tcg_rt;
4066 
4067     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4068         return false;
4069     }
4070 
4071     if (a->rn == 31) {
4072         gen_check_sp_alignment(s);
4073     }
4074 
4075     addr = read_cpu_reg_sp(s, a->rn, true);
4076     if (!a->p) {
4077         /* pre-index or signed offset */
4078         tcg_gen_addi_i64(addr, addr, a->imm);
4079     }
4080 
4081     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4082     tcg_rt = cpu_reg(s, a->rt);
4083     if (s->ata[0]) {
4084         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4085     } else {
4086         /*
4087          * Tag access disabled: we must check for aborts on the load
4088          * from [rn+offset], and then insert a 0 tag into rt.
4089          */
4090         clean_addr = clean_data_tbi(s, addr);
4091         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4092         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4093     }
4094 
4095     if (a->w) {
4096         /* pre-index or post-index */
4097         if (a->p) {
4098             /* post-index */
4099             tcg_gen_addi_i64(addr, addr, a->imm);
4100         }
4101         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4102     }
4103     return true;
4104 }
4105 
4106 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4107 {
4108     TCGv_i64 addr, tcg_rt;
4109 
4110     if (a->rn == 31) {
4111         gen_check_sp_alignment(s);
4112     }
4113 
4114     addr = read_cpu_reg_sp(s, a->rn, true);
4115     if (!a->p) {
4116         /* pre-index or signed offset */
4117         tcg_gen_addi_i64(addr, addr, a->imm);
4118     }
4119     tcg_rt = cpu_reg_sp(s, a->rt);
4120     if (!s->ata[0]) {
4121         /*
4122          * For STG and ST2G, we need to check alignment and probe memory.
4123          * TODO: For STZG and STZ2G, we could rely on the stores below,
4124          * at least for system mode; user-only won't enforce alignment.
4125          */
4126         if (is_pair) {
4127             gen_helper_st2g_stub(tcg_env, addr);
4128         } else {
4129             gen_helper_stg_stub(tcg_env, addr);
4130         }
4131     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4132         if (is_pair) {
4133             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4134         } else {
4135             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4136         }
4137     } else {
4138         if (is_pair) {
4139             gen_helper_st2g(tcg_env, addr, tcg_rt);
4140         } else {
4141             gen_helper_stg(tcg_env, addr, tcg_rt);
4142         }
4143     }
4144 
4145     if (is_zero) {
4146         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4147         TCGv_i64 zero64 = tcg_constant_i64(0);
4148         TCGv_i128 zero128 = tcg_temp_new_i128();
4149         int mem_index = get_mem_index(s);
4150         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4151 
4152         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4153 
4154         /* This is 1 or 2 atomic 16-byte operations. */
4155         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4156         if (is_pair) {
4157             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4158             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4159         }
4160     }
4161 
4162     if (a->w) {
4163         /* pre-index or post-index */
4164         if (a->p) {
4165             /* post-index */
4166             tcg_gen_addi_i64(addr, addr, a->imm);
4167         }
4168         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4169     }
4170     return true;
4171 }
4172 
4173 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4174 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4175 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4176 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4177 
4178 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4179 
4180 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4181                    bool is_setg, SetFn fn)
4182 {
4183     int memidx;
4184     uint32_t syndrome, desc = 0;
4185 
4186     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4187         return false;
4188     }
4189 
4190     /*
4191      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4192      * us to pull this check before the CheckMOPSEnabled() test
4193      * (which we do in the helper function)
4194      */
4195     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4196         a->rd == 31 || a->rn == 31) {
4197         return false;
4198     }
4199 
4200     memidx = get_a64_user_mem_index(s, a->unpriv);
4201 
4202     /*
4203      * We pass option_a == true, matching our implementation;
4204      * we pass wrong_option == false: the helper function may set that bit.
4205      */
4206     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4207                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4208 
4209     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4210         /* We may need to do MTE tag checking, so assemble the descriptor */
4211         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4212         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4213         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4214         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4215     }
4216     /* The helper function always needs the memidx even with MTE disabled */
4217     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4218 
4219     /*
4220      * The helper needs the register numbers, but since they're in
4221      * the syndrome anyway, we let it extract them from there rather
4222      * than passing in an extra three integer arguments.
4223      */
4224     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4225     return true;
4226 }
4227 
4228 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4229 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4230 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4231 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4232 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4233 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4234 
4235 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4236 
4237 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4238 {
4239     int rmemidx, wmemidx;
4240     uint32_t syndrome, rdesc = 0, wdesc = 0;
4241     bool wunpriv = extract32(a->options, 0, 1);
4242     bool runpriv = extract32(a->options, 1, 1);
4243 
4244     /*
4245      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4246      * us to pull this check before the CheckMOPSEnabled() test
4247      * (which we do in the helper function)
4248      */
4249     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4250         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4251         return false;
4252     }
4253 
4254     rmemidx = get_a64_user_mem_index(s, runpriv);
4255     wmemidx = get_a64_user_mem_index(s, wunpriv);
4256 
4257     /*
4258      * We pass option_a == true, matching our implementation;
4259      * we pass wrong_option == false: the helper function may set that bit.
4260      */
4261     syndrome = syn_mop(false, false, a->options, is_epilogue,
4262                        false, true, a->rd, a->rs, a->rn);
4263 
4264     /* If we need to do MTE tag checking, assemble the descriptors */
4265     if (s->mte_active[runpriv]) {
4266         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4267         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4268     }
4269     if (s->mte_active[wunpriv]) {
4270         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4271         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4272         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4273     }
4274     /* The helper function needs these parts of the descriptor regardless */
4275     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4276     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4277 
4278     /*
4279      * The helper needs the register numbers, but since they're in
4280      * the syndrome anyway, we let it extract them from there rather
4281      * than passing in an extra three integer arguments.
4282      */
4283     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4284        tcg_constant_i32(rdesc));
4285     return true;
4286 }
4287 
4288 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4289 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4290 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4291 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4292 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4293 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4294 
4295 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4296 
4297 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4298                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4299 {
4300     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4301     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4302     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4303 
4304     fn(tcg_rd, tcg_rn, tcg_imm);
4305     if (!a->sf) {
4306         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4307     }
4308     return true;
4309 }
4310 
4311 /*
4312  * PC-rel. addressing
4313  */
4314 
4315 static bool trans_ADR(DisasContext *s, arg_ri *a)
4316 {
4317     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4318     return true;
4319 }
4320 
4321 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4322 {
4323     int64_t offset = (int64_t)a->imm << 12;
4324 
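         /*
          * The result is (pc_curr & ~0xfff) + (imm << 12); e.g. with
          * pc_curr == 0x40001234 and imm == 1 the destination gets
          * 0x40002000.
          */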
4325     /* The page offset is ok for CF_PCREL. */
4326     offset -= s->pc_curr & 0xfff;
4327     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4328     return true;
4329 }
4330 
4331 /*
4332  * Add/subtract (immediate)
4333  */
4334 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4335 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4336 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4337 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4338 
4339 /*
4340  * Add/subtract (immediate, with tags)
4341  */
4342 
4343 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4344                                       bool sub_op)
4345 {
4346     TCGv_i64 tcg_rn, tcg_rd;
4347     int imm;
4348 
4349     imm = a->uimm6 << LOG2_TAG_GRANULE;
4350     if (sub_op) {
4351         imm = -imm;
4352     }
4353 
4354     tcg_rn = cpu_reg_sp(s, a->rn);
4355     tcg_rd = cpu_reg_sp(s, a->rd);
4356 
4357     if (s->ata[0]) {
4358         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4359                            tcg_constant_i32(imm),
4360                            tcg_constant_i32(a->uimm4));
4361     } else {
4362         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4363         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4364     }
4365     return true;
4366 }
4367 
4368 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4369 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4370 
4371 /* The input should be a value in the bottom e bits (with higher
4372  * bits zero); returns that value replicated into every element
4373  * of size e in a 64 bit integer.
4374  */
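     /* e.g. bitfield_replicate(0x3, 4) == 0x3333333333333333ull */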
4375 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4376 {
4377     assert(e != 0);
4378     while (e < 64) {
4379         mask |= mask << e;
4380         e *= 2;
4381     }
4382     return mask;
4383 }
4384 
4385 /*
4386  * Logical (immediate)
4387  */
4388 
4389 /*
4390  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4391  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4392  * value (ie should cause a guest UNDEF exception), and true if they are
4393  * valid, in which case the decoded bit pattern is written to result.
4394  */
4395 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4396                             unsigned int imms, unsigned int immr)
4397 {
4398     uint64_t mask;
4399     unsigned e, levels, s, r;
4400     int len;
4401 
4402     assert(immn < 2 && imms < 64 && immr < 64);
4403 
4404     /* The bit patterns we create here are 64 bit patterns which
4405      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4406      * 64 bits each. Each element contains the same value: a run
4407      * of between 1 and e-1 non-zero bits, rotated within the
4408      * element by between 0 and e-1 bits.
4409      *
4410      * The element size and run length are encoded into immn (1 bit)
4411      * and imms (6 bits) as follows:
4412      * 64 bit elements: immn = 1, imms = <length of run - 1>
4413      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4414      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4415      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4416      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4417      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4418      * Notice that immn = 0, imms = 11111x is the only combination
4419      * not covered by one of the above options; this is reserved.
4420      * Further, <length of run - 1> = all-ones is a reserved pattern.
4421      *
4422      * In all cases the rotation is by immr % e (and immr is 6 bits).
4423      */
4424 
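         /*
          * Worked examples: immn=0, imms=0b000001, immr=0 encodes a run
          * of two bits in each 32-bit element, i.e. 0x0000000300000003;
          * immn=1, imms=0, immr=1 encodes a single bit rotated right by
          * one within a 64-bit element, i.e. 0x8000000000000000.
          */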
4425     /* First determine the element size */
4426     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4427     if (len < 1) {
4428         /* This is the immn == 0, imms == 0b11111x case */
4429         return false;
4430     }
4431     e = 1 << len;
4432 
4433     levels = e - 1;
4434     s = imms & levels;
4435     r = immr & levels;
4436 
4437     if (s == levels) {
4438         /* <length of run - 1> mustn't be all-ones. */
4439         return false;
4440     }
4441 
4442     /* Create the value of one element: s+1 set bits rotated
4443      * by r within the element (which is e bits wide)...
4444      */
4445     mask = MAKE_64BIT_MASK(0, s + 1);
4446     if (r) {
4447         mask = (mask >> r) | (mask << (e - r));
4448         mask &= MAKE_64BIT_MASK(0, e);
4449     }
4450     /* ...then replicate the element over the whole 64 bit value */
4451     mask = bitfield_replicate(mask, e);
4452     *result = mask;
4453     return true;
4454 }
4455 
4456 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4457                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4458 {
4459     TCGv_i64 tcg_rd, tcg_rn;
4460     uint64_t imm;
4461 
4462     /* Some immediate field values are reserved. */
4463     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4464                                 extract32(a->dbm, 0, 6),
4465                                 extract32(a->dbm, 6, 6))) {
4466         return false;
4467     }
4468     if (!a->sf) {
4469         imm &= 0xffffffffull;
4470     }
4471 
4472     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4473     tcg_rn = cpu_reg(s, a->rn);
4474 
4475     fn(tcg_rd, tcg_rn, imm);
4476     if (set_cc) {
4477         gen_logic_CC(a->sf, tcg_rd);
4478     }
4479     if (!a->sf) {
4480         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4481     }
4482     return true;
4483 }
4484 
4485 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4486 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4487 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4488 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4489 
4490 /*
4491  * Move wide (immediate)
4492  */
4493 
4494 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4495 {
4496     int pos = a->hw << 4;
4497     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4498     return true;
4499 }
4500 
4501 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4502 {
4503     int pos = a->hw << 4;
4504     uint64_t imm = a->imm;
4505 
4506     imm = ~(imm << pos);
4507     if (!a->sf) {
4508         imm = (uint32_t)imm;
4509     }
4510     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4511     return true;
4512 }
4513 
4514 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4515 {
4516     int pos = a->hw << 4;
4517     TCGv_i64 tcg_rd, tcg_im;
4518 
4519     tcg_rd = cpu_reg(s, a->rd);
4520     tcg_im = tcg_constant_i64(a->imm);
4521     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4522     if (!a->sf) {
4523         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4524     }
4525     return true;
4526 }
4527 
4528 /*
4529  * Bitfield
4530  */
4531 
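     /*
      * SBFM also covers the ASR, SBFX, SBFIZ and SXT{B,H,W} aliases:
      * the si >= ri branch is the extract form, the si < ri branch the
      * insert-and-sign-extend (SBFIZ) form.
      */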
4532 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4533 {
4534     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4535     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4536     unsigned int bitsize = a->sf ? 64 : 32;
4537     unsigned int ri = a->immr;
4538     unsigned int si = a->imms;
4539     unsigned int pos, len;
4540 
4541     if (si >= ri) {
4542         /* Wd<s-r:0> = Wn<s:r> */
4543         len = (si - ri) + 1;
4544         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4545         if (!a->sf) {
4546             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4547         }
4548     } else {
4549         /* Wd<32+s-r,32-r> = Wn<s:0> */
4550         len = si + 1;
4551         pos = (bitsize - ri) & (bitsize - 1);
4552 
4553         if (len < ri) {
4554             /*
4555              * Sign extend the destination field from len to fill the
4556              * balance of the word.  Let the deposit below insert all
4557              * of those sign bits.
4558              */
4559             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4560             len = ri;
4561         }
4562 
4563         /*
4564          * We start with zero, and we haven't modified any bits outside
4565          * bitsize, therefore no final zero-extension is needed for !sf.
4566          */
4567         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4568     }
4569     return true;
4570 }
4571 
4572 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4573 {
4574     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4575     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4576     unsigned int bitsize = a->sf ? 64 : 32;
4577     unsigned int ri = a->immr;
4578     unsigned int si = a->imms;
4579     unsigned int pos, len;
4580 
4584     if (si >= ri) {
4585         /* Wd<s-r:0> = Wn<s:r> */
4586         len = (si - ri) + 1;
4587         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4588     } else {
4589         /* Wd<32+s-r,32-r> = Wn<s:0> */
4590         len = si + 1;
4591         pos = (bitsize - ri) & (bitsize - 1);
4592         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4593     }
4594     return true;
4595 }
4596 
4597 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4598 {
4599     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4600     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4601     unsigned int bitsize = a->sf ? 64 : 32;
4602     unsigned int ri = a->immr;
4603     unsigned int si = a->imms;
4604     unsigned int pos, len;
4605 
4609     if (si >= ri) {
4610         /* Wd<s-r:0> = Wn<s:r> */
4611         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4612         len = (si - ri) + 1;
4613         pos = 0;
4614     } else {
4615         /* Wd<32+s-r,32-r> = Wn<s:0> */
4616         len = si + 1;
4617         pos = (bitsize - ri) & (bitsize - 1);
4618     }
4619 
4620     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4621     if (!a->sf) {
4622         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4623     }
4624     return true;
4625 }
4626 
4627 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4628 {
4629     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4630 
4631     tcg_rd = cpu_reg(s, a->rd);
4632 
4633     if (unlikely(a->imm == 0)) {
4634         /*
4635          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4636          * so an extract from bit 0 is a special case.
4637          */
4638         if (a->sf) {
4639             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4640         } else {
4641             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4642         }
4643     } else {
4644         tcg_rm = cpu_reg(s, a->rm);
4645         tcg_rn = cpu_reg(s, a->rn);
4646 
4647         if (a->sf) {
4648             /* Specialization to ROR happens in EXTRACT2.  */
4649             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4650         } else {
4651             TCGv_i32 t0 = tcg_temp_new_i32();
4652 
4653             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4654             if (a->rm == a->rn) {
4655                 tcg_gen_rotri_i32(t0, t0, a->imm);
4656             } else {
4657                 TCGv_i32 t1 = tcg_temp_new_i32();
4658                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4659                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4660             }
4661             tcg_gen_extu_i32_i64(tcg_rd, t0);
4662         }
4663     }
4664     return true;
4665 }
4666 
4667 /*
4668  * Cryptographic AES, SHA, SHA512
4669  */
4670 
4671 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4672 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4673 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4674 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4675 
4676 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4677 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4678 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4679 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4680 
4681 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4682 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4683 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4684 
4685 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4686 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4687 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4688 
4689 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4690 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4691 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4692 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4693 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4694 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4695 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4696 
4697 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4698 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4699 
4700 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4701 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4702 
4703 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4704 {
4705     if (!dc_isar_feature(aa64_sm3, s)) {
4706         return false;
4707     }
4708     if (fp_access_check(s)) {
4709         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4710         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4711         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4712         TCGv_i32 tcg_res = tcg_temp_new_i32();
4713         unsigned vsz, dofs;
4714 
4715         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
4716         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
4717         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
4718 
4719         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
4720         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
4721         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
4722         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
4723 
4724         /* Clear the whole register first, then store bits [127:96]. */
4725         vsz = vec_full_reg_size(s);
4726         dofs = vec_full_reg_offset(s, a->rd);
4727         tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
4728         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
4729     }
4730     return true;
4731 }
4732 
4733 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
4734 {
4735     if (fp_access_check(s)) {
4736         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
4737     }
4738     return true;
4739 }
4740 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
4741 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
4742 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
4743 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
4744 
4745 static bool trans_XAR(DisasContext *s, arg_XAR *a)
4746 {
4747     if (!dc_isar_feature(aa64_sha3, s)) {
4748         return false;
4749     }
4750     if (fp_access_check(s)) {
4751         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
4752                      vec_full_reg_offset(s, a->rn),
4753                      vec_full_reg_offset(s, a->rm), a->imm, 16,
4754                      vec_full_reg_size(s));
4755     }
4756     return true;
4757 }
4758 
4759 /*
4760  * Advanced SIMD copy
4761  */
4762 
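     /*
      * Decode an AdvSIMD copy imm5-style field: the element size is given
      * by the position of the lowest set bit (B/H/S/D) and the element
      * index by the bits above it, e.g. imm == 0b01010 selects 16-bit
      * element 2.
      */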
4763 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
4764 {
4765     unsigned esz = ctz32(imm);
4766     if (esz <= MO_64) {
4767         *pesz = esz;
4768         *pidx = imm >> (esz + 1);
4769         return true;
4770     }
4771     return false;
4772 }
4773 
4774 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
4775 {
4776     MemOp esz;
4777     unsigned idx;
4778 
4779     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4780         return false;
4781     }
4782     if (fp_access_check(s)) {
4783         /*
4784          * This instruction just extracts the specified element and
4785          * zero-extends it into the bottom of the destination register.
4786          */
4787         TCGv_i64 tmp = tcg_temp_new_i64();
4788         read_vec_element(s, tmp, a->rn, idx, esz);
4789         write_fp_dreg(s, a->rd, tmp);
4790     }
4791     return true;
4792 }
4793 
4794 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
4795 {
4796     MemOp esz;
4797     unsigned idx;
4798 
4799     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4800         return false;
4801     }
4802     if (esz == MO_64 && !a->q) {
4803         return false;
4804     }
4805     if (fp_access_check(s)) {
4806         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
4807                              vec_reg_offset(s, a->rn, idx, esz),
4808                              a->q ? 16 : 8, vec_full_reg_size(s));
4809     }
4810     return true;
4811 }
4812 
4813 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
4814 {
4815     MemOp esz;
4816     unsigned idx;
4817 
4818     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4819         return false;
4820     }
4821     if (esz == MO_64 && !a->q) {
4822         return false;
4823     }
4824     if (fp_access_check(s)) {
4825         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4826                              a->q ? 16 : 8, vec_full_reg_size(s),
4827                              cpu_reg(s, a->rn));
4828     }
4829     return true;
4830 }
4831 
4832 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
4833 {
4834     MemOp esz;
4835     unsigned idx;
4836 
4837     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4838         return false;
4839     }
4840     if (is_signed) {
4841         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
4842             return false;
4843         }
4844     } else {
4845         if (esz == MO_64 ? !a->q : a->q) {
4846             return false;
4847         }
4848     }
4849     if (fp_access_check(s)) {
4850         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4851         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
4852         if (is_signed && !a->q) {
4853             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4854         }
4855     }
4856     return true;
4857 }
4858 
4859 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
4860 TRANS(UMOV, do_smov_umov, a, 0)
4861 
4862 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
4863 {
4864     MemOp esz;
4865     unsigned idx;
4866 
4867     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4868         return false;
4869     }
4870     if (fp_access_check(s)) {
4871         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
4872         clear_vec_high(s, true, a->rd);
4873     }
4874     return true;
4875 }
4876 
4877 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
4878 {
4879     MemOp esz;
4880     unsigned didx, sidx;
4881 
4882     if (!decode_esz_idx(a->di, &esz, &didx)) {
4883         return false;
4884     }
4885     sidx = a->si >> esz;
4886     if (fp_access_check(s)) {
4887         TCGv_i64 tmp = tcg_temp_new_i64();
4888 
4889         read_vec_element(s, tmp, a->rn, sidx, esz);
4890         write_vec_element(s, tmp, a->rd, didx, esz);
4891 
4892         /* INS is considered a 128-bit write for SVE. */
4893         clear_vec_high(s, true, a->rd);
4894     }
4895     return true;
4896 }
4897 
4898 /*
4899  * Advanced SIMD three same
4900  */
4901 
4902 typedef struct FPScalar {
4903     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
4904     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
4905     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
4906 } FPScalar;
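
/*
 * The three callbacks are the half-, single- and double-precision
 * implementations; the trailing TCGv_ptr argument is the float_status
 * pointer obtained via fpstatus_ptr() in do_fp3_scalar() below.
 */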
4907 
4908 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
4909 {
4910     switch (a->esz) {
4911     case MO_64:
4912         if (fp_access_check(s)) {
4913             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
4914             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
4915             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
4916             write_fp_dreg(s, a->rd, t0);
4917         }
4918         break;
4919     case MO_32:
4920         if (fp_access_check(s)) {
4921             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
4922             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
4923             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
4924             write_fp_sreg(s, a->rd, t0);
4925         }
4926         break;
4927     case MO_16:
4928         if (!dc_isar_feature(aa64_fp16, s)) {
4929             return false;
4930         }
4931         if (fp_access_check(s)) {
4932             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
4933             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
4934             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
4935             write_fp_sreg(s, a->rd, t0);
4936         }
4937         break;
4938     default:
4939         return false;
4940     }
4941     return true;
4942 }
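
/*
 * Note the ordering in each case above: the encoding is fully validated
 * (e.g. the FEAT_FP16 gate for MO_16) before fp_access_check() runs, so
 * only valid encodings can generate the FP access trap.
 */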
4943 
4944 static const FPScalar f_scalar_fadd = {
4945     gen_helper_vfp_addh,
4946     gen_helper_vfp_adds,
4947     gen_helper_vfp_addd,
4948 };
4949 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
4950 
4951 static const FPScalar f_scalar_fsub = {
4952     gen_helper_vfp_subh,
4953     gen_helper_vfp_subs,
4954     gen_helper_vfp_subd,
4955 };
4956 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
4957 
4958 static const FPScalar f_scalar_fdiv = {
4959     gen_helper_vfp_divh,
4960     gen_helper_vfp_divs,
4961     gen_helper_vfp_divd,
4962 };
4963 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
4964 
4965 static const FPScalar f_scalar_fmul = {
4966     gen_helper_vfp_mulh,
4967     gen_helper_vfp_muls,
4968     gen_helper_vfp_muld,
4969 };
4970 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
4971 
4972 static const FPScalar f_scalar_fmax = {
4973     gen_helper_advsimd_maxh,
4974     gen_helper_vfp_maxs,
4975     gen_helper_vfp_maxd,
4976 };
4977 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
4978 
4979 static const FPScalar f_scalar_fmin = {
4980     gen_helper_advsimd_minh,
4981     gen_helper_vfp_mins,
4982     gen_helper_vfp_mind,
4983 };
4984 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
4985 
4986 static const FPScalar f_scalar_fmaxnm = {
4987     gen_helper_advsimd_maxnumh,
4988     gen_helper_vfp_maxnums,
4989     gen_helper_vfp_maxnumd,
4990 };
4991 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
4992 
4993 static const FPScalar f_scalar_fminnm = {
4994     gen_helper_advsimd_minnumh,
4995     gen_helper_vfp_minnums,
4996     gen_helper_vfp_minnumd,
4997 };
4998 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
4999 
5000 static const FPScalar f_scalar_fmulx = {
5001     gen_helper_advsimd_mulxh,
5002     gen_helper_vfp_mulxs,
5003     gen_helper_vfp_mulxd,
5004 };
5005 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
5006 
5007 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5008 {
5009     gen_helper_vfp_mulh(d, n, m, s);
5010     gen_vfp_negh(d, d);
5011 }
5012 
5013 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5014 {
5015     gen_helper_vfp_muls(d, n, m, s);
5016     gen_vfp_negs(d, d);
5017 }
5018 
5019 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5020 {
5021     gen_helper_vfp_muld(d, n, m, s);
5022     gen_vfp_negd(d, d);
5023 }
5024 
5025 static const FPScalar f_scalar_fnmul = {
5026     gen_fnmul_h,
5027     gen_fnmul_s,
5028     gen_fnmul_d,
5029 };
5030 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
5031 
5032 static const FPScalar f_scalar_fcmeq = {
5033     gen_helper_advsimd_ceq_f16,
5034     gen_helper_neon_ceq_f32,
5035     gen_helper_neon_ceq_f64,
5036 };
5037 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
5038 
5039 static const FPScalar f_scalar_fcmge = {
5040     gen_helper_advsimd_cge_f16,
5041     gen_helper_neon_cge_f32,
5042     gen_helper_neon_cge_f64,
5043 };
5044 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
5045 
5046 static const FPScalar f_scalar_fcmgt = {
5047     gen_helper_advsimd_cgt_f16,
5048     gen_helper_neon_cgt_f32,
5049     gen_helper_neon_cgt_f64,
5050 };
5051 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
5052 
5053 static const FPScalar f_scalar_facge = {
5054     gen_helper_advsimd_acge_f16,
5055     gen_helper_neon_acge_f32,
5056     gen_helper_neon_acge_f64,
5057 };
5058 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
5059 
5060 static const FPScalar f_scalar_facgt = {
5061     gen_helper_advsimd_acgt_f16,
5062     gen_helper_neon_acgt_f32,
5063     gen_helper_neon_acgt_f64,
5064 };
5065 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
5066 
5067 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5068 {
5069     gen_helper_vfp_subh(d, n, m, s);
5070     gen_vfp_absh(d, d);
5071 }
5072 
5073 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5074 {
5075     gen_helper_vfp_subs(d, n, m, s);
5076     gen_vfp_abss(d, d);
5077 }
5078 
5079 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5080 {
5081     gen_helper_vfp_subd(d, n, m, s);
5082     gen_vfp_absd(d, d);
5083 }
5084 
5085 static const FPScalar f_scalar_fabd = {
5086     gen_fabd_h,
5087     gen_fabd_s,
5088     gen_fabd_d,
5089 };
5090 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
5091 
5092 static const FPScalar f_scalar_frecps = {
5093     gen_helper_recpsf_f16,
5094     gen_helper_recpsf_f32,
5095     gen_helper_recpsf_f64,
5096 };
5097 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
5098 
5099 static const FPScalar f_scalar_frsqrts = {
5100     gen_helper_rsqrtsf_f16,
5101     gen_helper_rsqrtsf_f32,
5102     gen_helper_rsqrtsf_f64,
5103 };
5104 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
5105 
5106 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5107                 MemOp sgn_n, MemOp sgn_m,
5108                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5109                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5110 {
5111     TCGv_i64 t0, t1, t2, qc;
5112     MemOp esz = a->esz;
5113 
5114     if (!fp_access_check(s)) {
5115         return true;
5116     }
5117 
5118     t0 = tcg_temp_new_i64();
5119     t1 = tcg_temp_new_i64();
5120     t2 = tcg_temp_new_i64();
5121     qc = tcg_temp_new_i64();
5122     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5123     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5124     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5125 
5126     if (esz == MO_64) {
5127         gen_d(t0, qc, t1, t2);
5128     } else {
5129         gen_bhs(t0, qc, t1, t2, esz);
5130         tcg_gen_ext_i64(t0, t0, esz);
5131     }
5132 
5133     write_fp_dreg(s, a->rd, t0);
5134     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5135     return true;
5136 }
5137 
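/*
 * These wire up do_satacc_s above: the gen_* helpers receive the
 * cumulative QC value loaded from vfp.qc and update it on saturation,
 * and the store back to vfp.qc makes the scalar forms set FPSR.QC just
 * like their vector counterparts.  SUQADD/USQADD mix signedness, hence
 * the separate sgn_n/sgn_m arguments.
 */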
5138 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5139 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5140 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5141 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5142 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5143 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5144 
5145 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5146                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5147 {
5148     if (fp_access_check(s)) {
5149         TCGv_i64 t0 = tcg_temp_new_i64();
5150         TCGv_i64 t1 = tcg_temp_new_i64();
5151 
5152         read_vec_element(s, t0, a->rn, 0, MO_64);
5153         read_vec_element(s, t1, a->rm, 0, MO_64);
5154         fn(t0, t0, t1);
5155         write_fp_dreg(s, a->rd, t0);
5156     }
5157     return true;
5158 }
5159 
5160 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5161 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5162 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5163 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5164 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5165 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5166 
5167 typedef struct ENVScalar2 {
5168     NeonGenTwoOpEnvFn *gen_bhs[3];
5169     NeonGenTwo64OpEnvFn *gen_d;
5170 } ENVScalar2;
5171 
5172 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5173 {
5174     if (!fp_access_check(s)) {
5175         return true;
5176     }
5177     if (a->esz == MO_64) {
5178         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5179         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5180         f->gen_d(t0, tcg_env, t0, t1);
5181         write_fp_dreg(s, a->rd, t0);
5182     } else {
5183         TCGv_i32 t0 = tcg_temp_new_i32();
5184         TCGv_i32 t1 = tcg_temp_new_i32();
5185 
5186         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5187         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5188         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5189         write_fp_sreg(s, a->rd, t0);
5190     }
5191     return true;
5192 }
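
/*
 * gen_bhs[] is indexed directly by esz (MO_8/MO_16/MO_32); MO_64 uses
 * the separate gen_d callback.  The helpers take tcg_env so that the
 * saturating shifts can record QC.
 */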
5193 
5194 static const ENVScalar2 f_scalar_sqshl = {
5195     { gen_helper_neon_qshl_s8,
5196       gen_helper_neon_qshl_s16,
5197       gen_helper_neon_qshl_s32 },
5198     gen_helper_neon_qshl_s64,
5199 };
5200 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5201 
5202 static const ENVScalar2 f_scalar_uqshl = {
5203     { gen_helper_neon_qshl_u8,
5204       gen_helper_neon_qshl_u16,
5205       gen_helper_neon_qshl_u32 },
5206     gen_helper_neon_qshl_u64,
5207 };
5208 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5209 
5210 static const ENVScalar2 f_scalar_sqrshl = {
5211     { gen_helper_neon_qrshl_s8,
5212       gen_helper_neon_qrshl_s16,
5213       gen_helper_neon_qrshl_s32 },
5214     gen_helper_neon_qrshl_s64,
5215 };
5216 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5217 
5218 static const ENVScalar2 f_scalar_uqrshl = {
5219     { gen_helper_neon_qrshl_u8,
5220       gen_helper_neon_qrshl_u16,
5221       gen_helper_neon_qrshl_u32 },
5222     gen_helper_neon_qrshl_u64,
5223 };
5224 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5225 
5226 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5227                               const ENVScalar2 *f)
5228 {
5229     if (a->esz == MO_16 || a->esz == MO_32) {
5230         return do_env_scalar2(s, a, f);
5231     }
5232     return false;
5233 }
5234 
5235 static const ENVScalar2 f_scalar_sqdmulh = {
5236     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5237 };
5238 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5239 
5240 static const ENVScalar2 f_scalar_sqrdmulh = {
5241     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5242 };
5243 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5244 
5245 typedef struct ENVScalar3 {
5246     NeonGenThreeOpEnvFn *gen_hs[2];
5247 } ENVScalar3;
5248 
5249 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5250                               const ENVScalar3 *f)
5251 {
5252     TCGv_i32 t0, t1, t2;
5253 
5254     if (a->esz != MO_16 && a->esz != MO_32) {
5255         return false;
5256     }
5257     if (!fp_access_check(s)) {
5258         return true;
5259     }
5260 
5261     t0 = tcg_temp_new_i32();
5262     t1 = tcg_temp_new_i32();
5263     t2 = tcg_temp_new_i32();
5264     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5265     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5266     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5267     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5268     write_fp_sreg(s, a->rd, t0);
5269     return true;
5270 }
5271 
5272 static const ENVScalar3 f_scalar_sqrdmlah = {
5273     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5274 };
5275 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5276 
5277 static const ENVScalar3 f_scalar_sqrdmlsh = {
5278     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5279 };
5280 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5281 
5282 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5283 {
5284     if (fp_access_check(s)) {
5285         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5286         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5287         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5288         write_fp_dreg(s, a->rd, t0);
5289     }
5290     return true;
5291 }
5292 
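/*
 * tcg_gen_negsetcond_i64() produces -1 or 0, giving the all-ones /
 * all-zeros result these compares define.  CMTST uses TCG_COND_TSTNE,
 * i.e. "(Rn & Rm) != 0".
 */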
5293 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5294 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5295 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5296 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5297 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5298 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5299 
5300 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5301                           gen_helper_gvec_3_ptr * const fns[3])
5302 {
5303     MemOp esz = a->esz;
5304 
5305     switch (esz) {
5306     case MO_64:
5307         if (!a->q) {
5308             return false;
5309         }
5310         break;
5311     case MO_32:
5312         break;
5313     case MO_16:
5314         if (!dc_isar_feature(aa64_fp16, s)) {
5315             return false;
5316         }
5317         break;
5318     default:
5319         return false;
5320     }
5321     if (fp_access_check(s)) {
5322         gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
5323                           esz == MO_16, data, fns[esz - 1]);
5324     }
5325     return true;
5326 }
5327 
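/*
 * The fns[3] tables below are ordered { _h, _s, _d } and are indexed
 * with esz - 1 (MO_16 == 1), matching the size switch above.
 */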
5328 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5329     gen_helper_gvec_fadd_h,
5330     gen_helper_gvec_fadd_s,
5331     gen_helper_gvec_fadd_d,
5332 };
5333 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5334 
5335 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5336     gen_helper_gvec_fsub_h,
5337     gen_helper_gvec_fsub_s,
5338     gen_helper_gvec_fsub_d,
5339 };
5340 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5341 
5342 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5343     gen_helper_gvec_fdiv_h,
5344     gen_helper_gvec_fdiv_s,
5345     gen_helper_gvec_fdiv_d,
5346 };
5347 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5348 
5349 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5350     gen_helper_gvec_fmul_h,
5351     gen_helper_gvec_fmul_s,
5352     gen_helper_gvec_fmul_d,
5353 };
5354 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5355 
5356 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5357     gen_helper_gvec_fmax_h,
5358     gen_helper_gvec_fmax_s,
5359     gen_helper_gvec_fmax_d,
5360 };
5361 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
5362 
5363 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5364     gen_helper_gvec_fmin_h,
5365     gen_helper_gvec_fmin_s,
5366     gen_helper_gvec_fmin_d,
5367 };
5368 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
5369 
5370 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5371     gen_helper_gvec_fmaxnum_h,
5372     gen_helper_gvec_fmaxnum_s,
5373     gen_helper_gvec_fmaxnum_d,
5374 };
5375 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5376 
5377 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5378     gen_helper_gvec_fminnum_h,
5379     gen_helper_gvec_fminnum_s,
5380     gen_helper_gvec_fminnum_d,
5381 };
5382 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5383 
5384 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5385     gen_helper_gvec_fmulx_h,
5386     gen_helper_gvec_fmulx_s,
5387     gen_helper_gvec_fmulx_d,
5388 };
5389 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5390 
5391 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5392     gen_helper_gvec_vfma_h,
5393     gen_helper_gvec_vfma_s,
5394     gen_helper_gvec_vfma_d,
5395 };
5396 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5397 
5398 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5399     gen_helper_gvec_vfms_h,
5400     gen_helper_gvec_vfms_s,
5401     gen_helper_gvec_vfms_d,
5402 };
5403 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
5404 
5405 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5406     gen_helper_gvec_fceq_h,
5407     gen_helper_gvec_fceq_s,
5408     gen_helper_gvec_fceq_d,
5409 };
5410 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5411 
5412 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5413     gen_helper_gvec_fcge_h,
5414     gen_helper_gvec_fcge_s,
5415     gen_helper_gvec_fcge_d,
5416 };
5417 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5418 
5419 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5420     gen_helper_gvec_fcgt_h,
5421     gen_helper_gvec_fcgt_s,
5422     gen_helper_gvec_fcgt_d,
5423 };
5424 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5425 
5426 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5427     gen_helper_gvec_facge_h,
5428     gen_helper_gvec_facge_s,
5429     gen_helper_gvec_facge_d,
5430 };
5431 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5432 
5433 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5434     gen_helper_gvec_facgt_h,
5435     gen_helper_gvec_facgt_s,
5436     gen_helper_gvec_facgt_d,
5437 };
5438 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5439 
5440 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5441     gen_helper_gvec_fabd_h,
5442     gen_helper_gvec_fabd_s,
5443     gen_helper_gvec_fabd_d,
5444 };
5445 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)
5446 
5447 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5448     gen_helper_gvec_recps_h,
5449     gen_helper_gvec_recps_s,
5450     gen_helper_gvec_recps_d,
5451 };
5452 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)
5453 
5454 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5455     gen_helper_gvec_rsqrts_h,
5456     gen_helper_gvec_rsqrts_s,
5457     gen_helper_gvec_rsqrts_d,
5458 };
5459 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
5460 
5461 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5462     gen_helper_gvec_faddp_h,
5463     gen_helper_gvec_faddp_s,
5464     gen_helper_gvec_faddp_d,
5465 };
5466 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5467 
5468 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5469     gen_helper_gvec_fmaxp_h,
5470     gen_helper_gvec_fmaxp_s,
5471     gen_helper_gvec_fmaxp_d,
5472 };
5473 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
5474 
5475 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5476     gen_helper_gvec_fminp_h,
5477     gen_helper_gvec_fminp_s,
5478     gen_helper_gvec_fminp_d,
5479 };
5480 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
5481 
5482 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5483     gen_helper_gvec_fmaxnump_h,
5484     gen_helper_gvec_fmaxnump_s,
5485     gen_helper_gvec_fmaxnump_d,
5486 };
5487 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5488 
5489 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5490     gen_helper_gvec_fminnump_h,
5491     gen_helper_gvec_fminnump_s,
5492     gen_helper_gvec_fminnump_d,
5493 };
5494 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5495 
5496 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5497 {
5498     if (fp_access_check(s)) {
5499         int data = (is_2 << 1) | is_s;
5500         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5501                            vec_full_reg_offset(s, a->rn),
5502                            vec_full_reg_offset(s, a->rm), tcg_env,
5503                            a->q ? 16 : 8, vec_full_reg_size(s),
5504                            data, gen_helper_gvec_fmlal_a64);
5505     }
5506     return true;
5507 }
5508 
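/*
 * Per the instantiations below, bit 0 of 'data' (is_s) selects the
 * subtracting FMLSL* forms and bit 1 (is_2) selects the upper-half
 * source elements used by FMLAL2/FMLSL2.
 */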
5509 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5510 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5511 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5512 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
5513 
5514 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
5515 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
5516 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
5517 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
5518 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
5519 
5520 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
5521 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
5522 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
5523 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
5524 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
5525 
5526 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
5527 {
5528     if (fp_access_check(s)) {
5529         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
5530     }
5531     return true;
5532 }
5533 
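/*
 * BSL, BIT and BIF are all the same bitwise select; they differ only in
 * which register provides the mask and which two provide the data, so
 * all three share gen_gvec_bitsel with permuted operands.
 */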
5534 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
5535 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
5536 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
5537 
5538 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
5539 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
5540 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
5541 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
5542 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
5543 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
5544 
5545 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
5546 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
5547 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
5548 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
5549 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
5550 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
5551 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
5552 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
5553 
5554 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
5555 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
5556 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
5557 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
5558 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
5559 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
5560 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
5561 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
5562 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
5563 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
5564 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
5565 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
5566 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
5567 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
5568 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
5569 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
5570 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
5571 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
5572 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
5573 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
5574 
5575 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
5576 {
5577     if (a->esz == MO_64 && !a->q) {
5578         return false;
5579     }
5580     if (fp_access_check(s)) {
5581         tcg_gen_gvec_cmp(cond, a->esz,
5582                          vec_full_reg_offset(s, a->rd),
5583                          vec_full_reg_offset(s, a->rn),
5584                          vec_full_reg_offset(s, a->rm),
5585                          a->q ? 16 : 8, vec_full_reg_size(s));
5586     }
5587     return true;
5588 }
5589 
5590 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
5591 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
5592 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
5593 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
5594 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
5595 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
5596 
5597 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
5598 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
5599 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
5600 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
5601 
5602 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
5603                           gen_helper_gvec_4 *fn)
5604 {
5605     if (fp_access_check(s)) {
5606         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5607     }
5608     return true;
5609 }
5610 
5611 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
5612 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
5613 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
5614 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot)
5615 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfmmla)
5616 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
5617 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
5618 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
5619 
5620 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
5621 {
5622     if (!dc_isar_feature(aa64_bf16, s)) {
5623         return false;
5624     }
5625     if (fp_access_check(s)) {
5626         /* Q bit selects BFMLALB vs BFMLALT. */
5627         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
5628                           gen_helper_gvec_bfmlal);
5629     }
5630     return true;
5631 }
5632 
5633 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
5634     gen_helper_gvec_fcaddh,
5635     gen_helper_gvec_fcadds,
5636     gen_helper_gvec_fcaddd,
5637 };
5638 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
5639 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
5640 
5641 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
5642 {
5643     gen_helper_gvec_4_ptr *fn;
5644 
5645     if (!dc_isar_feature(aa64_fcma, s)) {
5646         return false;
5647     }
5648     switch (a->esz) {
5649     case MO_64:
5650         if (!a->q) {
5651             return false;
5652         }
5653         fn = gen_helper_gvec_fcmlad;
5654         break;
5655     case MO_32:
5656         fn = gen_helper_gvec_fcmlas;
5657         break;
5658     case MO_16:
5659         if (!dc_isar_feature(aa64_fp16, s)) {
5660             return false;
5661         }
5662         fn = gen_helper_gvec_fcmlah;
5663         break;
5664     default:
5665         return false;
5666     }
5667     if (fp_access_check(s)) {
5668         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
5669                           a->esz == MO_16, a->rot, fn);
5670     }
5671     return true;
5672 }
5673 
5674 /*
5675  * Widening vector x vector/indexed.
5676  *
5677  * These read from the top or bottom half of a 128-bit vector.
5678  * After widening, optionally accumulate with a 128-bit vector.
5679  * Implement these inline, as the number of elements is limited
5680  * and the related SVE and SME operations on larger vectors use
5681  * even/odd elements instead of top/bottom half.
5682  *
5683  * If idx >= 0, operand 2 is indexed, otherwise vector.
5684  * If acc, operand 0 is loaded with rd.
5685  */
5686 
5687 /* For low half, iterating up. */
5688 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
5689                             int rd, int rn, int rm, int idx,
5690                             NeonGenTwo64OpFn *fn, bool acc)
5691 {
5692     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
5693     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5694     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5695     MemOp esz = memop & MO_SIZE;
5696     int half = 8 >> esz;
5697     int top_swap, top_half;
5698 
5699     /* There are no 64x64->128 bit operations. */
5700     if (esz >= MO_64) {
5701         return false;
5702     }
5703     if (!fp_access_check(s)) {
5704         return true;
5705     }
5706 
5707     if (idx >= 0) {
5708         read_vec_element(s, tcg_op2, rm, idx, memop);
5709     }
5710 
5711     /*
5712      * For top half inputs, iterate forward; backward for bottom half.
5713      * This means the store to the destination will not occur until
5714      * overlapping inputs are consumed.
5715      * Use top_swap to conditionally invert the forward iteration index.
5716      */
5717     top_swap = top ? 0 : half - 1;
5718     top_half = top ? half : 0;
5719 
5720     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5721         int elt = elt_fwd ^ top_swap;
5722 
5723         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
5724         if (idx < 0) {
5725             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
5726         }
5727         if (acc) {
5728             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
5729         }
5730         fn(tcg_op0, tcg_op1, tcg_op2);
5731         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
5732     }
5733     clear_vec_high(s, 1, rd);
5734     return true;
5735 }
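
/*
 * Worked example of the ordering above: for esz == MO_16, half == 4.
 * With top == 0, top_swap == 3, so elt runs 3,2,1,0 and inputs come
 * from elements 3..0; with top == 1, elt runs 0,1,2,3 and inputs come
 * from elements 4..7.  Either way each double-width result element is
 * written only after any source element it overlaps has been read.
 */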
5736 
5737 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5738 {
5739     TCGv_i64 t = tcg_temp_new_i64();
5740     tcg_gen_mul_i64(t, n, m);
5741     tcg_gen_add_i64(d, d, t);
5742 }
5743 
5744 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5745 {
5746     TCGv_i64 t = tcg_temp_new_i64();
5747     tcg_gen_mul_i64(t, n, m);
5748     tcg_gen_sub_i64(d, d, t);
5749 }
5750 
5751 TRANS(SMULL_v, do_3op_widening,
5752       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5753       tcg_gen_mul_i64, false)
5754 TRANS(UMULL_v, do_3op_widening,
5755       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5756       tcg_gen_mul_i64, false)
5757 TRANS(SMLAL_v, do_3op_widening,
5758       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5759       gen_muladd_i64, true)
5760 TRANS(UMLAL_v, do_3op_widening,
5761       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5762       gen_muladd_i64, true)
5763 TRANS(SMLSL_v, do_3op_widening,
5764       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5765       gen_mulsub_i64, true)
5766 TRANS(UMLSL_v, do_3op_widening,
5767       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5768       gen_mulsub_i64, true)
5769 
5770 TRANS(SMULL_vi, do_3op_widening,
5771       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5772       tcg_gen_mul_i64, false)
5773 TRANS(UMULL_vi, do_3op_widening,
5774       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5775       tcg_gen_mul_i64, false)
5776 TRANS(SMLAL_vi, do_3op_widening,
5777       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5778       gen_muladd_i64, true)
5779 TRANS(UMLAL_vi, do_3op_widening,
5780       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5781       gen_muladd_i64, true)
5782 TRANS(SMLSL_vi, do_3op_widening,
5783       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5784       gen_mulsub_i64, true)
5785 TRANS(UMLSL_vi, do_3op_widening,
5786       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5787       gen_mulsub_i64, true)
5788 
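/*
 * Branch-free absolute difference: both n - m and m - n are computed
 * and movcond selects the non-negative one (GE for the signed variant,
 * GEU for the unsigned one).
 */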
5789 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5790 {
5791     TCGv_i64 t1 = tcg_temp_new_i64();
5792     TCGv_i64 t2 = tcg_temp_new_i64();
5793 
5794     tcg_gen_sub_i64(t1, n, m);
5795     tcg_gen_sub_i64(t2, m, n);
5796     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
5797 }
5798 
5799 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5800 {
5801     TCGv_i64 t1 = tcg_temp_new_i64();
5802     TCGv_i64 t2 = tcg_temp_new_i64();
5803 
5804     tcg_gen_sub_i64(t1, n, m);
5805     tcg_gen_sub_i64(t2, m, n);
5806     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
5807 }
5808 
5809 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5810 {
5811     TCGv_i64 t = tcg_temp_new_i64();
5812     gen_sabd_i64(t, n, m);
5813     tcg_gen_add_i64(d, d, t);
5814 }
5815 
5816 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5817 {
5818     TCGv_i64 t = tcg_temp_new_i64();
5819     gen_uabd_i64(t, n, m);
5820     tcg_gen_add_i64(d, d, t);
5821 }
5822 
5823 TRANS(SADDL_v, do_3op_widening,
5824       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5825       tcg_gen_add_i64, false)
5826 TRANS(UADDL_v, do_3op_widening,
5827       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5828       tcg_gen_add_i64, false)
5829 TRANS(SSUBL_v, do_3op_widening,
5830       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5831       tcg_gen_sub_i64, false)
5832 TRANS(USUBL_v, do_3op_widening,
5833       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5834       tcg_gen_sub_i64, false)
5835 TRANS(SABDL_v, do_3op_widening,
5836       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5837       gen_sabd_i64, false)
5838 TRANS(UABDL_v, do_3op_widening,
5839       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5840       gen_uabd_i64, false)
5841 TRANS(SABAL_v, do_3op_widening,
5842       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5843       gen_saba_i64, true)
5844 TRANS(UABAL_v, do_3op_widening,
5845       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5846       gen_uaba_i64, true)
5847 
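/*
 * The doubling in SQDMULL is done by the saturating-add helpers: the
 * widened product is added to itself (d + d).  The helpers take tcg_env
 * so that they can record saturation; the *mlsl forms negate the
 * doubled product before the saturating accumulate.
 */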
5848 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5849 {
5850     tcg_gen_mul_i64(d, n, m);
5851     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
5852 }
5853 
5854 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5855 {
5856     tcg_gen_mul_i64(d, n, m);
5857     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
5858 }
5859 
5860 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5861 {
5862     TCGv_i64 t = tcg_temp_new_i64();
5863 
5864     tcg_gen_mul_i64(t, n, m);
5865     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
5866     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
5867 }
5868 
5869 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5870 {
5871     TCGv_i64 t = tcg_temp_new_i64();
5872 
5873     tcg_gen_mul_i64(t, n, m);
5874     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
5875     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
5876 }
5877 
5878 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5879 {
5880     TCGv_i64 t = tcg_temp_new_i64();
5881 
5882     tcg_gen_mul_i64(t, n, m);
5883     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
5884     tcg_gen_neg_i64(t, t);
5885     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
5886 }
5887 
5888 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5889 {
5890     TCGv_i64 t = tcg_temp_new_i64();
5891 
5892     tcg_gen_mul_i64(t, n, m);
5893     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
5894     tcg_gen_neg_i64(t, t);
5895     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
5896 }
5897 
5898 TRANS(SQDMULL_v, do_3op_widening,
5899       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5900       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
5901 TRANS(SQDMLAL_v, do_3op_widening,
5902       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5903       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
5904 TRANS(SQDMLSL_v, do_3op_widening,
5905       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5906       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
5907 
5908 TRANS(SQDMULL_vi, do_3op_widening,
5909       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5910       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
5911 TRANS(SQDMLAL_vi, do_3op_widening,
5912       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5913       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
5914 TRANS(SQDMLSL_vi, do_3op_widening,
5915       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5916       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
5917 
5918 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
5919                            MemOp sign, bool sub)
5920 {
5921     TCGv_i64 tcg_op0, tcg_op1;
5922     MemOp esz = a->esz;
5923     int half = 8 >> esz;
5924     bool top = a->q;
5925     int top_swap = top ? 0 : half - 1;
5926     int top_half = top ? half : 0;
5927 
5928     /* There are no 64x64->128 bit operations. */
5929     if (esz >= MO_64) {
5930         return false;
5931     }
5932     if (!fp_access_check(s)) {
5933         return true;
5934     }
5935     tcg_op0 = tcg_temp_new_i64();
5936     tcg_op1 = tcg_temp_new_i64();
5937 
5938     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5939         int elt = elt_fwd ^ top_swap;
5940 
5941         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
5942         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
5943         if (sub) {
5944             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
5945         } else {
5946             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
5947         }
5948         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
5949     }
5950     clear_vec_high(s, 1, a->rd);
5951     return true;
5952 }
5953 
5954 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
5955 TRANS(UADDW, do_addsub_wide, a, 0, false)
5956 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
5957 TRANS(USUBW, do_addsub_wide, a, 0, true)
5958 
5959 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
5960                                  bool sub, bool round)
5961 {
5962     TCGv_i64 tcg_op0, tcg_op1;
5963     MemOp esz = a->esz;
5964     int half = 8 >> esz;
5965     bool top = a->q;
5966     int ebits = 8 << esz;
5967     uint64_t rbit = 1ull << (ebits - 1);
5968     int top_swap, top_half;
5969 
5970     /* There are no 128x128->64 bit operations. */
5971     if (esz >= MO_64) {
5972         return false;
5973     }
5974     if (!fp_access_check(s)) {
5975         return true;
5976     }
5977     tcg_op0 = tcg_temp_new_i64();
5978     tcg_op1 = tcg_temp_new_i64();
5979 
5980     /*
5981      * For top half inputs, iterate backward; forward for bottom half.
5982      * This means the store to the destination will not occur until
5983      * overlapping inputs are consumed.
5984      */
5985     top_swap = top ? half - 1 : 0;
5986     top_half = top ? half : 0;
5987 
5988     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5989         int elt = elt_fwd ^ top_swap;
5990 
5991         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
5992         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
5993         if (sub) {
5994             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
5995         } else {
5996             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
5997         }
5998         if (round) {
5999             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6000         }
6001         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6002         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6003     }
6004     clear_vec_high(s, top, a->rd);
6005     return true;
6006 }
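
/*
 * The narrowed result is the high half of each double-width sum or
 * difference: for esz == MO_16, ebits == 16, so each element becomes
 * (op0 +/- op1 [+ 0x8000 for the rounding forms]) >> 16, written to the
 * low or high half of Vd according to Q.
 */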
6007 
6008 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6009 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6010 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6011 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6012 
6013 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6014 {
6015     if (fp_access_check(s)) {
6016         /* The Q field specifies lo/hi half input for these insns.  */
6017         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6018     }
6019     return true;
6020 }
6021 
6022 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6023 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6024 
6025 /*
6026  * Advanced SIMD scalar/vector x indexed element
6027  */
6028 
6029 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6030 {
6031     switch (a->esz) {
6032     case MO_64:
6033         if (fp_access_check(s)) {
6034             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6035             TCGv_i64 t1 = tcg_temp_new_i64();
6036 
6037             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6038             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6039             write_fp_dreg(s, a->rd, t0);
6040         }
6041         break;
6042     case MO_32:
6043         if (fp_access_check(s)) {
6044             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6045             TCGv_i32 t1 = tcg_temp_new_i32();
6046 
6047             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6048             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6049             write_fp_sreg(s, a->rd, t0);
6050         }
6051         break;
6052     case MO_16:
6053         if (!dc_isar_feature(aa64_fp16, s)) {
6054             return false;
6055         }
6056         if (fp_access_check(s)) {
6057             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6058             TCGv_i32 t1 = tcg_temp_new_i32();
6059 
6060             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6061             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6062             write_fp_sreg(s, a->rd, t0);
6063         }
6064         break;
6065     default:
6066         g_assert_not_reached();
6067     }
6068     return true;
6069 }
6070 
6071 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6072 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6073 
6074 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6075 {
6076     switch (a->esz) {
6077     case MO_64:
6078         if (fp_access_check(s)) {
6079             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6080             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6081             TCGv_i64 t2 = tcg_temp_new_i64();
6082 
6083             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6084             if (neg) {
6085                 gen_vfp_negd(t1, t1);
6086             }
6087             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6088             write_fp_dreg(s, a->rd, t0);
6089         }
6090         break;
6091     case MO_32:
6092         if (fp_access_check(s)) {
6093             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6094             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6095             TCGv_i32 t2 = tcg_temp_new_i32();
6096 
6097             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6098             if (neg) {
6099                 gen_vfp_negs(t1, t1);
6100             }
6101             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6102             write_fp_sreg(s, a->rd, t0);
6103         }
6104         break;
6105     case MO_16:
6106         if (!dc_isar_feature(aa64_fp16, s)) {
6107             return false;
6108         }
6109         if (fp_access_check(s)) {
6110             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6111             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6112             TCGv_i32 t2 = tcg_temp_new_i32();
6113 
6114             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6115             if (neg) {
6116                 gen_vfp_negh(t1, t1);
6117             }
6118             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6119                                        fpstatus_ptr(FPST_FPCR_F16));
6120             write_fp_sreg(s, a->rd, t0);
6121         }
6122         break;
6123     default:
6124         g_assert_not_reached();
6125     }
6126     return true;
6127 }
6128 
6129 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6130 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6131 
6132 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6133                                   const ENVScalar2 *f)
6134 {
6135     if (a->esz < MO_16 || a->esz > MO_32) {
6136         return false;
6137     }
6138     if (fp_access_check(s)) {
6139         TCGv_i32 t0 = tcg_temp_new_i32();
6140         TCGv_i32 t1 = tcg_temp_new_i32();
6141 
6142         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6143         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6144         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6145         write_fp_sreg(s, a->rd, t0);
6146     }
6147     return true;
6148 }
6149 
6150 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6151 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6152 
6153 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6154                                   const ENVScalar3 *f)
6155 {
6156     if (a->esz < MO_16 || a->esz > MO_32) {
6157         return false;
6158     }
6159     if (fp_access_check(s)) {
6160         TCGv_i32 t0 = tcg_temp_new_i32();
6161         TCGv_i32 t1 = tcg_temp_new_i32();
6162         TCGv_i32 t2 = tcg_temp_new_i32();
6163 
6164         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6165         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6166         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6167         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6168         write_fp_sreg(s, a->rd, t0);
6169     }
6170     return true;
6171 }
6172 
6173 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6174 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6175 
6176 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6177                                           NeonGenTwo64OpFn *fn, bool acc)
6178 {
6179     if (fp_access_check(s)) {
6180         TCGv_i64 t0 = tcg_temp_new_i64();
6181         TCGv_i64 t1 = tcg_temp_new_i64();
6182         TCGv_i64 t2 = tcg_temp_new_i64();
6183         unsigned vsz, dofs;
6184 
6185         if (acc) {
6186             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6187         }
6188         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6189         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6190         fn(t0, t1, t2);
6191 
6192         /* Clear the whole register first, then store scalar. */
6193         vsz = vec_full_reg_size(s);
6194         dofs = vec_full_reg_offset(s, a->rd);
6195         tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
6196         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6197     }
6198     return true;
6199 }
6200 
6201 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6202       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6203 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6204       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6205 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6206       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6207 
6208 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6209                               gen_helper_gvec_3_ptr * const fns[3])
6210 {
6211     MemOp esz = a->esz;
6212 
6213     switch (esz) {
6214     case MO_64:
6215         if (!a->q) {
6216             return false;
6217         }
6218         break;
6219     case MO_32:
6220         break;
6221     case MO_16:
6222         if (!dc_isar_feature(aa64_fp16, s)) {
6223             return false;
6224         }
6225         break;
6226     default:
6227         g_assert_not_reached();
6228     }
6229     if (fp_access_check(s)) {
6230         gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6231                           esz == MO_16, a->idx, fns[esz - 1]);
6232     }
6233     return true;
6234 }
6235 
6236 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6237     gen_helper_gvec_fmul_idx_h,
6238     gen_helper_gvec_fmul_idx_s,
6239     gen_helper_gvec_fmul_idx_d,
6240 };
6241 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6242 
6243 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6244     gen_helper_gvec_fmulx_idx_h,
6245     gen_helper_gvec_fmulx_idx_s,
6246     gen_helper_gvec_fmulx_idx_d,
6247 };
6248 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6249 
6250 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6251 {
6252     static gen_helper_gvec_4_ptr * const fns[3] = {
6253         gen_helper_gvec_fmla_idx_h,
6254         gen_helper_gvec_fmla_idx_s,
6255         gen_helper_gvec_fmla_idx_d,
6256     };
6257     MemOp esz = a->esz;
6258 
6259     switch (esz) {
6260     case MO_64:
6261         if (!a->q) {
6262             return false;
6263         }
6264         break;
6265     case MO_32:
6266         break;
6267     case MO_16:
6268         if (!dc_isar_feature(aa64_fp16, s)) {
6269             return false;
6270         }
6271         break;
6272     default:
6273         g_assert_not_reached();
6274     }
6275     if (fp_access_check(s)) {
6276         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6277                           esz == MO_16, (a->idx << 1) | neg,
6278                           fns[esz - 1]);
6279     }
6280     return true;
6281 }
6282 
6283 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6284 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6285 
6286 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6287 {
6288     if (fp_access_check(s)) {
6289         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6290         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6291                            vec_full_reg_offset(s, a->rn),
6292                            vec_full_reg_offset(s, a->rm), tcg_env,
6293                            a->q ? 16 : 8, vec_full_reg_size(s),
6294                            data, gen_helper_gvec_fmlal_idx_a64);
6295     }
6296     return true;
6297 }
6298 
6299 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6300 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6301 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6302 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6303 
6304 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6305                                gen_helper_gvec_3 * const fns[2])
6306 {
6307     assert(a->esz == MO_16 || a->esz == MO_32);
6308     if (fp_access_check(s)) {
6309         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6310     }
6311     return true;
6312 }
6313 
6314 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6315     gen_helper_gvec_mul_idx_h,
6316     gen_helper_gvec_mul_idx_s,
6317 };
6318 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6319 
6320 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6321 {
6322     static gen_helper_gvec_4 * const fns[2][2] = {
6323         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6324         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6325     };
6326 
6327     assert(a->esz == MO_16 || a->esz == MO_32);
6328     if (fp_access_check(s)) {
6329         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6330                          a->idx, fns[a->esz - 1][sub]);
6331     }
6332     return true;
6333 }
6334 
6335 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6336 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6337 
6338 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6339                                   gen_helper_gvec_4 * const fns[2])
6340 {
6341     assert(a->esz == MO_16 || a->esz == MO_32);
6342     if (fp_access_check(s)) {
6343         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6344                            vec_full_reg_offset(s, a->rn),
6345                            vec_full_reg_offset(s, a->rm),
6346                            offsetof(CPUARMState, vfp.qc),
6347                            a->q ? 16 : 8, vec_full_reg_size(s),
6348                            a->idx, fns[a->esz - 1]);
6349     }
6350     return true;
6351 }
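
/*
 * The fourth gvec operand here is not a vector register: it is the
 * address of vfp.qc, so the saturating-multiply helpers can accumulate
 * the QC flag while computing the result.
 */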
6352 
6353 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6354     gen_helper_neon_sqdmulh_idx_h,
6355     gen_helper_neon_sqdmulh_idx_s,
6356 };
6357 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6358 
6359 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6360     gen_helper_neon_sqrdmulh_idx_h,
6361     gen_helper_neon_sqrdmulh_idx_s,
6362 };
6363 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6364 
6365 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6366     gen_helper_neon_sqrdmlah_idx_h,
6367     gen_helper_neon_sqrdmlah_idx_s,
6368 };
6369 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6370            f_vector_idx_sqrdmlah)
6371 
6372 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6373     gen_helper_neon_sqrdmlsh_idx_h,
6374     gen_helper_neon_sqrdmlsh_idx_s,
6375 };
6376 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6377            f_vector_idx_sqrdmlsh)
6378 
6379 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6380                               gen_helper_gvec_4 *fn)
6381 {
6382     if (fp_access_check(s)) {
6383         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6384     }
6385     return true;
6386 }
6387 
6388 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6389 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6390 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6391            gen_helper_gvec_sudot_idx_b)
6392 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6393            gen_helper_gvec_usdot_idx_b)
6394 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx, a,
6395            gen_helper_gvec_bfdot_idx)
6396 
6397 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6398 {
6399     if (!dc_isar_feature(aa64_bf16, s)) {
6400         return false;
6401     }
6402     if (fp_access_check(s)) {
6403         /* Q bit selects BFMLALB vs BFMLALT. */
6404         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
6405                           (a->idx << 1) | a->q,
6406                           gen_helper_gvec_bfmlal_idx);
6407     }
6408     return true;
6409 }
6410 
6411 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6412 {
6413     gen_helper_gvec_4_ptr *fn;
6414 
6415     if (!dc_isar_feature(aa64_fcma, s)) {
6416         return false;
6417     }
6418     switch (a->esz) {
6419     case MO_16:
6420         if (!dc_isar_feature(aa64_fp16, s)) {
6421             return false;
6422         }
6423         fn = gen_helper_gvec_fcmlah_idx;
6424         break;
6425     case MO_32:
6426         fn = gen_helper_gvec_fcmlas_idx;
6427         break;
6428     default:
6429         g_assert_not_reached();
6430     }
6431     if (fp_access_check(s)) {
6432         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6433                           a->esz == MO_16, (a->idx << 2) | a->rot, fn);
6434     }
6435     return true;
6436 }
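
/*
 * The helper immediate packs the element index above the two-bit
 * rotation, (a->idx << 2) | a->rot, where rot selects the 0/90/180/270
 * degree rotation of the complex multiply-accumulate.
 */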
6437 
6438 /*
6439  * Advanced SIMD scalar pairwise
6440  */
6441 
6442 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6443 {
6444     switch (a->esz) {
6445     case MO_64:
6446         if (fp_access_check(s)) {
6447             TCGv_i64 t0 = tcg_temp_new_i64();
6448             TCGv_i64 t1 = tcg_temp_new_i64();
6449 
6450             read_vec_element(s, t0, a->rn, 0, MO_64);
6451             read_vec_element(s, t1, a->rn, 1, MO_64);
6452             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6453             write_fp_dreg(s, a->rd, t0);
6454         }
6455         break;
6456     case MO_32:
6457         if (fp_access_check(s)) {
6458             TCGv_i32 t0 = tcg_temp_new_i32();
6459             TCGv_i32 t1 = tcg_temp_new_i32();
6460 
6461             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6462             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6463             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6464             write_fp_sreg(s, a->rd, t0);
6465         }
6466         break;
6467     case MO_16:
6468         if (!dc_isar_feature(aa64_fp16, s)) {
6469             return false;
6470         }
6471         if (fp_access_check(s)) {
6472             TCGv_i32 t0 = tcg_temp_new_i32();
6473             TCGv_i32 t1 = tcg_temp_new_i32();
6474 
6475             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6476             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6477             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6478             write_fp_sreg(s, a->rd, t0);
6479         }
6480         break;
6481     default:
6482         g_assert_not_reached();
6483     }
6484     return true;
6485 }
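
/*
 * Editorial note (illustrative): the scalar pairwise forms reduce the
 * two elements of the source register into one scalar, e.g.
 * FADDP d0, v1.2d computes d0 = v1.d[0] + v1.d[1], which is why both
 * reads above take element 0 and element 1 of a->rn.
 */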
6486 
6487 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6488 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
6489 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
6490 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6491 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6492 
6493 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6494 {
6495     if (fp_access_check(s)) {
6496         TCGv_i64 t0 = tcg_temp_new_i64();
6497         TCGv_i64 t1 = tcg_temp_new_i64();
6498 
6499         read_vec_element(s, t0, a->rn, 0, MO_64);
6500         read_vec_element(s, t1, a->rn, 1, MO_64);
6501         tcg_gen_add_i64(t0, t0, t1);
6502         write_fp_dreg(s, a->rd, t0);
6503     }
6504     return true;
6505 }
6506 
6507 /*
6508  * Floating-point conditional select
6509  */
6510 
6511 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
6512 {
6513     TCGv_i64 t_true, t_false;
6514     DisasCompare64 c;
6515 
6516     switch (a->esz) {
6517     case MO_32:
6518     case MO_64:
6519         break;
6520     case MO_16:
6521         if (!dc_isar_feature(aa64_fp16, s)) {
6522             return false;
6523         }
6524         break;
6525     default:
6526         return false;
6527     }
6528 
6529     if (!fp_access_check(s)) {
6530         return true;
6531     }
6532 
6533     /* Zero extend sreg & hreg inputs to 64 bits now.  */
6534     t_true = tcg_temp_new_i64();
6535     t_false = tcg_temp_new_i64();
6536     read_vec_element(s, t_true, a->rn, 0, a->esz);
6537     read_vec_element(s, t_false, a->rm, 0, a->esz);
6538 
6539     a64_test_cc(&c, a->cond);
6540     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6541                         t_true, t_false);
6542 
6543     /*
6544      * Note that sregs & hregs write back zeros to the high bits,
6545      * and we've already done the zero-extension.
6546      */
6547     write_fp_dreg(s, a->rd, t_true);
6548     return true;
6549 }
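
/*
 * Illustrative example (editorial): FCSEL s0, s1, s2, GE writes s1 to
 * s0 when the GE condition holds and s2 otherwise.  Both inputs were
 * read zero-extended above, so the single 64-bit movcond followed by
 * write_fp_dreg() also gives the required zeroing of the high bits.
 */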
6550 
6551 /*
6552  * Floating-point data-processing (3 source)
6553  */
6554 
6555 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
6556 {
6557     TCGv_ptr fpst;
6558 
6559     /*
6560      * These are fused multiply-add.  Note that doing the negations here
6561      * as separate steps is correct: an input NaN should come out with
6562  * its sign bit flipped if it is a negated input.
6563      */
6564     switch (a->esz) {
6565     case MO_64:
6566         if (fp_access_check(s)) {
6567             TCGv_i64 tn = read_fp_dreg(s, a->rn);
6568             TCGv_i64 tm = read_fp_dreg(s, a->rm);
6569             TCGv_i64 ta = read_fp_dreg(s, a->ra);
6570 
6571             if (neg_a) {
6572                 gen_vfp_negd(ta, ta);
6573             }
6574             if (neg_n) {
6575                 gen_vfp_negd(tn, tn);
6576             }
6577             fpst = fpstatus_ptr(FPST_FPCR);
6578             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
6579             write_fp_dreg(s, a->rd, ta);
6580         }
6581         break;
6582 
6583     case MO_32:
6584         if (fp_access_check(s)) {
6585             TCGv_i32 tn = read_fp_sreg(s, a->rn);
6586             TCGv_i32 tm = read_fp_sreg(s, a->rm);
6587             TCGv_i32 ta = read_fp_sreg(s, a->ra);
6588 
6589             if (neg_a) {
6590                 gen_vfp_negs(ta, ta);
6591             }
6592             if (neg_n) {
6593                 gen_vfp_negs(tn, tn);
6594             }
6595             fpst = fpstatus_ptr(FPST_FPCR);
6596             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
6597             write_fp_sreg(s, a->rd, ta);
6598         }
6599         break;
6600 
6601     case MO_16:
6602         if (!dc_isar_feature(aa64_fp16, s)) {
6603             return false;
6604         }
6605         if (fp_access_check(s)) {
6606             TCGv_i32 tn = read_fp_hreg(s, a->rn);
6607             TCGv_i32 tm = read_fp_hreg(s, a->rm);
6608             TCGv_i32 ta = read_fp_hreg(s, a->ra);
6609 
6610             if (neg_a) {
6611                 gen_vfp_negh(ta, ta);
6612             }
6613             if (neg_n) {
6614                 gen_vfp_negh(tn, tn);
6615             }
6616             fpst = fpstatus_ptr(FPST_FPCR_F16);
6617             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
6618             write_fp_sreg(s, a->rd, ta);
6619         }
6620         break;
6621 
6622     default:
6623         return false;
6624     }
6625     return true;
6626 }
6627 
6628 TRANS(FMADD, do_fmadd, a, false, false)
6629 TRANS(FNMADD, do_fmadd, a, true, true)
6630 TRANS(FMSUB, do_fmadd, a, false, true)
6631 TRANS(FNMSUB, do_fmadd, a, true, false)
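
/*
 * Editorial summary of the (neg_a, neg_n) flags passed above:
 *   FMADD   rd =  ra + rn * rm
 *   FMSUB   rd =  ra - rn * rm   (neg_n)
 *   FNMSUB  rd = -ra + rn * rm   (neg_a)
 *   FNMADD  rd = -ra - rn * rm   (neg_a and neg_n)
 */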
6632 
6633 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
6634  * Note that it is the caller's responsibility to ensure that the
6635  * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
6636  * ARM-mandated semantics for out-of-range shifts.
6637  */
6638 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
6639                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
6640 {
6641     switch (shift_type) {
6642     case A64_SHIFT_TYPE_LSL:
6643         tcg_gen_shl_i64(dst, src, shift_amount);
6644         break;
6645     case A64_SHIFT_TYPE_LSR:
6646         tcg_gen_shr_i64(dst, src, shift_amount);
6647         break;
6648     case A64_SHIFT_TYPE_ASR:
6649         if (!sf) {
6650             tcg_gen_ext32s_i64(dst, src);
6651         }
6652         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
6653         break;
6654     case A64_SHIFT_TYPE_ROR:
6655         if (sf) {
6656             tcg_gen_rotr_i64(dst, src, shift_amount);
6657         } else {
6658             TCGv_i32 t0, t1;
6659             t0 = tcg_temp_new_i32();
6660             t1 = tcg_temp_new_i32();
6661             tcg_gen_extrl_i64_i32(t0, src);
6662             tcg_gen_extrl_i64_i32(t1, shift_amount);
6663             tcg_gen_rotr_i32(t0, t0, t1);
6664             tcg_gen_extu_i32_i64(dst, t0);
6665         }
6666         break;
6667     default:
6668         g_assert_not_reached(); /* all shift types should be handled */
6669         break;
6670     }
6671 
6672     if (!sf) { /* zero extend final result */
6673         tcg_gen_ext32u_i64(dst, dst);
6674     }
6675 }
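
/*
 * Illustrative example (editorial, not a real caller): a 32-bit ROR is
 * narrowed to 32 bits first, so rotating src = 0x12345678 right by 8
 * with sf == 0 gives 0x78123456 (then zero-extended), whereas sf == 1
 * rotates the full 64-bit register.
 */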
6676 
6677 /* Shift a TCGv src by immediate, put result in dst.
6678  * The shift amount must be in range (this should always be true as the
6679  * relevant instructions will UNDEF on bad shift immediates).
6680  */
6681 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
6682                           enum a64_shift_type shift_type, unsigned int shift_i)
6683 {
6684     assert(shift_i < (sf ? 64 : 32));
6685 
6686     if (shift_i == 0) {
6687         tcg_gen_mov_i64(dst, src);
6688     } else {
6689         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
6690     }
6691 }
6692 
6693 /* Logical (shifted register)
6694  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
6695  * +----+-----+-----------+-------+---+------+--------+------+------+
6696  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
6697  * +----+-----+-----------+-------+---+------+--------+------+------+
6698  */
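/*
 * Editorial worked example: "and w0, w1, w2, lsl #4" has sf = 0,
 * opc = 00, shift = 00 (LSL), N = 0, Rm = 2, imm6 = 4, Rn = 1, Rd = 0,
 * so tcg_rm is shifted left by 4 before the AND and the result is
 * zero-extended to 64 bits.  "mov x0, x1" is the ORR alias (opc = 1,
 * Rn = 31, no shift) handled by the special case below.
 */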
6699 static void disas_logic_reg(DisasContext *s, uint32_t insn)
6700 {
6701     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
6702     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
6703 
6704     sf = extract32(insn, 31, 1);
6705     opc = extract32(insn, 29, 2);
6706     shift_type = extract32(insn, 22, 2);
6707     invert = extract32(insn, 21, 1);
6708     rm = extract32(insn, 16, 5);
6709     shift_amount = extract32(insn, 10, 6);
6710     rn = extract32(insn, 5, 5);
6711     rd = extract32(insn, 0, 5);
6712 
6713     if (!sf && (shift_amount & (1 << 5))) {
6714         unallocated_encoding(s);
6715         return;
6716     }
6717 
6718     tcg_rd = cpu_reg(s, rd);
6719 
6720     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
6721         /* Unshifted ORR and ORN with WZR/XZR are the standard encodings for
6722          * register-register MOV and MVN, so they are worth special-casing.
6723          */
6724         tcg_rm = cpu_reg(s, rm);
6725         if (invert) {
6726             tcg_gen_not_i64(tcg_rd, tcg_rm);
6727             if (!sf) {
6728                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6729             }
6730         } else {
6731             if (sf) {
6732                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
6733             } else {
6734                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
6735             }
6736         }
6737         return;
6738     }
6739 
6740     tcg_rm = read_cpu_reg(s, rm, sf);
6741 
6742     if (shift_amount) {
6743         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
6744     }
6745 
6746     tcg_rn = cpu_reg(s, rn);
6747 
6748     switch (opc | (invert << 2)) {
6749     case 0: /* AND */
6750     case 3: /* ANDS */
6751         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
6752         break;
6753     case 1: /* ORR */
6754         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
6755         break;
6756     case 2: /* EOR */
6757         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
6758         break;
6759     case 4: /* BIC */
6760     case 7: /* BICS */
6761         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
6762         break;
6763     case 5: /* ORN */
6764         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
6765         break;
6766     case 6: /* EON */
6767         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
6768         break;
6769     default:
6770         g_assert_not_reached();
6771         break;
6772     }
6773 
6774     if (!sf) {
6775         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6776     }
6777 
6778     if (opc == 3) {
6779         gen_logic_CC(sf, tcg_rd);
6780     }
6781 }
6782 
6783 /*
6784  * Add/subtract (extended register)
6785  *
6786  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
6787  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
6788  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
6789  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
6790  *
6791  *  sf: 0 -> 32bit, 1 -> 64bit
6792  *  op: 0 -> add  , 1 -> sub
6793  *   S: 1 -> set flags
6794  * opt: 00
6795  * option: extension type (see DecodeRegExtend)
6796  * imm3: optional shift to Rm
6797  *
6798  * Rd = Rn + LSL(extend(Rm), amount)
6799  */
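/*
 * Editorial example: "add x0, sp, w1, uxtw #2" has option = 010 (UXTW)
 * and imm3 = 2, so Rm is zero-extended from 32 bits and shifted left
 * by 2 before being added to SP.  Because this is the extended-register
 * form, Rn (and Rd for the non-flag-setting variants) may be SP.
 */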
6800 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
6801 {
6802     int rd = extract32(insn, 0, 5);
6803     int rn = extract32(insn, 5, 5);
6804     int imm3 = extract32(insn, 10, 3);
6805     int option = extract32(insn, 13, 3);
6806     int rm = extract32(insn, 16, 5);
6807     int opt = extract32(insn, 22, 2);
6808     bool setflags = extract32(insn, 29, 1);
6809     bool sub_op = extract32(insn, 30, 1);
6810     bool sf = extract32(insn, 31, 1);
6811 
6812     TCGv_i64 tcg_rm, tcg_rn; /* temps */
6813     TCGv_i64 tcg_rd;
6814     TCGv_i64 tcg_result;
6815 
6816     if (imm3 > 4 || opt != 0) {
6817         unallocated_encoding(s);
6818         return;
6819     }
6820 
6821     /* Non-flag-setting ops may use SP as the destination. */
6822     if (!setflags) {
6823         tcg_rd = cpu_reg_sp(s, rd);
6824     } else {
6825         tcg_rd = cpu_reg(s, rd);
6826     }
6827     tcg_rn = read_cpu_reg_sp(s, rn, sf);
6828 
6829     tcg_rm = read_cpu_reg(s, rm, sf);
6830     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
6831 
6832     tcg_result = tcg_temp_new_i64();
6833 
6834     if (!setflags) {
6835         if (sub_op) {
6836             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
6837         } else {
6838             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
6839         }
6840     } else {
6841         if (sub_op) {
6842             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
6843         } else {
6844             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
6845         }
6846     }
6847 
6848     if (sf) {
6849         tcg_gen_mov_i64(tcg_rd, tcg_result);
6850     } else {
6851         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
6852     }
6853 }
6854 
6855 /*
6856  * Add/subtract (shifted register)
6857  *
6858  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
6859  * +--+--+--+-----------+-----+--+-------+---------+------+------+
6860  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
6861  * +--+--+--+-----------+-----+--+-------+---------+------+------+
6862  *
6863  *    sf: 0 -> 32bit, 1 -> 64bit
6864  *    op: 0 -> add  , 1 -> sub
6865  *     S: 1 -> set flags
6866  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
6867  *  imm6: Shift amount to apply to Rm before the add/sub
6868  */
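/*
 * Editorial note: SUBS with Rd == 31 in this form is the CMP (shifted
 * register) alias, e.g. "cmp x0, x1" is "subs xzr, x0, x1"; the final
 * register write below then targets the zero register and is discarded,
 * leaving only the flags.
 */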
6869 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
6870 {
6871     int rd = extract32(insn, 0, 5);
6872     int rn = extract32(insn, 5, 5);
6873     int imm6 = extract32(insn, 10, 6);
6874     int rm = extract32(insn, 16, 5);
6875     int shift_type = extract32(insn, 22, 2);
6876     bool setflags = extract32(insn, 29, 1);
6877     bool sub_op = extract32(insn, 30, 1);
6878     bool sf = extract32(insn, 31, 1);
6879 
6880     TCGv_i64 tcg_rd = cpu_reg(s, rd);
6881     TCGv_i64 tcg_rn, tcg_rm;
6882     TCGv_i64 tcg_result;
6883 
6884     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
6885         unallocated_encoding(s);
6886         return;
6887     }
6888 
6889     tcg_rn = read_cpu_reg(s, rn, sf);
6890     tcg_rm = read_cpu_reg(s, rm, sf);
6891 
6892     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
6893 
6894     tcg_result = tcg_temp_new_i64();
6895 
6896     if (!setflags) {
6897         if (sub_op) {
6898             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
6899         } else {
6900             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
6901         }
6902     } else {
6903         if (sub_op) {
6904             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
6905         } else {
6906             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
6907         }
6908     }
6909 
6910     if (sf) {
6911         tcg_gen_mov_i64(tcg_rd, tcg_result);
6912     } else {
6913         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
6914     }
6915 }
6916 
6917 /* Data-processing (3 source)
6918  *
6919  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
6920  *  +--+------+-----------+------+------+----+------+------+------+
6921  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6922  *  +--+------+-----------+------+------+----+------+------+------+
6923  */
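/*
 * Editorial example: UMULH x0, x1, x2 has op_id 0x4c and takes the
 * is_high path below, keeping only the upper 64 bits of the 128-bit
 * product; MUL x0, x1, x2 is really MADD x0, x1, x2, xzr and is
 * special-cased further down to skip the redundant add of zero.
 */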
6924 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
6925 {
6926     int rd = extract32(insn, 0, 5);
6927     int rn = extract32(insn, 5, 5);
6928     int ra = extract32(insn, 10, 5);
6929     int rm = extract32(insn, 16, 5);
6930     int op_id = (extract32(insn, 29, 3) << 4) |
6931         (extract32(insn, 21, 3) << 1) |
6932         extract32(insn, 15, 1);
6933     bool sf = extract32(insn, 31, 1);
6934     bool is_sub = extract32(op_id, 0, 1);
6935     bool is_high = extract32(op_id, 2, 1);
6936     bool is_signed = false;
6937     TCGv_i64 tcg_op1;
6938     TCGv_i64 tcg_op2;
6939     TCGv_i64 tcg_tmp;
6940 
6941     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
6942     switch (op_id) {
6943     case 0x42: /* SMADDL */
6944     case 0x43: /* SMSUBL */
6945     case 0x44: /* SMULH */
6946         is_signed = true;
6947         break;
6948     case 0x0: /* MADD (32bit) */
6949     case 0x1: /* MSUB (32bit) */
6950     case 0x40: /* MADD (64bit) */
6951     case 0x41: /* MSUB (64bit) */
6952     case 0x4a: /* UMADDL */
6953     case 0x4b: /* UMSUBL */
6954     case 0x4c: /* UMULH */
6955         break;
6956     default:
6957         unallocated_encoding(s);
6958         return;
6959     }
6960 
6961     if (is_high) {
6962         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
6963         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6964         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6965         TCGv_i64 tcg_rm = cpu_reg(s, rm);
6966 
6967         if (is_signed) {
6968             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
6969         } else {
6970             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
6971         }
6972         return;
6973     }
6974 
6975     tcg_op1 = tcg_temp_new_i64();
6976     tcg_op2 = tcg_temp_new_i64();
6977     tcg_tmp = tcg_temp_new_i64();
6978 
6979     if (op_id < 0x42) {
6980         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
6981         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
6982     } else {
6983         if (is_signed) {
6984             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
6985             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
6986         } else {
6987             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
6988             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
6989         }
6990     }
6991 
6992     if (ra == 31 && !is_sub) {
6993         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
6994         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
6995     } else {
6996         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
6997         if (is_sub) {
6998             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
6999         } else {
7000             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
7001         }
7002     }
7003 
7004     if (!sf) {
7005         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
7006     }
7007 }
7008 
7009 /* Add/subtract (with carry)
7010  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
7011  * +--+--+--+------------------------+------+-------------+------+-----+
7012  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
7013  * +--+--+--+------------------------+------+-------------+------+-----+
7014  */
7015 
7016 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
7017 {
7018     unsigned int sf, op, setflags, rm, rn, rd;
7019     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
7020 
7021     sf = extract32(insn, 31, 1);
7022     op = extract32(insn, 30, 1);
7023     setflags = extract32(insn, 29, 1);
7024     rm = extract32(insn, 16, 5);
7025     rn = extract32(insn, 5, 5);
7026     rd = extract32(insn, 0, 5);
7027 
7028     tcg_rd = cpu_reg(s, rd);
7029     tcg_rn = cpu_reg(s, rn);
7030 
7031     if (op) {
7032         tcg_y = tcg_temp_new_i64();
7033         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
7034     } else {
7035         tcg_y = cpu_reg(s, rm);
7036     }
7037 
7038     if (setflags) {
7039         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
7040     } else {
7041         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
7042     }
7043 }
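
/*
 * Editorial note: for SBC/SBCS (op == 1) the second operand is passed
 * in as NOT(Rm) above, so the shared gen_adc{,_CC}() path computes
 * Rn + NOT(Rm) + C, which is the architectural definition of a
 * subtract-with-carry.
 */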
7044 
7045 /*
7046  * Rotate right into flags
7047  *  31 30 29                21       15          10      5  4      0
7048  * +--+--+--+-----------------+--------+-----------+------+--+------+
7049  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
7050  * +--+--+--+-----------------+--------+-----------+------+--+------+
7051  */
7052 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
7053 {
7054     int mask = extract32(insn, 0, 4);
7055     int o2 = extract32(insn, 4, 1);
7056     int rn = extract32(insn, 5, 5);
7057     int imm6 = extract32(insn, 15, 6);
7058     int sf_op_s = extract32(insn, 29, 3);
7059     TCGv_i64 tcg_rn;
7060     TCGv_i32 nzcv;
7061 
7062     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
7063         unallocated_encoding(s);
7064         return;
7065     }
7066 
7067     tcg_rn = read_cpu_reg(s, rn, 1);
7068     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
7069 
7070     nzcv = tcg_temp_new_i32();
7071     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
7072 
7073     if (mask & 8) { /* N */
7074         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
7075     }
7076     if (mask & 4) { /* Z */
7077         tcg_gen_not_i32(cpu_ZF, nzcv);
7078         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
7079     }
7080     if (mask & 2) { /* C */
7081         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
7082     }
7083     if (mask & 1) { /* V */
7084         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
7085     }
7086 }
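
/*
 * Editorial example: RMIF x0, #60, #2 rotates X0 right by 60 and copies
 * only bit 1 of the rotated value (originally bit 61 of X0) into the C
 * flag, leaving N, Z and V untouched; hence each flag update above is
 * guarded by its own mask bit.
 */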
7087 
7088 /*
7089  * Evaluate into flags
7090  *  31 30 29                21        15   14        10      5  4      0
7091  * +--+--+--+-----------------+---------+----+---------+------+--+------+
7092  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
7093  * +--+--+--+-----------------+---------+----+---------+------+--+------+
7094  */
7095 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
7096 {
7097     int o3_mask = extract32(insn, 0, 5);
7098     int rn = extract32(insn, 5, 5);
7099     int o2 = extract32(insn, 15, 6);
7100     int sz = extract32(insn, 14, 1);
7101     int sf_op_s = extract32(insn, 29, 3);
7102     TCGv_i32 tmp;
7103     int shift;
7104 
7105     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
7106         !dc_isar_feature(aa64_condm_4, s)) {
7107         unallocated_encoding(s);
7108         return;
7109     }
7110     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
7111 
7112     tmp = tcg_temp_new_i32();
7113     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
7114     tcg_gen_shli_i32(cpu_NF, tmp, shift);
7115     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
7116     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
7117     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
7118 }
7119 
7120 /* Conditional compare (immediate / register)
7121  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
7122  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
7123  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
7124  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
7125  *        [1]                             y                [0]       [0]
7126  */
7127 static void disas_cc(DisasContext *s, uint32_t insn)
7128 {
7129     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
7130     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
7131     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
7132     DisasCompare c;
7133 
7134     if (!extract32(insn, 29, 1)) {
7135         unallocated_encoding(s);
7136         return;
7137     }
7138     if (insn & (1 << 10 | 1 << 4)) {
7139         unallocated_encoding(s);
7140         return;
7141     }
7142     sf = extract32(insn, 31, 1);
7143     op = extract32(insn, 30, 1);
7144     is_imm = extract32(insn, 11, 1);
7145     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
7146     cond = extract32(insn, 12, 4);
7147     rn = extract32(insn, 5, 5);
7148     nzcv = extract32(insn, 0, 4);
7149 
7150     /* Set T0 = !COND.  */
7151     tcg_t0 = tcg_temp_new_i32();
7152     arm_test_cc(&c, cond);
7153     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
7154 
7155     /* Load the arguments for the new comparison.  */
7156     if (is_imm) {
7157         tcg_y = tcg_temp_new_i64();
7158         tcg_gen_movi_i64(tcg_y, y);
7159     } else {
7160         tcg_y = cpu_reg(s, y);
7161     }
7162     tcg_rn = cpu_reg(s, rn);
7163 
7164     /* Set the flags for the new comparison.  */
7165     tcg_tmp = tcg_temp_new_i64();
7166     if (op) {
7167         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
7168     } else {
7169         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
7170     }
7171 
7172     /* If COND was false, force the flags to #nzcv.  Compute two masks
7173      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
7174      * For tcg hosts that support ANDC, we can make do with just T1.
7175      * In either case, allow the tcg optimizer to delete any unused mask.
7176      */
7177     tcg_t1 = tcg_temp_new_i32();
7178     tcg_t2 = tcg_temp_new_i32();
7179     tcg_gen_neg_i32(tcg_t1, tcg_t0);
7180     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
7181 
7182     if (nzcv & 8) { /* N */
7183         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
7184     } else {
7185         if (TCG_TARGET_HAS_andc_i32) {
7186             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
7187         } else {
7188             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
7189         }
7190     }
7191     if (nzcv & 4) { /* Z */
7192         if (TCG_TARGET_HAS_andc_i32) {
7193             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
7194         } else {
7195             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
7196         }
7197     } else {
7198         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
7199     }
7200     if (nzcv & 2) { /* C */
7201         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
7202     } else {
7203         if (TCG_TARGET_HAS_andc_i32) {
7204             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
7205         } else {
7206             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
7207         }
7208     }
7209     if (nzcv & 1) { /* V */
7210         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
7211     } else {
7212         if (TCG_TARGET_HAS_andc_i32) {
7213             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
7214         } else {
7215             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
7216         }
7217     }
7218 }
7219 
7220 /* Conditional select
7221  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
7222  * +----+----+---+-----------------+------+------+-----+------+------+
7223  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
7224  * +----+----+---+-----------------+------+------+-----+------+------+
7225  */
7226 static void disas_cond_select(DisasContext *s, uint32_t insn)
7227 {
7228     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
7229     TCGv_i64 tcg_rd, zero;
7230     DisasCompare64 c;
7231 
7232     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
7233         /* S == 1 or op2<1> == 1 */
7234         unallocated_encoding(s);
7235         return;
7236     }
7237     sf = extract32(insn, 31, 1);
7238     else_inv = extract32(insn, 30, 1);
7239     rm = extract32(insn, 16, 5);
7240     cond = extract32(insn, 12, 4);
7241     else_inc = extract32(insn, 10, 1);
7242     rn = extract32(insn, 5, 5);
7243     rd = extract32(insn, 0, 5);
7244 
7245     tcg_rd = cpu_reg(s, rd);
7246 
7247     a64_test_cc(&c, cond);
7248     zero = tcg_constant_i64(0);
7249 
7250     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
7251         /* CSET & CSETM.  */
7252         if (else_inv) {
7253             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
7254                                    tcg_rd, c.value, zero);
7255         } else {
7256             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
7257                                 tcg_rd, c.value, zero);
7258         }
7259     } else {
7260         TCGv_i64 t_true = cpu_reg(s, rn);
7261         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
7262         if (else_inv && else_inc) {
7263             tcg_gen_neg_i64(t_false, t_false);
7264         } else if (else_inv) {
7265             tcg_gen_not_i64(t_false, t_false);
7266         } else if (else_inc) {
7267             tcg_gen_addi_i64(t_false, t_false, 1);
7268         }
7269         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
7270     }
7271 
7272     if (!sf) {
7273         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7274     }
7275 }
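
/*
 * Editorial note: the fast path above covers the CSET/CSETM aliases,
 * e.g. "cset w0, eq" is encoded as "csinc w0, wzr, wzr, ne"; the
 * general path implements CSEL, CSINC, CSINV and CSNEG with a single
 * movcond after adjusting t_false.
 */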
7276 
7277 static void handle_clz(DisasContext *s, unsigned int sf,
7278                        unsigned int rn, unsigned int rd)
7279 {
7280     TCGv_i64 tcg_rd, tcg_rn;
7281     tcg_rd = cpu_reg(s, rd);
7282     tcg_rn = cpu_reg(s, rn);
7283 
7284     if (sf) {
7285         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7286     } else {
7287         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
7288         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
7289         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
7290         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
7291     }
7292 }
7293 
7294 static void handle_cls(DisasContext *s, unsigned int sf,
7295                        unsigned int rn, unsigned int rd)
7296 {
7297     TCGv_i64 tcg_rd, tcg_rn;
7298     tcg_rd = cpu_reg(s, rd);
7299     tcg_rn = cpu_reg(s, rn);
7300 
7301     if (sf) {
7302         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
7303     } else {
7304         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
7305         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
7306         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
7307         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
7308     }
7309 }
7310 
7311 static void handle_rbit(DisasContext *s, unsigned int sf,
7312                         unsigned int rn, unsigned int rd)
7313 {
7314     TCGv_i64 tcg_rd, tcg_rn;
7315     tcg_rd = cpu_reg(s, rd);
7316     tcg_rn = cpu_reg(s, rn);
7317 
7318     if (sf) {
7319         gen_helper_rbit64(tcg_rd, tcg_rn);
7320     } else {
7321         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
7322         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
7323         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
7324         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
7325     }
7326 }
7327 
7328 /* REV with sf==1, opcode==3 ("REV64") */
7329 static void handle_rev64(DisasContext *s, unsigned int sf,
7330                          unsigned int rn, unsigned int rd)
7331 {
7332     if (!sf) {
7333         unallocated_encoding(s);
7334         return;
7335     }
7336     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
7337 }
7338 
7339 /* REV (sf==0, opcode==2): byte-reverse the 32-bit value in Wn.
7340  * REV32 (sf==1, opcode==2): byte-reverse each 32-bit half of Xn.
7341  */
7342 static void handle_rev32(DisasContext *s, unsigned int sf,
7343                          unsigned int rn, unsigned int rd)
7344 {
7345     TCGv_i64 tcg_rd = cpu_reg(s, rd);
7346     TCGv_i64 tcg_rn = cpu_reg(s, rn);
7347 
7348     if (sf) {
7349         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
7350         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
7351     } else {
7352         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
7353     }
7354 }
7355 
7356 /* REV16 (opcode==1) */
7357 static void handle_rev16(DisasContext *s, unsigned int sf,
7358                          unsigned int rn, unsigned int rd)
7359 {
7360     TCGv_i64 tcg_rd = cpu_reg(s, rd);
7361     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7362     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
7363     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
7364 
7365     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
7366     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
7367     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
7368     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
7369     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
7370 }
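
/*
 * Editorial worked example: with wn = 0xAABBCCDD and mask 0x00ff00ff,
 * the code above computes tcg_tmp = 0x00AA00CC and tcg_rd = 0xBB00DD00
 * before the final OR, giving 0xBBAADDCC, i.e. the bytes of each 16-bit
 * halfword swapped.
 */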
7371 
7372 /* Data-processing (1 source)
7373  *   31  30  29  28             21 20     16 15    10 9    5 4    0
7374  * +----+---+---+-----------------+---------+--------+------+------+
7375  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
7376  * +----+---+---+-----------------+---------+--------+------+------+
7377  */
7378 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
7379 {
7380     unsigned int sf, opcode, opcode2, rn, rd;
7381     TCGv_i64 tcg_rd;
7382 
7383     if (extract32(insn, 29, 1)) {
7384         unallocated_encoding(s);
7385         return;
7386     }
7387 
7388     sf = extract32(insn, 31, 1);
7389     opcode = extract32(insn, 10, 6);
7390     opcode2 = extract32(insn, 16, 5);
7391     rn = extract32(insn, 5, 5);
7392     rd = extract32(insn, 0, 5);
7393 
7394 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
7395 
7396     switch (MAP(sf, opcode2, opcode)) {
7397     case MAP(0, 0x00, 0x00): /* RBIT */
7398     case MAP(1, 0x00, 0x00):
7399         handle_rbit(s, sf, rn, rd);
7400         break;
7401     case MAP(0, 0x00, 0x01): /* REV16 */
7402     case MAP(1, 0x00, 0x01):
7403         handle_rev16(s, sf, rn, rd);
7404         break;
7405     case MAP(0, 0x00, 0x02): /* REV/REV32 */
7406     case MAP(1, 0x00, 0x02):
7407         handle_rev32(s, sf, rn, rd);
7408         break;
7409     case MAP(1, 0x00, 0x03): /* REV64 */
7410         handle_rev64(s, sf, rn, rd);
7411         break;
7412     case MAP(0, 0x00, 0x04): /* CLZ */
7413     case MAP(1, 0x00, 0x04):
7414         handle_clz(s, sf, rn, rd);
7415         break;
7416     case MAP(0, 0x00, 0x05): /* CLS */
7417     case MAP(1, 0x00, 0x05):
7418         handle_cls(s, sf, rn, rd);
7419         break;
7420     case MAP(1, 0x01, 0x00): /* PACIA */
7421         if (s->pauth_active) {
7422             tcg_rd = cpu_reg(s, rd);
7423             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7424         } else if (!dc_isar_feature(aa64_pauth, s)) {
7425             goto do_unallocated;
7426         }
7427         break;
7428     case MAP(1, 0x01, 0x01): /* PACIB */
7429         if (s->pauth_active) {
7430             tcg_rd = cpu_reg(s, rd);
7431             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7432         } else if (!dc_isar_feature(aa64_pauth, s)) {
7433             goto do_unallocated;
7434         }
7435         break;
7436     case MAP(1, 0x01, 0x02): /* PACDA */
7437         if (s->pauth_active) {
7438             tcg_rd = cpu_reg(s, rd);
7439             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7440         } else if (!dc_isar_feature(aa64_pauth, s)) {
7441             goto do_unallocated;
7442         }
7443         break;
7444     case MAP(1, 0x01, 0x03): /* PACDB */
7445         if (s->pauth_active) {
7446             tcg_rd = cpu_reg(s, rd);
7447             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7448         } else if (!dc_isar_feature(aa64_pauth, s)) {
7449             goto do_unallocated;
7450         }
7451         break;
7452     case MAP(1, 0x01, 0x04): /* AUTIA */
7453         if (s->pauth_active) {
7454             tcg_rd = cpu_reg(s, rd);
7455             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7456         } else if (!dc_isar_feature(aa64_pauth, s)) {
7457             goto do_unallocated;
7458         }
7459         break;
7460     case MAP(1, 0x01, 0x05): /* AUTIB */
7461         if (s->pauth_active) {
7462             tcg_rd = cpu_reg(s, rd);
7463             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7464         } else if (!dc_isar_feature(aa64_pauth, s)) {
7465             goto do_unallocated;
7466         }
7467         break;
7468     case MAP(1, 0x01, 0x06): /* AUTDA */
7469         if (s->pauth_active) {
7470             tcg_rd = cpu_reg(s, rd);
7471             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7472         } else if (!dc_isar_feature(aa64_pauth, s)) {
7473             goto do_unallocated;
7474         }
7475         break;
7476     case MAP(1, 0x01, 0x07): /* AUTDB */
7477         if (s->pauth_active) {
7478             tcg_rd = cpu_reg(s, rd);
7479             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7480         } else if (!dc_isar_feature(aa64_pauth, s)) {
7481             goto do_unallocated;
7482         }
7483         break;
7484     case MAP(1, 0x01, 0x08): /* PACIZA */
7485         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7486             goto do_unallocated;
7487         } else if (s->pauth_active) {
7488             tcg_rd = cpu_reg(s, rd);
7489             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7490         }
7491         break;
7492     case MAP(1, 0x01, 0x09): /* PACIZB */
7493         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7494             goto do_unallocated;
7495         } else if (s->pauth_active) {
7496             tcg_rd = cpu_reg(s, rd);
7497             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7498         }
7499         break;
7500     case MAP(1, 0x01, 0x0a): /* PACDZA */
7501         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7502             goto do_unallocated;
7503         } else if (s->pauth_active) {
7504             tcg_rd = cpu_reg(s, rd);
7505             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7506         }
7507         break;
7508     case MAP(1, 0x01, 0x0b): /* PACDZB */
7509         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7510             goto do_unallocated;
7511         } else if (s->pauth_active) {
7512             tcg_rd = cpu_reg(s, rd);
7513             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7514         }
7515         break;
7516     case MAP(1, 0x01, 0x0c): /* AUTIZA */
7517         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7518             goto do_unallocated;
7519         } else if (s->pauth_active) {
7520             tcg_rd = cpu_reg(s, rd);
7521             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7522         }
7523         break;
7524     case MAP(1, 0x01, 0x0d): /* AUTIZB */
7525         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7526             goto do_unallocated;
7527         } else if (s->pauth_active) {
7528             tcg_rd = cpu_reg(s, rd);
7529             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7530         }
7531         break;
7532     case MAP(1, 0x01, 0x0e): /* AUTDZA */
7533         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7534             goto do_unallocated;
7535         } else if (s->pauth_active) {
7536             tcg_rd = cpu_reg(s, rd);
7537             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7538         }
7539         break;
7540     case MAP(1, 0x01, 0x0f): /* AUTDZB */
7541         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7542             goto do_unallocated;
7543         } else if (s->pauth_active) {
7544             tcg_rd = cpu_reg(s, rd);
7545             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7546         }
7547         break;
7548     case MAP(1, 0x01, 0x10): /* XPACI */
7549         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7550             goto do_unallocated;
7551         } else if (s->pauth_active) {
7552             tcg_rd = cpu_reg(s, rd);
7553             gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
7554         }
7555         break;
7556     case MAP(1, 0x01, 0x11): /* XPACD */
7557         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7558             goto do_unallocated;
7559         } else if (s->pauth_active) {
7560             tcg_rd = cpu_reg(s, rd);
7561             gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
7562         }
7563         break;
7564     default:
7565     do_unallocated:
7566         unallocated_encoding(s);
7567         break;
7568     }
7569 
7570 #undef MAP
7571 }
7572 
7573 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
7574                        unsigned int rm, unsigned int rn, unsigned int rd)
7575 {
7576     TCGv_i64 tcg_n, tcg_m, tcg_rd;
7577     tcg_rd = cpu_reg(s, rd);
7578 
7579     if (!sf && is_signed) {
7580         tcg_n = tcg_temp_new_i64();
7581         tcg_m = tcg_temp_new_i64();
7582         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
7583         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
7584     } else {
7585         tcg_n = read_cpu_reg(s, rn, sf);
7586         tcg_m = read_cpu_reg(s, rm, sf);
7587     }
7588 
7589     if (is_signed) {
7590         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7591     } else {
7592         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7593     }
7594 
7595     if (!sf) { /* zero extend final result */
7596         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7597     }
7598 }
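
/*
 * Editorial note: architecturally, division by zero returns zero with
 * no trap, and a signed overflow such as INT64_MIN / -1 wraps to
 * INT64_MIN; both corner cases are expected to be handled inside the
 * sdiv64/udiv64 helpers rather than in this translation code.
 */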
7599 
7600 /* LSLV, LSRV, ASRV, RORV */
7601 static void handle_shift_reg(DisasContext *s,
7602                              enum a64_shift_type shift_type, unsigned int sf,
7603                              unsigned int rm, unsigned int rn, unsigned int rd)
7604 {
7605     TCGv_i64 tcg_shift = tcg_temp_new_i64();
7606     TCGv_i64 tcg_rd = cpu_reg(s, rd);
7607     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
7608 
7609     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
7610     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
7611 }
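
/*
 * Editorial note: the andi above implements the architectural "shift
 * amount modulo data size" rule, so e.g. LSLV w0, w1, w2 with W2 == 33
 * shifts by 1 rather than producing zero.
 */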
7612 
7613 /* CRC32[BHWX], CRC32C[BHWX] */
7614 static void handle_crc32(DisasContext *s,
7615                          unsigned int sf, unsigned int sz, bool crc32c,
7616                          unsigned int rm, unsigned int rn, unsigned int rd)
7617 {
7618     TCGv_i64 tcg_acc, tcg_val;
7619     TCGv_i32 tcg_bytes;
7620 
7621     if (!dc_isar_feature(aa64_crc32, s)
7622         || (sf == 1 && sz != 3)
7623         || (sf == 0 && sz == 3)) {
7624         unallocated_encoding(s);
7625         return;
7626     }
7627 
7628     if (sz == 3) {
7629         tcg_val = cpu_reg(s, rm);
7630     } else {
7631         uint64_t mask;
7632         switch (sz) {
7633         case 0:
7634             mask = 0xFF;
7635             break;
7636         case 1:
7637             mask = 0xFFFF;
7638             break;
7639         case 2:
7640             mask = 0xFFFFFFFF;
7641             break;
7642         default:
7643             g_assert_not_reached();
7644         }
7645         tcg_val = tcg_temp_new_i64();
7646         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
7647     }
7648 
7649     tcg_acc = cpu_reg(s, rn);
7650     tcg_bytes = tcg_constant_i32(1 << sz);
7651 
7652     if (crc32c) {
7653         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
7654     } else {
7655         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
7656     }
7657 }
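
/*
 * Editorial example: CRC32CW w0, w1, w2 has sf = 0, sz = 2, crc32c = 1,
 * so tcg_val is masked to 32 bits and the helper consumes 1 << 2 = 4
 * bytes; sf == 1 is only valid with sz == 3 (CRC32X/CRC32CX), which the
 * check at the top enforces.
 */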
7658 
7659 /* Data-processing (2 source)
7660  *   31   30  29 28             21 20  16 15    10 9    5 4    0
7661  * +----+---+---+-----------------+------+--------+------+------+
7662  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
7663  * +----+---+---+-----------------+------+--------+------+------+
7664  */
7665 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
7666 {
7667     unsigned int sf, rm, opcode, rn, rd, setflag;
7668     sf = extract32(insn, 31, 1);
7669     setflag = extract32(insn, 29, 1);
7670     rm = extract32(insn, 16, 5);
7671     opcode = extract32(insn, 10, 6);
7672     rn = extract32(insn, 5, 5);
7673     rd = extract32(insn, 0, 5);
7674 
7675     if (setflag && opcode != 0) {
7676         unallocated_encoding(s);
7677         return;
7678     }
7679 
7680     switch (opcode) {
7681     case 0: /* SUBP(S) */
7682         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
7683             goto do_unallocated;
7684         } else {
7685             TCGv_i64 tcg_n, tcg_m, tcg_d;
7686 
7687             tcg_n = read_cpu_reg_sp(s, rn, true);
7688             tcg_m = read_cpu_reg_sp(s, rm, true);
7689             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
7690             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
7691             tcg_d = cpu_reg(s, rd);
7692 
7693             if (setflag) {
7694                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
7695             } else {
7696                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
7697             }
7698         }
7699         break;
7700     case 2: /* UDIV */
7701         handle_div(s, false, sf, rm, rn, rd);
7702         break;
7703     case 3: /* SDIV */
7704         handle_div(s, true, sf, rm, rn, rd);
7705         break;
7706     case 4: /* IRG */
7707         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
7708             goto do_unallocated;
7709         }
7710         if (s->ata[0]) {
7711             gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
7712                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
7713         } else {
7714             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
7715                                              cpu_reg_sp(s, rn));
7716         }
7717         break;
7718     case 5: /* GMI */
7719         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
7720             goto do_unallocated;
7721         } else {
7722             TCGv_i64 t = tcg_temp_new_i64();
7723 
7724             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
7725             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
7726             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
7727         }
7728         break;
7729     case 8: /* LSLV */
7730         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
7731         break;
7732     case 9: /* LSRV */
7733         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
7734         break;
7735     case 10: /* ASRV */
7736         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
7737         break;
7738     case 11: /* RORV */
7739         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
7740         break;
7741     case 12: /* PACGA */
7742         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
7743             goto do_unallocated;
7744         }
7745         gen_helper_pacga(cpu_reg(s, rd), tcg_env,
7746                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
7747         break;
7748     case 16:
7749     case 17:
7750     case 18:
7751     case 19:
7752     case 20:
7753     case 21:
7754     case 22:
7755     case 23: /* CRC32 */
7756     {
7757         int sz = extract32(opcode, 0, 2);
7758         bool crc32c = extract32(opcode, 2, 1);
7759         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
7760         break;
7761     }
7762     default:
7763     do_unallocated:
7764         unallocated_encoding(s);
7765         break;
7766     }
7767 }
7768 
7769 /*
7770  * Data processing - register
7771  *  31  30 29  28      25    21  20  16      10         0
7772  * +--+---+--+---+-------+-----+-------+-------+---------+
7773  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
7774  * +--+---+--+---+-------+-----+-------+-------+---------+
7775  */
7776 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
7777 {
7778     int op0 = extract32(insn, 30, 1);
7779     int op1 = extract32(insn, 28, 1);
7780     int op2 = extract32(insn, 21, 4);
7781     int op3 = extract32(insn, 10, 6);
7782 
7783     if (!op1) {
7784         if (op2 & 8) {
7785             if (op2 & 1) {
7786                 /* Add/sub (extended register) */
7787                 disas_add_sub_ext_reg(s, insn);
7788             } else {
7789                 /* Add/sub (shifted register) */
7790                 disas_add_sub_reg(s, insn);
7791             }
7792         } else {
7793             /* Logical (shifted register) */
7794             disas_logic_reg(s, insn);
7795         }
7796         return;
7797     }
7798 
7799     switch (op2) {
7800     case 0x0:
7801         switch (op3) {
7802         case 0x00: /* Add/subtract (with carry) */
7803             disas_adc_sbc(s, insn);
7804             break;
7805 
7806         case 0x01: /* Rotate right into flags */
7807         case 0x21:
7808             disas_rotate_right_into_flags(s, insn);
7809             break;
7810 
7811         case 0x02: /* Evaluate into flags */
7812         case 0x12:
7813         case 0x22:
7814         case 0x32:
7815             disas_evaluate_into_flags(s, insn);
7816             break;
7817 
7818         default:
7819             goto do_unallocated;
7820         }
7821         break;
7822 
7823     case 0x2: /* Conditional compare */
7824         disas_cc(s, insn); /* both imm and reg forms */
7825         break;
7826 
7827     case 0x4: /* Conditional select */
7828         disas_cond_select(s, insn);
7829         break;
7830 
7831     case 0x6: /* Data-processing */
7832         if (op0) {    /* (1 source) */
7833             disas_data_proc_1src(s, insn);
7834         } else {      /* (2 source) */
7835             disas_data_proc_2src(s, insn);
7836         }
7837         break;
7838     case 0x8 ... 0xf: /* (3 source) */
7839         disas_data_proc_3src(s, insn);
7840         break;
7841 
7842     default:
7843     do_unallocated:
7844         unallocated_encoding(s);
7845         break;
7846     }
7847 }
7848 
7849 static void handle_fp_compare(DisasContext *s, int size,
7850                               unsigned int rn, unsigned int rm,
7851                               bool cmp_with_zero, bool signal_all_nans)
7852 {
7853     TCGv_i64 tcg_flags = tcg_temp_new_i64();
7854     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7855 
7856     if (size == MO_64) {
7857         TCGv_i64 tcg_vn, tcg_vm;
7858 
7859         tcg_vn = read_fp_dreg(s, rn);
7860         if (cmp_with_zero) {
7861             tcg_vm = tcg_constant_i64(0);
7862         } else {
7863             tcg_vm = read_fp_dreg(s, rm);
7864         }
7865         if (signal_all_nans) {
7866             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7867         } else {
7868             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7869         }
7870     } else {
7871         TCGv_i32 tcg_vn = tcg_temp_new_i32();
7872         TCGv_i32 tcg_vm = tcg_temp_new_i32();
7873 
7874         read_vec_element_i32(s, tcg_vn, rn, 0, size);
7875         if (cmp_with_zero) {
7876             tcg_gen_movi_i32(tcg_vm, 0);
7877         } else {
7878             read_vec_element_i32(s, tcg_vm, rm, 0, size);
7879         }
7880 
7881         switch (size) {
7882         case MO_32:
7883             if (signal_all_nans) {
7884                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7885             } else {
7886                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7887             }
7888             break;
7889         case MO_16:
7890             if (signal_all_nans) {
7891                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7892             } else {
7893                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7894             }
7895             break;
7896         default:
7897             g_assert_not_reached();
7898         }
7899     }
7900 
7901     gen_set_nzcv(tcg_flags);
7902 }
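
/*
 * Editorial note: architecturally an FP compare yields NZCV = 0110 for
 * equal, 1000 for less than, 0010 for greater than and 0011 for
 * unordered; the *_a64 compare helpers are expected to return that
 * encoding, which gen_set_nzcv() then installs in the emulated flags.
 */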
7903 
7904 /* Floating point compare
7905  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
7906  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
7907  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
7908  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
7909  */
7910 static void disas_fp_compare(DisasContext *s, uint32_t insn)
7911 {
7912     unsigned int mos, type, rm, op, rn, opc, op2r;
7913     int size;
7914 
7915     mos = extract32(insn, 29, 3);
7916     type = extract32(insn, 22, 2);
7917     rm = extract32(insn, 16, 5);
7918     op = extract32(insn, 14, 2);
7919     rn = extract32(insn, 5, 5);
7920     opc = extract32(insn, 3, 2);
7921     op2r = extract32(insn, 0, 3);
7922 
7923     if (mos || op || op2r) {
7924         unallocated_encoding(s);
7925         return;
7926     }
7927 
7928     switch (type) {
7929     case 0:
7930         size = MO_32;
7931         break;
7932     case 1:
7933         size = MO_64;
7934         break;
7935     case 3:
7936         size = MO_16;
7937         if (dc_isar_feature(aa64_fp16, s)) {
7938             break;
7939         }
7940         /* fallthru */
7941     default:
7942         unallocated_encoding(s);
7943         return;
7944     }
7945 
7946     if (!fp_access_check(s)) {
7947         return;
7948     }
7949 
7950     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
7951 }
7952 
7953 /* Floating point conditional compare
7954  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
7955  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
7956  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
7957  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
7958  */
7959 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
7960 {
7961     unsigned int mos, type, rm, cond, rn, op, nzcv;
7962     TCGLabel *label_continue = NULL;
7963     int size;
7964 
7965     mos = extract32(insn, 29, 3);
7966     type = extract32(insn, 22, 2);
7967     rm = extract32(insn, 16, 5);
7968     cond = extract32(insn, 12, 4);
7969     rn = extract32(insn, 5, 5);
7970     op = extract32(insn, 4, 1);
7971     nzcv = extract32(insn, 0, 4);
7972 
7973     if (mos) {
7974         unallocated_encoding(s);
7975         return;
7976     }
7977 
7978     switch (type) {
7979     case 0:
7980         size = MO_32;
7981         break;
7982     case 1:
7983         size = MO_64;
7984         break;
7985     case 3:
7986         size = MO_16;
7987         if (dc_isar_feature(aa64_fp16, s)) {
7988             break;
7989         }
7990         /* fallthru */
7991     default:
7992         unallocated_encoding(s);
7993         return;
7994     }
7995 
7996     if (!fp_access_check(s)) {
7997         return;
7998     }
7999 
8000     if (cond < 0x0e) { /* not always */
8001         TCGLabel *label_match = gen_new_label();
8002         label_continue = gen_new_label();
8003         arm_gen_test_cc(cond, label_match);
8004         /* nomatch: */
8005         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
8006         tcg_gen_br(label_continue);
8007         gen_set_label(label_match);
8008     }
8009 
8010     handle_fp_compare(s, size, rn, rm, false, op);
8011 
8012     if (cond < 0x0e) {
8013         gen_set_label(label_continue);
8014     }
8015 }
8016 
8017 /* Floating-point data-processing (1 source) - half precision */
8018 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
8019 {
8020     TCGv_ptr fpst = NULL;
8021     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
8022     TCGv_i32 tcg_res = tcg_temp_new_i32();
8023 
8024     switch (opcode) {
8025     case 0x0: /* FMOV */
8026         tcg_gen_mov_i32(tcg_res, tcg_op);
8027         break;
8028     case 0x1: /* FABS */
8029         gen_vfp_absh(tcg_res, tcg_op);
8030         break;
8031     case 0x2: /* FNEG */
8032         gen_vfp_negh(tcg_res, tcg_op);
8033         break;
8034     case 0x3: /* FSQRT */
8035         fpst = fpstatus_ptr(FPST_FPCR_F16);
8036         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
8037         break;
8038     case 0x8: /* FRINTN */
8039     case 0x9: /* FRINTP */
8040     case 0xa: /* FRINTM */
8041     case 0xb: /* FRINTZ */
8042     case 0xc: /* FRINTA */
8043     {
8044         TCGv_i32 tcg_rmode;
8045 
8046         fpst = fpstatus_ptr(FPST_FPCR_F16);
8047         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
8048         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
8049         gen_restore_rmode(tcg_rmode, fpst);
8050         break;
8051     }
8052     case 0xe: /* FRINTX */
8053         fpst = fpstatus_ptr(FPST_FPCR_F16);
8054         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
8055         break;
8056     case 0xf: /* FRINTI */
8057         fpst = fpstatus_ptr(FPST_FPCR_F16);
8058         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
8059         break;
8060     default:
8061         g_assert_not_reached();
8062     }
8063 
8064     write_fp_sreg(s, rd, tcg_res);
8065 }
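
/*
 * Editorial note: for the FRINTN..FRINTA cases the rounding mode comes
 * straight from the low opcode bits (0x8..0xc map onto
 * FPROUNDING_TIEEVEN, POSINF, NEGINF, ZERO and TIEAWAY respectively),
 * which is why "opcode & 7" is passed directly to gen_set_rmode().
 */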
8066 
8067 /* Floating-point data-processing (1 source) - single precision */
8068 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
8069 {
8070     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
8071     TCGv_i32 tcg_op, tcg_res;
8072     TCGv_ptr fpst;
8073     int rmode = -1;
8074 
8075     tcg_op = read_fp_sreg(s, rn);
8076     tcg_res = tcg_temp_new_i32();
8077 
8078     switch (opcode) {
8079     case 0x0: /* FMOV */
8080         tcg_gen_mov_i32(tcg_res, tcg_op);
8081         goto done;
8082     case 0x1: /* FABS */
8083         gen_vfp_abss(tcg_res, tcg_op);
8084         goto done;
8085     case 0x2: /* FNEG */
8086         gen_vfp_negs(tcg_res, tcg_op);
8087         goto done;
8088     case 0x3: /* FSQRT */
8089         gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
8090         goto done;
8091     case 0x6: /* BFCVT */
8092         gen_fpst = gen_helper_bfcvt;
8093         break;
8094     case 0x8: /* FRINTN */
8095     case 0x9: /* FRINTP */
8096     case 0xa: /* FRINTM */
8097     case 0xb: /* FRINTZ */
8098     case 0xc: /* FRINTA */
8099         rmode = opcode & 7;
8100         gen_fpst = gen_helper_rints;
8101         break;
8102     case 0xe: /* FRINTX */
8103         gen_fpst = gen_helper_rints_exact;
8104         break;
8105     case 0xf: /* FRINTI */
8106         gen_fpst = gen_helper_rints;
8107         break;
8108     case 0x10: /* FRINT32Z */
8109         rmode = FPROUNDING_ZERO;
8110         gen_fpst = gen_helper_frint32_s;
8111         break;
8112     case 0x11: /* FRINT32X */
8113         gen_fpst = gen_helper_frint32_s;
8114         break;
8115     case 0x12: /* FRINT64Z */
8116         rmode = FPROUNDING_ZERO;
8117         gen_fpst = gen_helper_frint64_s;
8118         break;
8119     case 0x13: /* FRINT64X */
8120         gen_fpst = gen_helper_frint64_s;
8121         break;
8122     default:
8123         g_assert_not_reached();
8124     }
8125 
8126     fpst = fpstatus_ptr(FPST_FPCR);
8127     if (rmode >= 0) {
8128         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
8129         gen_fpst(tcg_res, tcg_op, fpst);
8130         gen_restore_rmode(tcg_rmode, fpst);
8131     } else {
8132         gen_fpst(tcg_res, tcg_op, fpst);
8133     }
8134 
8135  done:
8136     write_fp_sreg(s, rd, tcg_res);
8137 }
8138 
8139 /* Floating-point data-processing (1 source) - double precision */
8140 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
8141 {
8142     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
8143     TCGv_i64 tcg_op, tcg_res;
8144     TCGv_ptr fpst;
8145     int rmode = -1;
8146 
8147     switch (opcode) {
8148     case 0x0: /* FMOV */
8149         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
8150         return;
8151     }
8152 
8153     tcg_op = read_fp_dreg(s, rn);
8154     tcg_res = tcg_temp_new_i64();
8155 
8156     switch (opcode) {
8157     case 0x1: /* FABS */
8158         gen_vfp_absd(tcg_res, tcg_op);
8159         goto done;
8160     case 0x2: /* FNEG */
8161         gen_vfp_negd(tcg_res, tcg_op);
8162         goto done;
8163     case 0x3: /* FSQRT */
8164         gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
8165         goto done;
8166     case 0x8: /* FRINTN */
8167     case 0x9: /* FRINTP */
8168     case 0xa: /* FRINTM */
8169     case 0xb: /* FRINTZ */
8170     case 0xc: /* FRINTA */
8171         rmode = opcode & 7;
8172         gen_fpst = gen_helper_rintd;
8173         break;
8174     case 0xe: /* FRINTX */
8175         gen_fpst = gen_helper_rintd_exact;
8176         break;
8177     case 0xf: /* FRINTI */
8178         gen_fpst = gen_helper_rintd;
8179         break;
8180     case 0x10: /* FRINT32Z */
8181         rmode = FPROUNDING_ZERO;
8182         gen_fpst = gen_helper_frint32_d;
8183         break;
8184     case 0x11: /* FRINT32X */
8185         gen_fpst = gen_helper_frint32_d;
8186         break;
8187     case 0x12: /* FRINT64Z */
8188         rmode = FPROUNDING_ZERO;
8189         gen_fpst = gen_helper_frint64_d;
8190         break;
8191     case 0x13: /* FRINT64X */
8192         gen_fpst = gen_helper_frint64_d;
8193         break;
8194     default:
8195         g_assert_not_reached();
8196     }
8197 
8198     fpst = fpstatus_ptr(FPST_FPCR);
8199     if (rmode >= 0) {
8200         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
8201         gen_fpst(tcg_res, tcg_op, fpst);
8202         gen_restore_rmode(tcg_rmode, fpst);
8203     } else {
8204         gen_fpst(tcg_res, tcg_op, fpst);
8205     }
8206 
8207  done:
8208     write_fp_dreg(s, rd, tcg_res);
8209 }
8210 
8211 static void handle_fp_fcvt(DisasContext *s, int opcode,
8212                            int rd, int rn, int dtype, int ntype)
8213 {
8214     switch (ntype) {
8215     case 0x0:
8216     {
8217         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
8218         if (dtype == 1) {
8219             /* Single to double */
8220             TCGv_i64 tcg_rd = tcg_temp_new_i64();
8221             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
8222             write_fp_dreg(s, rd, tcg_rd);
8223         } else {
8224             /* Single to half */
8225             TCGv_i32 tcg_rd = tcg_temp_new_i32();
8226             TCGv_i32 ahp = get_ahp_flag();
8227             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8228 
8229             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8230             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
8231             write_fp_sreg(s, rd, tcg_rd);
8232         }
8233         break;
8234     }
8235     case 0x1:
8236     {
8237         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8238         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8239         if (dtype == 0) {
8240             /* Double to single */
8241             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
8242         } else {
8243             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8244             TCGv_i32 ahp = get_ahp_flag();
8245             /* Double to half */
8246             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8247             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
8248         }
8249         write_fp_sreg(s, rd, tcg_rd);
8250         break;
8251     }
8252     case 0x3:
8253     {
8254         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
8255         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
8256         TCGv_i32 tcg_ahp = get_ahp_flag();
8257         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
8258         if (dtype == 0) {
8259             /* Half to single */
8260             TCGv_i32 tcg_rd = tcg_temp_new_i32();
8261             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8262             write_fp_sreg(s, rd, tcg_rd);
8263         } else {
8264             /* Half to double */
8265             TCGv_i64 tcg_rd = tcg_temp_new_i64();
8266             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8267             write_fp_dreg(s, rd, tcg_rd);
8268         }
8269         break;
8270     }
8271     default:
8272         g_assert_not_reached();
8273     }
8274 }
8275 
8276 /* Floating point data-processing (1 source)
8277  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
8278  * +---+---+---+-----------+------+---+--------+-----------+------+------+
8279  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
8280  * +---+---+---+-----------+------+---+--------+-----------+------+------+
8281  */
8282 static void disas_fp_1src(DisasContext *s, uint32_t insn)
8283 {
8284     int mos = extract32(insn, 29, 3);
8285     int type = extract32(insn, 22, 2);
8286     int opcode = extract32(insn, 15, 6);
8287     int rn = extract32(insn, 5, 5);
8288     int rd = extract32(insn, 0, 5);
8289 
8290     if (mos) {
8291         goto do_unallocated;
8292     }
8293 
8294     switch (opcode) {
8295     case 0x4: case 0x5: case 0x7:
8296     {
8297         /* FCVT between half, single and double precision */
8298         int dtype = extract32(opcode, 0, 2);
8299         if (type == 2 || dtype == type) {
8300             goto do_unallocated;
8301         }
8302         if (!fp_access_check(s)) {
8303             return;
8304         }
8305 
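             /*
              * For example, opcode 0x5 (dtype == 1) with type == 0 is
              * FCVT Dd, Sn (single to double); opcode 0x7 (dtype == 3) with
              * type == 1 is FCVT Hd, Dn (double to half).
              */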
8306         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
8307         break;
8308     }
8309 
8310     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
8311         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
8312             goto do_unallocated;
8313         }
8314         /* fall through */
8315     case 0x0 ... 0x3:
8316     case 0x8 ... 0xc:
8317     case 0xe ... 0xf:
8318         /* 32-to-32 and 64-to-64 ops */
8319         switch (type) {
8320         case 0:
8321             if (!fp_access_check(s)) {
8322                 return;
8323             }
8324             handle_fp_1src_single(s, opcode, rd, rn);
8325             break;
8326         case 1:
8327             if (!fp_access_check(s)) {
8328                 return;
8329             }
8330             handle_fp_1src_double(s, opcode, rd, rn);
8331             break;
8332         case 3:
8333             if (!dc_isar_feature(aa64_fp16, s)) {
8334                 goto do_unallocated;
8335             }
8336 
8337             if (!fp_access_check(s)) {
8338                 return;
8339             }
8340             handle_fp_1src_half(s, opcode, rd, rn);
8341             break;
8342         default:
8343             goto do_unallocated;
8344         }
8345         break;
8346 
8347     case 0x6:
8348         switch (type) {
8349         case 1: /* BFCVT */
8350             if (!dc_isar_feature(aa64_bf16, s)) {
8351                 goto do_unallocated;
8352             }
8353             if (!fp_access_check(s)) {
8354                 return;
8355             }
8356             handle_fp_1src_single(s, opcode, rd, rn);
8357             break;
8358         default:
8359             goto do_unallocated;
8360         }
8361         break;
8362 
8363     default:
8364     do_unallocated:
8365         unallocated_encoding(s);
8366         break;
8367     }
8368 }
8369 
8370 /* Floating point immediate
8371  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
8372  * +---+---+---+-----------+------+---+------------+-------+------+------+
8373  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
8374  * +---+---+---+-----------+------+---+------------+-------+------+------+
8375  */
8376 static void disas_fp_imm(DisasContext *s, uint32_t insn)
8377 {
8378     int rd = extract32(insn, 0, 5);
8379     int imm5 = extract32(insn, 5, 5);
8380     int imm8 = extract32(insn, 13, 8);
8381     int type = extract32(insn, 22, 2);
8382     int mos = extract32(insn, 29, 3);
8383     uint64_t imm;
8384     MemOp sz;
8385 
8386     if (mos || imm5) {
8387         unallocated_encoding(s);
8388         return;
8389     }
8390 
8391     switch (type) {
8392     case 0:
8393         sz = MO_32;
8394         break;
8395     case 1:
8396         sz = MO_64;
8397         break;
8398     case 3:
8399         sz = MO_16;
8400         if (dc_isar_feature(aa64_fp16, s)) {
8401             break;
8402         }
8403         /* fallthru */
8404     default:
8405         unallocated_encoding(s);
8406         return;
8407     }
8408 
8409     if (!fp_access_check(s)) {
8410         return;
8411     }
8412 
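         /*
          * Illustrative values: imm8 == 0x70 expands to 1.0 in every size,
          * i.e. 0x3c00 (MO_16), 0x3f800000 (MO_32), 0x3ff0000000000000 (MO_64);
          * write_fp_dreg then places it, zero-extended, in the D register.
          */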
8413     imm = vfp_expand_imm(sz, imm8);
8414     write_fp_dreg(s, rd, tcg_constant_i64(imm));
8415 }
8416 
8417 /* Handle floating point <=> fixed point conversions. Note that we can
8418  * also deal with fp <=> integer conversions as a special case (scale == 64).
8419  * OPTME: consider handling that special case explicitly, or at least skipping
8420  * the call to scalbn in the helpers for zero shifts.
8421  */
8422 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
8423                            bool itof, int rmode, int scale, int sf, int type)
8424 {
8425     bool is_signed = !(opcode & 1);
8426     TCGv_ptr tcg_fpstatus;
8427     TCGv_i32 tcg_shift, tcg_single;
8428     TCGv_i64 tcg_double;
8429 
8430     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
8431 
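         /*
          * 64 - scale is the number of fraction bits: the helpers divide by
          * 2^(64 - scale) when converting to FP and multiply by it when
          * converting back, so the integer-conversion callers pass scale == 64
          * and the shift collapses to zero.
          */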
8432     tcg_shift = tcg_constant_i32(64 - scale);
8433 
8434     if (itof) {
8435         TCGv_i64 tcg_int = cpu_reg(s, rn);
8436         if (!sf) {
8437             TCGv_i64 tcg_extend = tcg_temp_new_i64();
8438 
8439             if (is_signed) {
8440                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
8441             } else {
8442                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
8443             }
8444 
8445             tcg_int = tcg_extend;
8446         }
8447 
8448         switch (type) {
8449         case 1: /* float64 */
8450             tcg_double = tcg_temp_new_i64();
8451             if (is_signed) {
8452                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
8453                                      tcg_shift, tcg_fpstatus);
8454             } else {
8455                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
8456                                      tcg_shift, tcg_fpstatus);
8457             }
8458             write_fp_dreg(s, rd, tcg_double);
8459             break;
8460 
8461         case 0: /* float32 */
8462             tcg_single = tcg_temp_new_i32();
8463             if (is_signed) {
8464                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
8465                                      tcg_shift, tcg_fpstatus);
8466             } else {
8467                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
8468                                      tcg_shift, tcg_fpstatus);
8469             }
8470             write_fp_sreg(s, rd, tcg_single);
8471             break;
8472 
8473         case 3: /* float16 */
8474             tcg_single = tcg_temp_new_i32();
8475             if (is_signed) {
8476                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
8477                                      tcg_shift, tcg_fpstatus);
8478             } else {
8479                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
8480                                      tcg_shift, tcg_fpstatus);
8481             }
8482             write_fp_sreg(s, rd, tcg_single);
8483             break;
8484 
8485         default:
8486             g_assert_not_reached();
8487         }
8488     } else {
8489         TCGv_i64 tcg_int = cpu_reg(s, rd);
8490         TCGv_i32 tcg_rmode;
8491 
8492         if (extract32(opcode, 2, 1)) {
8493             /* There are too many rounding modes to all fit into rmode,
8494              * so FCVTA[US] is a special case.
8495              */
8496             rmode = FPROUNDING_TIEAWAY;
8497         }
8498 
8499         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
8500 
8501         switch (type) {
8502         case 1: /* float64 */
8503             tcg_double = read_fp_dreg(s, rn);
8504             if (is_signed) {
8505                 if (!sf) {
8506                     gen_helper_vfp_tosld(tcg_int, tcg_double,
8507                                          tcg_shift, tcg_fpstatus);
8508                 } else {
8509                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
8510                                          tcg_shift, tcg_fpstatus);
8511                 }
8512             } else {
8513                 if (!sf) {
8514                     gen_helper_vfp_tould(tcg_int, tcg_double,
8515                                          tcg_shift, tcg_fpstatus);
8516                 } else {
8517                     gen_helper_vfp_touqd(tcg_int, tcg_double,
8518                                          tcg_shift, tcg_fpstatus);
8519                 }
8520             }
8521             if (!sf) {
8522                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
8523             }
8524             break;
8525 
8526         case 0: /* float32 */
8527             tcg_single = read_fp_sreg(s, rn);
8528             if (sf) {
8529                 if (is_signed) {
8530                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
8531                                          tcg_shift, tcg_fpstatus);
8532                 } else {
8533                     gen_helper_vfp_touqs(tcg_int, tcg_single,
8534                                          tcg_shift, tcg_fpstatus);
8535                 }
8536             } else {
8537                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
8538                 if (is_signed) {
8539                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
8540                                          tcg_shift, tcg_fpstatus);
8541                 } else {
8542                     gen_helper_vfp_touls(tcg_dest, tcg_single,
8543                                          tcg_shift, tcg_fpstatus);
8544                 }
8545                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
8546             }
8547             break;
8548 
8549         case 3: /* float16 */
8550             tcg_single = read_fp_sreg(s, rn);
8551             if (sf) {
8552                 if (is_signed) {
8553                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
8554                                          tcg_shift, tcg_fpstatus);
8555                 } else {
8556                     gen_helper_vfp_touqh(tcg_int, tcg_single,
8557                                          tcg_shift, tcg_fpstatus);
8558                 }
8559             } else {
8560                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
8561                 if (is_signed) {
8562                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
8563                                          tcg_shift, tcg_fpstatus);
8564                 } else {
8565                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
8566                                          tcg_shift, tcg_fpstatus);
8567                 }
8568                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
8569             }
8570             break;
8571 
8572         default:
8573             g_assert_not_reached();
8574         }
8575 
8576         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8577     }
8578 }
8579 
8580 /* Floating point <-> fixed point conversions
8581  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
8582  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
8583  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
8584  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
8585  */
8586 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
8587 {
8588     int rd = extract32(insn, 0, 5);
8589     int rn = extract32(insn, 5, 5);
8590     int scale = extract32(insn, 10, 6);
8591     int opcode = extract32(insn, 16, 3);
8592     int rmode = extract32(insn, 19, 2);
8593     int type = extract32(insn, 22, 2);
8594     bool sbit = extract32(insn, 29, 1);
8595     bool sf = extract32(insn, 31, 1);
8596     bool itof;
8597 
8598     if (sbit || (!sf && scale < 32)) {
8599         unallocated_encoding(s);
8600         return;
8601     }
8602 
8603     switch (type) {
8604     case 0: /* float32 */
8605     case 1: /* float64 */
8606         break;
8607     case 3: /* float16 */
8608         if (dc_isar_feature(aa64_fp16, s)) {
8609             break;
8610         }
8611         /* fallthru */
8612     default:
8613         unallocated_encoding(s);
8614         return;
8615     }
8616 
8617     switch ((rmode << 3) | opcode) {
8618     case 0x2: /* SCVTF */
8619     case 0x3: /* UCVTF */
8620         itof = true;
8621         break;
8622     case 0x18: /* FCVTZS */
8623     case 0x19: /* FCVTZU */
8624         itof = false;
8625         break;
8626     default:
8627         unallocated_encoding(s);
8628         return;
8629     }
8630 
8631     if (!fp_access_check(s)) {
8632         return;
8633     }
8634 
8635     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
8636 }
8637 
8638 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
8639 {
8640     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
8641      * without conversion.
8642      */
8643 
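         /*
          * type selects the register pairing: 0 is W <-> S, 1 is X <-> D,
          * 2 is X <-> V.D[1] (the upper 64 bits of the Q register), and
          * 3 is W <-> H (half precision).
          */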
8644     if (itof) {
8645         TCGv_i64 tcg_rn = cpu_reg(s, rn);
8646         TCGv_i64 tmp;
8647 
8648         switch (type) {
8649         case 0:
8650             /* 32 bit */
8651             tmp = tcg_temp_new_i64();
8652             tcg_gen_ext32u_i64(tmp, tcg_rn);
8653             write_fp_dreg(s, rd, tmp);
8654             break;
8655         case 1:
8656             /* 64 bit */
8657             write_fp_dreg(s, rd, tcg_rn);
8658             break;
8659         case 2:
8660             /* 64 bit to top half. */
8661             tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
8662             clear_vec_high(s, true, rd);
8663             break;
8664         case 3:
8665             /* 16 bit */
8666             tmp = tcg_temp_new_i64();
8667             tcg_gen_ext16u_i64(tmp, tcg_rn);
8668             write_fp_dreg(s, rd, tmp);
8669             break;
8670         default:
8671             g_assert_not_reached();
8672         }
8673     } else {
8674         TCGv_i64 tcg_rd = cpu_reg(s, rd);
8675 
8676         switch (type) {
8677         case 0:
8678             /* 32 bit */
8679             tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
8680             break;
8681         case 1:
8682             /* 64 bit */
8683             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
8684             break;
8685         case 2:
8686             /* 64 bits from top half */
8687             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
8688             break;
8689         case 3:
8690             /* 16 bit */
8691             tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
8692             break;
8693         default:
8694             g_assert_not_reached();
8695         }
8696     }
8697 }
8698 
8699 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
8700 {
8701     TCGv_i64 t = read_fp_dreg(s, rn);
8702     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
8703 
8704     gen_helper_fjcvtzs(t, t, fpstatus);
8705 
8706     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
8707     tcg_gen_extrh_i64_i32(cpu_ZF, t);
8708     tcg_gen_movi_i32(cpu_CF, 0);
8709     tcg_gen_movi_i32(cpu_NF, 0);
8710     tcg_gen_movi_i32(cpu_VF, 0);
8711 }
8712 
8713 /* Floating point <-> integer conversions
8714  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
8715  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
8716  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
8717  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
8718  */
8719 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
8720 {
8721     int rd = extract32(insn, 0, 5);
8722     int rn = extract32(insn, 5, 5);
8723     int opcode = extract32(insn, 16, 3);
8724     int rmode = extract32(insn, 19, 2);
8725     int type = extract32(insn, 22, 2);
8726     bool sbit = extract32(insn, 29, 1);
8727     bool sf = extract32(insn, 31, 1);
8728     bool itof = false;
8729 
8730     if (sbit) {
8731         goto do_unallocated;
8732     }
8733 
8734     switch (opcode) {
8735     case 2: /* SCVTF */
8736     case 3: /* UCVTF */
8737         itof = true;
8738         /* fallthru */
8739     case 4: /* FCVTAS */
8740     case 5: /* FCVTAU */
8741         if (rmode != 0) {
8742             goto do_unallocated;
8743         }
8744         /* fallthru */
8745     case 0: /* FCVT[NPMZ]S */
8746     case 1: /* FCVT[NPMZ]U */
8747         switch (type) {
8748         case 0: /* float32 */
8749         case 1: /* float64 */
8750             break;
8751         case 3: /* float16 */
8752             if (!dc_isar_feature(aa64_fp16, s)) {
8753                 goto do_unallocated;
8754             }
8755             break;
8756         default:
8757             goto do_unallocated;
8758         }
8759         if (!fp_access_check(s)) {
8760             return;
8761         }
8762         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
8763         break;
8764 
8765     default:
8766         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
8767         case 0b01100110: /* FMOV half <-> 32-bit int */
8768         case 0b01100111:
8769         case 0b11100110: /* FMOV half <-> 64-bit int */
8770         case 0b11100111:
8771             if (!dc_isar_feature(aa64_fp16, s)) {
8772                 goto do_unallocated;
8773             }
8774             /* fallthru */
8775         case 0b00000110: /* FMOV 32-bit */
8776         case 0b00000111:
8777         case 0b10100110: /* FMOV 64-bit */
8778         case 0b10100111:
8779         case 0b11001110: /* FMOV top half of 128-bit */
8780         case 0b11001111:
8781             if (!fp_access_check(s)) {
8782                 return;
8783             }
8784             itof = opcode & 1;
8785             handle_fmov(s, rd, rn, type, itof);
8786             break;
8787 
8788         case 0b00111110: /* FJCVTZS */
8789             if (!dc_isar_feature(aa64_jscvt, s)) {
8790                 goto do_unallocated;
8791             } else if (fp_access_check(s)) {
8792                 handle_fjcvtzs(s, rd, rn);
8793             }
8794             break;
8795 
8796         default:
8797         do_unallocated:
8798             unallocated_encoding(s);
8799             return;
8800         }
8801         break;
8802     }
8803 }
8804 
8805 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
8806  *   31  30  29 28     25 24                          0
8807  * +---+---+---+---------+-----------------------------+
8808  * |   | 0 |   | 1 1 1 1 |                             |
8809  * +---+---+---+---------+-----------------------------+
8810  */
8811 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
8812 {
8813     if (extract32(insn, 24, 1)) {
8814         unallocated_encoding(s); /* in decodetree */
8815     } else if (extract32(insn, 21, 1) == 0) {
8816         /* Floating point to fixed point conversions */
8817         disas_fp_fixed_conv(s, insn);
8818     } else {
8819         switch (extract32(insn, 10, 2)) {
8820         case 1:
8821             /* Floating point conditional compare */
8822             disas_fp_ccomp(s, insn);
8823             break;
8824         case 2:
8825             /* Floating point data-processing (2 source) */
8826             unallocated_encoding(s); /* in decodetree */
8827             break;
8828         case 3:
8829             /* Floating point conditional select */
8830             unallocated_encoding(s); /* in decodetree */
8831             break;
8832         case 0:
8833             switch (ctz32(extract32(insn, 12, 4))) {
8834             case 0: /* [15:12] == xxx1 */
8835                 /* Floating point immediate */
8836                 disas_fp_imm(s, insn);
8837                 break;
8838             case 1: /* [15:12] == xx10 */
8839                 /* Floating point compare */
8840                 disas_fp_compare(s, insn);
8841                 break;
8842             case 2: /* [15:12] == x100 */
8843                 /* Floating point data-processing (1 source) */
8844                 disas_fp_1src(s, insn);
8845                 break;
8846             case 3: /* [15:12] == 1000 */
8847                 unallocated_encoding(s);
8848                 break;
8849             default: /* [15:12] == 0000 */
8850                 /* Floating point <-> integer conversions */
8851                 disas_fp_int_conv(s, insn);
8852                 break;
8853             }
8854             break;
8855         }
8856     }
8857 }
8858 
8859 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
8860                      int pos)
8861 {
8862     /* Extract 64 bits from the middle of two concatenated 64 bit
8863      * vector register slices left:right. The extracted bits start
8864      * at 'pos' bits into the right (least significant) side.
8865      * We return the result in tcg_right, and guarantee not to
8866      * trash tcg_left.
8867      */
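         /* Concretely: result = (right >> pos) | (left << (64 - pos)). */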
8868     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8869     assert(pos > 0 && pos < 64);
8870 
8871     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
8872     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
8873     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
8874 }
8875 
8876 /* EXT
8877  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
8878  * +---+---+-------------+-----+---+------+---+------+---+------+------+
8879  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
8880  * +---+---+-------------+-----+---+------+---+------+---+------+------+
8881  */
8882 static void disas_simd_ext(DisasContext *s, uint32_t insn)
8883 {
8884     int is_q = extract32(insn, 30, 1);
8885     int op2 = extract32(insn, 22, 2);
8886     int imm4 = extract32(insn, 11, 4);
8887     int rm = extract32(insn, 16, 5);
8888     int rn = extract32(insn, 5, 5);
8889     int rd = extract32(insn, 0, 5);
8890     int pos = imm4 << 3;
8891     TCGv_i64 tcg_resl, tcg_resh;
8892 
8893     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
8894         unallocated_encoding(s);
8895         return;
8896     }
8897 
8898     if (!fp_access_check(s)) {
8899         return;
8900     }
8901 
8902     tcg_resh = tcg_temp_new_i64();
8903     tcg_resl = tcg_temp_new_i64();
8904 
8905     /* Vd gets bits starting at pos bits into Vm:Vn. This is
8906      * either extracting 128 bits from a 128:128 concatenation, or
8907      * extracting 64 bits from a 64:64 concatenation.
8908      */
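         /*
          * pos is a byte offset scaled to bits: e.g. imm4 == 3 gives pos == 24,
          * so the result starts at byte 3 of Vn.
          */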
8909     if (!is_q) {
8910         read_vec_element(s, tcg_resl, rn, 0, MO_64);
8911         if (pos != 0) {
8912             read_vec_element(s, tcg_resh, rm, 0, MO_64);
8913             do_ext64(s, tcg_resh, tcg_resl, pos);
8914         }
8915     } else {
8916         TCGv_i64 tcg_hh;
8917         typedef struct {
8918             int reg;
8919             int elt;
8920         } EltPosns;
8921         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
8922         EltPosns *elt = eltposns;
8923 
8924         if (pos >= 64) {
8925             elt++;
8926             pos -= 64;
8927         }
8928 
8929         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
8930         elt++;
8931         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
8932         elt++;
8933         if (pos != 0) {
8934             do_ext64(s, tcg_resh, tcg_resl, pos);
8935             tcg_hh = tcg_temp_new_i64();
8936             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
8937             do_ext64(s, tcg_hh, tcg_resh, pos);
8938         }
8939     }
8940 
8941     write_vec_element(s, tcg_resl, rd, 0, MO_64);
8942     if (is_q) {
8943         write_vec_element(s, tcg_resh, rd, 1, MO_64);
8944     }
8945     clear_vec_high(s, is_q, rd);
8946 }
8947 
8948 /* TBL/TBX
8949  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
8950  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
8951  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
8952  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
8953  */
8954 static void disas_simd_tb(DisasContext *s, uint32_t insn)
8955 {
8956     int op2 = extract32(insn, 22, 2);
8957     int is_q = extract32(insn, 30, 1);
8958     int rm = extract32(insn, 16, 5);
8959     int rn = extract32(insn, 5, 5);
8960     int rd = extract32(insn, 0, 5);
8961     int is_tbx = extract32(insn, 12, 1);
8962     int len = (extract32(insn, 13, 2) + 1) * 16;
8963 
8964     if (op2 != 0) {
8965         unallocated_encoding(s);
8966         return;
8967     }
8968 
8969     if (!fp_access_check(s)) {
8970         return;
8971     }
8972 
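         /*
          * The gvec descriptor's data field packs Rn (the first table register)
          * in bits [4:0], the TBX flag in bit 5 and the table length in bytes
          * (16, 32, 48 or 64) above that, for the helper to unpack.
          */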
8973     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
8974                        vec_full_reg_offset(s, rm), tcg_env,
8975                        is_q ? 16 : 8, vec_full_reg_size(s),
8976                        (len << 6) | (is_tbx << 5) | rn,
8977                        gen_helper_simd_tblx);
8978 }
8979 
8980 /* ZIP/UZP/TRN
8981  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
8982  * +---+---+-------------+------+---+------+---+------------------+------+
8983  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
8984  * +---+---+-------------+------+---+------+---+------------------+------+
8985  */
8986 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
8987 {
8988     int rd = extract32(insn, 0, 5);
8989     int rn = extract32(insn, 5, 5);
8990     int rm = extract32(insn, 16, 5);
8991     int size = extract32(insn, 22, 2);
8992     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
8993      * bit 2 indicates 1 vs 2 variant of the insn.
8994      */
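         /*
          * For example, ZIP1 (opcode 3, part 0) on four elements produces
          * Rn[0], Rm[0], Rn[1], Rm[1]; ZIP2 (part 1) interleaves the upper
          * halves, Rn[2], Rm[2], Rn[3], Rm[3].
          */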
8995     int opcode = extract32(insn, 12, 2);
8996     bool part = extract32(insn, 14, 1);
8997     bool is_q = extract32(insn, 30, 1);
8998     int esize = 8 << size;
8999     int i;
9000     int datasize = is_q ? 128 : 64;
9001     int elements = datasize / esize;
9002     TCGv_i64 tcg_res[2], tcg_ele;
9003 
9004     if (opcode == 0 || (size == 3 && !is_q)) {
9005         unallocated_encoding(s);
9006         return;
9007     }
9008 
9009     if (!fp_access_check(s)) {
9010         return;
9011     }
9012 
9013     tcg_res[0] = tcg_temp_new_i64();
9014     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
9015     tcg_ele = tcg_temp_new_i64();
9016 
9017     for (i = 0; i < elements; i++) {
9018         int o, w;
9019 
9020         switch (opcode) {
9021         case 1: /* UZP1/2 */
9022         {
9023             int midpoint = elements / 2;
9024             if (i < midpoint) {
9025                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
9026             } else {
9027                 read_vec_element(s, tcg_ele, rm,
9028                                  2 * (i - midpoint) + part, size);
9029             }
9030             break;
9031         }
9032         case 2: /* TRN1/2 */
9033             if (i & 1) {
9034                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
9035             } else {
9036                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
9037             }
9038             break;
9039         case 3: /* ZIP1/2 */
9040         {
9041             int base = part * elements / 2;
9042             if (i & 1) {
9043                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
9044             } else {
9045                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
9046             }
9047             break;
9048         }
9049         default:
9050             g_assert_not_reached();
9051         }
9052 
9053         w = (i * esize) / 64;
9054         o = (i * esize) % 64;
9055         if (o == 0) {
9056             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
9057         } else {
9058             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
9059             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
9060         }
9061     }
9062 
9063     for (i = 0; i <= is_q; ++i) {
9064         write_vec_element(s, tcg_res[i], rd, i, MO_64);
9065     }
9066     clear_vec_high(s, is_q, rd);
9067 }
9068 
9069 /*
9070  * do_reduction_op helper
9071  *
9072  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
9073  * important for correct NaN propagation that we do these
9074  * operations in exactly the order specified by the pseudocode.
9075  *
9076  * This is a recursive function; TCG temps should be freed by the
9077  * calling function once it is done with the values.
9078  */
9079 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
9080                                 int esize, int size, int vmap, TCGv_ptr fpst)
9081 {
9082     if (esize == size) {
9083         int element;
9084         MemOp msize = esize == 16 ? MO_16 : MO_32;
9085         TCGv_i32 tcg_elem;
9086 
9087         /* We should have one element left here */
9088         assert(ctpop8(vmap) == 1);
9089         element = ctz32(vmap);
9090         assert(element < 8);
9091 
9092         tcg_elem = tcg_temp_new_i32();
9093         read_vec_element_i32(s, tcg_elem, rn, element, msize);
9094         return tcg_elem;
9095     } else {
9096         int bits = size / 2;
9097         int shift = ctpop8(vmap) / 2;
9098         int vmap_lo = (vmap >> shift) & vmap;
9099         int vmap_hi = (vmap & ~vmap_lo);
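             /*
              * e.g. a four-element reduction starts with vmap == 0b1111, which
              * splits into vmap_lo == 0b0011 and vmap_hi == 0b1100; each half
              * recurses until a single element remains.
              */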
9100         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
9101 
9102         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
9103         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
9104         tcg_res = tcg_temp_new_i32();
9105 
9106         switch (fpopcode) {
9107         case 0x0c: /* fmaxnmv half-precision */
9108             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
9109             break;
9110         case 0x0f: /* fmaxv half-precision */
9111             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
9112             break;
9113         case 0x1c: /* fminnmv half-precision */
9114             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
9115             break;
9116         case 0x1f: /* fminv half-precision */
9117             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
9118             break;
9119         case 0x2c: /* fmaxnmv */
9120             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
9121             break;
9122         case 0x2f: /* fmaxv */
9123             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
9124             break;
9125         case 0x3c: /* fminnmv */
9126             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
9127             break;
9128         case 0x3f: /* fminv */
9129             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
9130             break;
9131         default:
9132             g_assert_not_reached();
9133         }
9134         return tcg_res;
9135     }
9136 }
9137 
9138 /* AdvSIMD across lanes
9139  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9140  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9141  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9142  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9143  */
9144 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
9145 {
9146     int rd = extract32(insn, 0, 5);
9147     int rn = extract32(insn, 5, 5);
9148     int size = extract32(insn, 22, 2);
9149     int opcode = extract32(insn, 12, 5);
9150     bool is_q = extract32(insn, 30, 1);
9151     bool is_u = extract32(insn, 29, 1);
9152     bool is_fp = false;
9153     bool is_min = false;
9154     int esize;
9155     int elements;
9156     int i;
9157     TCGv_i64 tcg_res, tcg_elt;
9158 
9159     switch (opcode) {
9160     case 0x1b: /* ADDV */
9161         if (is_u) {
9162             unallocated_encoding(s);
9163             return;
9164         }
9165         /* fall through */
9166     case 0x3: /* SADDLV, UADDLV */
9167     case 0xa: /* SMAXV, UMAXV */
9168     case 0x1a: /* SMINV, UMINV */
9169         if (size == 3 || (size == 2 && !is_q)) {
9170             unallocated_encoding(s);
9171             return;
9172         }
9173         break;
9174     case 0xc: /* FMAXNMV, FMINNMV */
9175     case 0xf: /* FMAXV, FMINV */
9176         /* Bit 1 of the size field encodes min vs max, and the actual size
9177          * depends on the encoding of the U bit. If U is not set (and FP16
9178          * is enabled) then we do half-precision float instead of single
9179          * precision.
9180          */
9181         is_min = extract32(size, 1, 1);
9182         is_fp = true;
9183         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
9184             size = 1;
9185         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
9186             unallocated_encoding(s);
9187             return;
9188         } else {
9189             size = 2;
9190         }
9191         break;
9192     default:
9193         unallocated_encoding(s);
9194         return;
9195     }
9196 
9197     if (!fp_access_check(s)) {
9198         return;
9199     }
9200 
9201     esize = 8 << size;
9202     elements = (is_q ? 128 : 64) / esize;
9203 
9204     tcg_res = tcg_temp_new_i64();
9205     tcg_elt = tcg_temp_new_i64();
9206 
9207     /* These instructions operate across all lanes of a vector
9208      * to produce a single result. We can guarantee that a 64
9209      * bit intermediate is sufficient:
9210      *  + for [US]ADDLV the maximum element size is 32 bits, and
9211      *    the result type is 64 bits
9212      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
9213      *    same as the element size, which is 32 bits at most
9214      * For the integer operations we can choose to work at 64
9215      * or 32 bits and truncate at the end; for simplicity
9216      * we use 64 bits always. The floating point
9217      * ops do require 32 bit intermediates, though.
9218      */
9219     if (!is_fp) {
9220         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
9221 
9222         for (i = 1; i < elements; i++) {
9223             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
9224 
9225             switch (opcode) {
9226             case 0x03: /* SADDLV / UADDLV */
9227             case 0x1b: /* ADDV */
9228                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
9229                 break;
9230             case 0x0a: /* SMAXV / UMAXV */
9231                 if (is_u) {
9232                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
9233                 } else {
9234                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
9235                 }
9236                 break;
9237             case 0x1a: /* SMINV / UMINV */
9238                 if (is_u) {
9239                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
9240                 } else {
9241                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
9242                 }
9243                 break;
9244             default:
9245                 g_assert_not_reached();
9246             }
9247 
9248         }
9249     } else {
9250         /* Floating point vector reduction ops which work across 32
9251          * bit (single) or 16 bit (half-precision) intermediates.
9252          * Note that correct NaN propagation requires that we do these
9253          * operations in exactly the order specified by the pseudocode.
9254          */
9255         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9256         int fpopcode = opcode | is_min << 4 | is_u << 5;
9257         int vmap = (1 << elements) - 1;
9258         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
9259                                              (is_q ? 128 : 64), vmap, fpst);
9260         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
9261     }
9262 
9263     /* Now truncate the result to the width required for the final output */
9264     if (opcode == 0x03) {
9265         /* SADDLV, UADDLV: result is 2*esize */
9266         size++;
9267     }
9268 
9269     switch (size) {
9270     case 0:
9271         tcg_gen_ext8u_i64(tcg_res, tcg_res);
9272         break;
9273     case 1:
9274         tcg_gen_ext16u_i64(tcg_res, tcg_res);
9275         break;
9276     case 2:
9277         tcg_gen_ext32u_i64(tcg_res, tcg_res);
9278         break;
9279     case 3:
9280         break;
9281     default:
9282         g_assert_not_reached();
9283     }
9284 
9285     write_fp_dreg(s, rd, tcg_res);
9286 }
9287 
9288 /* AdvSIMD modified immediate
9289  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
9290  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
9291  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
9292  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
9293  *
9294  * There are a number of operations that can be carried out here:
9295  *   MOVI - move (shifted) imm into register
9296  *   MVNI - move inverted (shifted) imm into register
9297  *   ORR  - bitwise OR of (shifted) imm with register
9298  *   BIC  - bitwise clear of (shifted) imm with register
9299  * With ARMv8.2 we also have:
9300  *   FMOV half-precision
9301  */
9302 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
9303 {
9304     int rd = extract32(insn, 0, 5);
9305     int cmode = extract32(insn, 12, 4);
9306     int o2 = extract32(insn, 11, 1);
9307     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
9308     bool is_neg = extract32(insn, 29, 1);
9309     bool is_q = extract32(insn, 30, 1);
9310     uint64_t imm = 0;
9311 
9312     if (o2) {
9313         if (cmode != 0xf || is_neg) {
9314             unallocated_encoding(s);
9315             return;
9316         }
9317         /* FMOV (vector, immediate) - half-precision */
9318         if (!dc_isar_feature(aa64_fp16, s)) {
9319             unallocated_encoding(s);
9320             return;
9321         }
9322         imm = vfp_expand_imm(MO_16, abcdefgh);
9323         /* now duplicate across the lanes */
9324         imm = dup_const(MO_16, imm);
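             /*
              * e.g. imm8 == 0x70 (1.0) expands to 0x3c00 and is replicated to
              * 0x3c003c003c003c00, so the MO_64 dup below fills every lane.
              */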
9325     } else {
9326         if (cmode == 0xf && is_neg && !is_q) {
9327             unallocated_encoding(s);
9328             return;
9329         }
9330         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
9331     }
9332 
9333     if (!fp_access_check(s)) {
9334         return;
9335     }
9336 
9337     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
9338         /* MOVI or MVNI, with MVNI negation handled above.  */
9339         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
9340                              vec_full_reg_size(s), imm);
9341     } else {
9342         /* ORR or BIC, with BIC negation to AND handled above.  */
9343         if (is_neg) {
9344             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
9345         } else {
9346             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
9347         }
9348     }
9349 }
9350 
9351 /*
9352  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
9353  *
9354  * This handles the common shift logic and is used by both
9355  * the vector and scalar code.
9356  */
9357 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
9358                                     TCGv_i64 tcg_rnd, bool accumulate,
9359                                     bool is_u, int size, int shift)
9360 {
9361     bool extended_result = false;
9362     bool round = tcg_rnd != NULL;
9363     int ext_lshift = 0;
9364     TCGv_i64 tcg_src_hi;
9365 
9366     if (round && size == 3) {
9367         extended_result = true;
9368         ext_lshift = 64 - shift;
9369         tcg_src_hi = tcg_temp_new_i64();
9370     } else if (shift == 64) {
9371         if (!accumulate && is_u) {
9372             /* result is zero */
9373             tcg_gen_movi_i64(tcg_res, 0);
9374             return;
9375         }
9376     }
9377 
9378     /* Deal with the rounding step */
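         /*
          * The callers pass tcg_rnd == 1 << (shift - 1), so e.g. a rounding
          * shift right by 2 adds 2 before shifting: an input of 6 yields
          * (6 + 2) >> 2 == 2 rather than the truncating 6 >> 2 == 1.
          */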
9379     if (round) {
9380         if (extended_result) {
9381             TCGv_i64 tcg_zero = tcg_constant_i64(0);
9382             if (!is_u) {
9383                 /* take care of sign extending tcg_src */
9384                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
9385                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
9386                                  tcg_src, tcg_src_hi,
9387                                  tcg_rnd, tcg_zero);
9388             } else {
9389                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
9390                                  tcg_src, tcg_zero,
9391                                  tcg_rnd, tcg_zero);
9392             }
9393         } else {
9394             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
9395         }
9396     }
9397 
9398     /* Now do the shift right */
9399     if (round && extended_result) {
9400         /* extended case, >64 bit precision required */
9401         if (ext_lshift == 0) {
9402             /* special case, only high bits matter */
9403             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
9404         } else {
9405             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
9406             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
9407             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
9408         }
9409     } else {
9410         if (is_u) {
9411             if (shift == 64) {
9412                 /* essentially shifting in 64 zeros */
9413                 tcg_gen_movi_i64(tcg_src, 0);
9414             } else {
9415                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
9416             }
9417         } else {
9418             if (shift == 64) {
9419                 /* effectively extending the sign-bit */
9420                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
9421             } else {
9422                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
9423             }
9424         }
9425     }
9426 
9427     if (accumulate) {
9428         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
9429     } else {
9430         tcg_gen_mov_i64(tcg_res, tcg_src);
9431     }
9432 }
9433 
9434 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
9435 static void handle_scalar_simd_shri(DisasContext *s,
9436                                     bool is_u, int immh, int immb,
9437                                     int opcode, int rn, int rd)
9438 {
9439     const int size = 3;
9440     int immhb = immh << 3 | immb;
9441     int shift = 2 * (8 << size) - immhb;
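         /*
          * size is fixed at 3 (64-bit elements), so shift == 128 - immh:immb;
          * the immh<3> check below restricts immh:immb to 64..127, giving a
          * shift of 1..64.
          */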
9442     bool accumulate = false;
9443     bool round = false;
9444     bool insert = false;
9445     TCGv_i64 tcg_rn;
9446     TCGv_i64 tcg_rd;
9447     TCGv_i64 tcg_round;
9448 
9449     if (!extract32(immh, 3, 1)) {
9450         unallocated_encoding(s);
9451         return;
9452     }
9453 
9454     if (!fp_access_check(s)) {
9455         return;
9456     }
9457 
9458     switch (opcode) {
9459     case 0x02: /* SSRA / USRA (accumulate) */
9460         accumulate = true;
9461         break;
9462     case 0x04: /* SRSHR / URSHR (rounding) */
9463         round = true;
9464         break;
9465     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9466         accumulate = round = true;
9467         break;
9468     case 0x08: /* SRI */
9469         insert = true;
9470         break;
9471     }
9472 
9473     if (round) {
9474         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
9475     } else {
9476         tcg_round = NULL;
9477     }
9478 
9479     tcg_rn = read_fp_dreg(s, rn);
9480     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
9481 
9482     if (insert) {
9483         /* A shift count equal to the element size is valid but does nothing;
9484          * special-case it to avoid a potential shift by 64.
9485          */
9486         int esize = 8 << size;
9487         if (shift != esize) {
9488             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
9489             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
9490         }
9491     } else {
9492         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9493                                 accumulate, is_u, size, shift);
9494     }
9495 
9496     write_fp_dreg(s, rd, tcg_rd);
9497 }
9498 
9499 /* SHL/SLI - Scalar shift left */
9500 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
9501                                     int immh, int immb, int opcode,
9502                                     int rn, int rd)
9503 {
9504     int size = 32 - clz32(immh) - 1;
9505     int immhb = immh << 3 | immb;
9506     int shift = immhb - (8 << size);
9507     TCGv_i64 tcg_rn;
9508     TCGv_i64 tcg_rd;
9509 
9510     if (!extract32(immh, 3, 1)) {
9511         unallocated_encoding(s);
9512         return;
9513     }
9514 
9515     if (!fp_access_check(s)) {
9516         return;
9517     }
9518 
9519     tcg_rn = read_fp_dreg(s, rn);
9520     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
9521 
9522     if (insert) {
9523         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
9524     } else {
9525         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
9526     }
9527 
9528     write_fp_dreg(s, rd, tcg_rd);
9529 }
9530 
9531 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
9532  * (signed/unsigned) narrowing */
9533 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
9534                                    bool is_u_shift, bool is_u_narrow,
9535                                    int immh, int immb, int opcode,
9536                                    int rn, int rd)
9537 {
9538     int immhb = immh << 3 | immb;
9539     int size = 32 - clz32(immh) - 1;
9540     int esize = 8 << size;
9541     int shift = (2 * esize) - immhb;
9542     int elements = is_scalar ? 1 : (64 / esize);
9543     bool round = extract32(opcode, 0, 1);
9544     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
9545     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
9546     TCGv_i32 tcg_rd_narrowed;
9547     TCGv_i64 tcg_final;
9548 
9549     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
9550         { gen_helper_neon_narrow_sat_s8,
9551           gen_helper_neon_unarrow_sat8 },
9552         { gen_helper_neon_narrow_sat_s16,
9553           gen_helper_neon_unarrow_sat16 },
9554         { gen_helper_neon_narrow_sat_s32,
9555           gen_helper_neon_unarrow_sat32 },
9556         { NULL, NULL },
9557     };
9558     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
9559         gen_helper_neon_narrow_sat_u8,
9560         gen_helper_neon_narrow_sat_u16,
9561         gen_helper_neon_narrow_sat_u32,
9562         NULL
9563     };
9564     NeonGenNarrowEnvFn *narrowfn;
9565 
9566     int i;
9567 
9568     assert(size < 4);
9569 
9570     if (extract32(immh, 3, 1)) {
9571         unallocated_encoding(s);
9572         return;
9573     }
9574 
9575     if (!fp_access_check(s)) {
9576         return;
9577     }
9578 
9579     if (is_u_shift) {
9580         narrowfn = unsigned_narrow_fns[size];
9581     } else {
9582         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
9583     }
9584 
9585     tcg_rn = tcg_temp_new_i64();
9586     tcg_rd = tcg_temp_new_i64();
9587     tcg_rd_narrowed = tcg_temp_new_i32();
9588     tcg_final = tcg_temp_new_i64();
9589 
9590     if (round) {
9591         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
9592     } else {
9593         tcg_round = NULL;
9594     }
9595 
9596     for (i = 0; i < elements; i++) {
9597         read_vec_element(s, tcg_rn, rn, i, ldop);
9598         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9599                                 false, is_u_shift, size+1, shift);
9600         narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
9601         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
9602         if (i == 0) {
9603             tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize);
9604         } else {
9605             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
9606         }
9607     }
9608 
9609     if (!is_q) {
9610         write_vec_element(s, tcg_final, rd, 0, MO_64);
9611     } else {
9612         write_vec_element(s, tcg_final, rd, 1, MO_64);
9613     }
9614     clear_vec_high(s, is_q, rd);
9615 }
9616 
9617 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
9618 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
9619                              bool src_unsigned, bool dst_unsigned,
9620                              int immh, int immb, int rn, int rd)
9621 {
9622     int immhb = immh << 3 | immb;
9623     int size = 32 - clz32(immh) - 1;
9624     int shift = immhb - (8 << size);
9625     int pass;
9626 
9627     assert(immh != 0);
9628     assert(!(scalar && is_q));
9629 
9630     if (!scalar) {
9631         if (!is_q && extract32(immh, 3, 1)) {
9632             unallocated_encoding(s);
9633             return;
9634         }
9635 
9636         /* Since we use the variable-shift helpers we must
9637          * replicate the shift count into each element of
9638          * the tcg_shift value.
9639          */
9640         switch (size) {
9641         case 0:
9642             shift |= shift << 8;
9643             /* fall through */
9644         case 1:
9645             shift |= shift << 16;
9646             break;
9647         case 2:
9648         case 3:
9649             break;
9650         default:
9651             g_assert_not_reached();
9652         }
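             /*
              * e.g. for byte elements a shift of 3 becomes 0x03030303, one copy
              * per byte lane of the 32-bit value handed to the helpers.
              */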
9653     }
9654 
9655     if (!fp_access_check(s)) {
9656         return;
9657     }
9658 
9659     if (size == 3) {
9660         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
9661         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
9662             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
9663             { NULL, gen_helper_neon_qshl_u64 },
9664         };
9665         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
9666         int maxpass = is_q ? 2 : 1;
9667 
9668         for (pass = 0; pass < maxpass; pass++) {
9669             TCGv_i64 tcg_op = tcg_temp_new_i64();
9670 
9671             read_vec_element(s, tcg_op, rn, pass, MO_64);
9672             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
9673             write_vec_element(s, tcg_op, rd, pass, MO_64);
9674         }
9675         clear_vec_high(s, is_q, rd);
9676     } else {
9677         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
9678         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
9679             {
9680                 { gen_helper_neon_qshl_s8,
9681                   gen_helper_neon_qshl_s16,
9682                   gen_helper_neon_qshl_s32 },
9683                 { gen_helper_neon_qshlu_s8,
9684                   gen_helper_neon_qshlu_s16,
9685                   gen_helper_neon_qshlu_s32 }
9686             }, {
9687                 { NULL, NULL, NULL },
9688                 { gen_helper_neon_qshl_u8,
9689                   gen_helper_neon_qshl_u16,
9690                   gen_helper_neon_qshl_u32 }
9691             }
9692         };
9693         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
9694         MemOp memop = scalar ? size : MO_32;
9695         int maxpass = scalar ? 1 : is_q ? 4 : 2;
9696 
9697         for (pass = 0; pass < maxpass; pass++) {
9698             TCGv_i32 tcg_op = tcg_temp_new_i32();
9699 
9700             read_vec_element_i32(s, tcg_op, rn, pass, memop);
9701             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
9702             if (scalar) {
9703                 switch (size) {
9704                 case 0:
9705                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
9706                     break;
9707                 case 1:
9708                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
9709                     break;
9710                 case 2:
9711                     break;
9712                 default:
9713                     g_assert_not_reached();
9714                 }
9715                 write_fp_sreg(s, rd, tcg_op);
9716             } else {
9717                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
9718             }
9719         }
9720 
9721         if (!scalar) {
9722             clear_vec_high(s, is_q, rd);
9723         }
9724     }
9725 }
9726 
9727 /* Common vector code for handling integer to FP conversion */
9728 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
9729                                    int elements, int is_signed,
9730                                    int fracbits, int size)
9731 {
9732     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9733     TCGv_i32 tcg_shift = NULL;
9734 
9735     MemOp mop = size | (is_signed ? MO_SIGN : 0);
9736     int pass;
9737 
9738     if (fracbits || size == MO_64) {
9739         tcg_shift = tcg_constant_i32(fracbits);
9740     }
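    /*
     * Note that the 64-bit helpers below always take a fixed-point shift
     * argument, so tcg_shift is needed even when fracbits is 0; the 32-bit
     * and 16-bit paths only use it for the fracbits != 0 forms.
     */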
9741 
9742     if (size == MO_64) {
9743         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
9744         TCGv_i64 tcg_double = tcg_temp_new_i64();
9745 
9746         for (pass = 0; pass < elements; pass++) {
9747             read_vec_element(s, tcg_int64, rn, pass, mop);
9748 
9749             if (is_signed) {
9750                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
9751                                      tcg_shift, tcg_fpst);
9752             } else {
9753                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
9754                                      tcg_shift, tcg_fpst);
9755             }
9756             if (elements == 1) {
9757                 write_fp_dreg(s, rd, tcg_double);
9758             } else {
9759                 write_vec_element(s, tcg_double, rd, pass, MO_64);
9760             }
9761         }
9762     } else {
9763         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
9764         TCGv_i32 tcg_float = tcg_temp_new_i32();
9765 
9766         for (pass = 0; pass < elements; pass++) {
9767             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
9768 
9769             switch (size) {
9770             case MO_32:
9771                 if (fracbits) {
9772                     if (is_signed) {
9773                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
9774                                              tcg_shift, tcg_fpst);
9775                     } else {
9776                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
9777                                              tcg_shift, tcg_fpst);
9778                     }
9779                 } else {
9780                     if (is_signed) {
9781                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
9782                     } else {
9783                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
9784                     }
9785                 }
9786                 break;
9787             case MO_16:
9788                 if (fracbits) {
9789                     if (is_signed) {
9790                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
9791                                              tcg_shift, tcg_fpst);
9792                     } else {
9793                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
9794                                              tcg_shift, tcg_fpst);
9795                     }
9796                 } else {
9797                     if (is_signed) {
9798                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
9799                     } else {
9800                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
9801                     }
9802                 }
9803                 break;
9804             default:
9805                 g_assert_not_reached();
9806             }
9807 
9808             if (elements == 1) {
9809                 write_fp_sreg(s, rd, tcg_float);
9810             } else {
9811                 write_vec_element_i32(s, tcg_float, rd, pass, size);
9812             }
9813         }
9814     }
9815 
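    /*
     * elements << size is the number of bytes written: 16 means the whole
     * Q register was written.
     */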
9816     clear_vec_high(s, elements << size == 16, rd);
9817 }
9818 
9819 /* UCVTF/SCVTF - Integer to FP conversion */
9820 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
9821                                          bool is_q, bool is_u,
9822                                          int immh, int immb, int opcode,
9823                                          int rn, int rd)
9824 {
9825     int size, elements, fracbits;
9826     int immhb = immh << 3 | immb;
9827 
9828     if (immh & 8) {
9829         size = MO_64;
9830         if (!is_scalar && !is_q) {
9831             unallocated_encoding(s);
9832             return;
9833         }
9834     } else if (immh & 4) {
9835         size = MO_32;
9836     } else if (immh & 2) {
9837         size = MO_16;
9838         if (!dc_isar_feature(aa64_fp16, s)) {
9839             unallocated_encoding(s);
9840             return;
9841         }
9842     } else {
9843         /* immh == 0 would be a failure of the decode logic */
9844         g_assert(immh == 1);
9845         unallocated_encoding(s);
9846         return;
9847     }
9848 
9849     if (is_scalar) {
9850         elements = 1;
9851     } else {
9852         elements = (8 << is_q) >> size;
9853     }
9854     fracbits = (16 << size) - immhb;
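    /*
     * 16 << size is twice the element size, so for 32-bit elements
     * (illustrative values) immh:immb = 0b0111:111 (63) encodes 1 fraction
     * bit and immh:immb = 0b0100:000 (32) encodes the maximum of 32.
     */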
9855 
9856     if (!fp_access_check(s)) {
9857         return;
9858     }
9859 
9860     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
9861 }
9862 
9863 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
9864 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
9865                                          bool is_q, bool is_u,
9866                                          int immh, int immb, int rn, int rd)
9867 {
9868     int immhb = immh << 3 | immb;
9869     int pass, size, fracbits;
9870     TCGv_ptr tcg_fpstatus;
9871     TCGv_i32 tcg_rmode, tcg_shift;
9872 
9873     if (immh & 0x8) {
9874         size = MO_64;
9875         if (!is_scalar && !is_q) {
9876             unallocated_encoding(s);
9877             return;
9878         }
9879     } else if (immh & 0x4) {
9880         size = MO_32;
9881     } else if (immh & 0x2) {
9882         size = MO_16;
9883         if (!dc_isar_feature(aa64_fp16, s)) {
9884             unallocated_encoding(s);
9885             return;
9886         }
9887     } else {
9888         /* Should have split out AdvSIMD modified immediate earlier.  */
9889         assert(immh == 1);
9890         unallocated_encoding(s);
9891         return;
9892     }
9893 
9894     if (!fp_access_check(s)) {
9895         return;
9896     }
9897 
9898     assert(!(is_scalar && is_q));
9899 
9900     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
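    /*
     * FCVTZS/FCVTZU always round toward zero regardless of FPCR.RMode, so
     * force that rounding mode here and restore the previous one at the end.
     */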
9901     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
9902     fracbits = (16 << size) - immhb;
9903     tcg_shift = tcg_constant_i32(fracbits);
9904 
9905     if (size == MO_64) {
9906         int maxpass = is_scalar ? 1 : 2;
9907 
9908         for (pass = 0; pass < maxpass; pass++) {
9909             TCGv_i64 tcg_op = tcg_temp_new_i64();
9910 
9911             read_vec_element(s, tcg_op, rn, pass, MO_64);
9912             if (is_u) {
9913                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9914             } else {
9915                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9916             }
9917             write_vec_element(s, tcg_op, rd, pass, MO_64);
9918         }
9919         clear_vec_high(s, is_q, rd);
9920     } else {
9921         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
9922         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
9923 
9924         switch (size) {
9925         case MO_16:
9926             if (is_u) {
9927                 fn = gen_helper_vfp_touhh;
9928             } else {
9929                 fn = gen_helper_vfp_toshh;
9930             }
9931             break;
9932         case MO_32:
9933             if (is_u) {
9934                 fn = gen_helper_vfp_touls;
9935             } else {
9936                 fn = gen_helper_vfp_tosls;
9937             }
9938             break;
9939         default:
9940             g_assert_not_reached();
9941         }
9942 
9943         for (pass = 0; pass < maxpass; pass++) {
9944             TCGv_i32 tcg_op = tcg_temp_new_i32();
9945 
9946             read_vec_element_i32(s, tcg_op, rn, pass, size);
9947             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9948             if (is_scalar) {
9949                 if (size == MO_16 && !is_u) {
9950                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
9951                 }
9952                 write_fp_sreg(s, rd, tcg_op);
9953             } else {
9954                 write_vec_element_i32(s, tcg_op, rd, pass, size);
9955             }
9956         }
9957         if (!is_scalar) {
9958             clear_vec_high(s, is_q, rd);
9959         }
9960     }
9961 
9962     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9963 }
9964 
9965 /* AdvSIMD scalar shift by immediate
9966  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
9967  * +-----+---+-------------+------+------+--------+---+------+------+
9968  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9969  * +-----+---+-------------+------+------+--------+---+------+------+
9970  *
9971  * This is the scalar version, so it operates on fixed size registers.
9972  */
9973 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
9974 {
9975     int rd = extract32(insn, 0, 5);
9976     int rn = extract32(insn, 5, 5);
9977     int opcode = extract32(insn, 11, 5);
9978     int immb = extract32(insn, 16, 3);
9979     int immh = extract32(insn, 19, 4);
9980     bool is_u = extract32(insn, 29, 1);
9981 
9982     if (immh == 0) {
9983         unallocated_encoding(s);
9984         return;
9985     }
9986 
9987     switch (opcode) {
9988     case 0x08: /* SRI */
9989         if (!is_u) {
9990             unallocated_encoding(s);
9991             return;
9992         }
9993         /* fall through */
9994     case 0x00: /* SSHR / USHR */
9995     case 0x02: /* SSRA / USRA */
9996     case 0x04: /* SRSHR / URSHR */
9997     case 0x06: /* SRSRA / URSRA */
9998         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
9999         break;
10000     case 0x0a: /* SHL / SLI */
10001         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
10002         break;
10003     case 0x1c: /* SCVTF, UCVTF */
10004         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
10005                                      opcode, rn, rd);
10006         break;
10007     case 0x10: /* SQSHRUN, SQSHRUN2 */
10008     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
10009         if (!is_u) {
10010             unallocated_encoding(s);
10011             return;
10012         }
10013         handle_vec_simd_sqshrn(s, true, false, false, true,
10014                                immh, immb, opcode, rn, rd);
10015         break;
10016     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
10017     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
10018         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
10019                                immh, immb, opcode, rn, rd);
10020         break;
10021     case 0xc: /* SQSHLU */
10022         if (!is_u) {
10023             unallocated_encoding(s);
10024             return;
10025         }
10026         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
10027         break;
10028     case 0xe: /* SQSHL, UQSHL */
10029         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
10030         break;
10031     case 0x1f: /* FCVTZS, FCVTZU */
10032         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
10033         break;
10034     default:
10035         unallocated_encoding(s);
10036         break;
10037     }
10038 }
10039 
10040 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
10041                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
10042                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
10043 {
10044     /* Handle 64->64 opcodes which are shared between the scalar and
10045      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
10046      * is valid in either group and also the double-precision fp ops.
10047      * The caller need only provide tcg_rmode and tcg_fpstatus if the op
10048      * requires them.
10049      */
10050     TCGCond cond;
10051 
10052     switch (opcode) {
10053     case 0x4: /* CLS, CLZ */
10054         if (u) {
10055             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
10056         } else {
10057             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
10058         }
10059         break;
10060     case 0x5: /* NOT */
10061         /* This opcode is shared with CNT and RBIT but we have earlier
10062          * enforced that size == 3 if and only if this is the NOT insn.
10063          */
10064         tcg_gen_not_i64(tcg_rd, tcg_rn);
10065         break;
10066     case 0x7: /* SQABS, SQNEG */
10067         if (u) {
10068             gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
10069         } else {
10070             gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
10071         }
10072         break;
10073     case 0xa: /* CMLT */
10074         cond = TCG_COND_LT;
10075     do_cmop:
10076         /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
10077         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
10078         break;
10079     case 0x8: /* CMGT, CMGE */
10080         cond = u ? TCG_COND_GE : TCG_COND_GT;
10081         goto do_cmop;
10082     case 0x9: /* CMEQ, CMLE */
10083         cond = u ? TCG_COND_LE : TCG_COND_EQ;
10084         goto do_cmop;
10085     case 0xb: /* ABS, NEG */
10086         if (u) {
10087             tcg_gen_neg_i64(tcg_rd, tcg_rn);
10088         } else {
10089             tcg_gen_abs_i64(tcg_rd, tcg_rn);
10090         }
10091         break;
10092     case 0x2f: /* FABS */
10093         gen_vfp_absd(tcg_rd, tcg_rn);
10094         break;
10095     case 0x6f: /* FNEG */
10096         gen_vfp_negd(tcg_rd, tcg_rn);
10097         break;
10098     case 0x7f: /* FSQRT */
10099         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
10100         break;
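    /*
     * The FCVT* cases below differ only in rounding mode and signedness;
     * the caller has already selected the rounding mode via tcg_rmode, so
     * each group reduces to a single conversion helper with 0 fraction bits.
     */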
10101     case 0x1a: /* FCVTNS */
10102     case 0x1b: /* FCVTMS */
10103     case 0x1c: /* FCVTAS */
10104     case 0x3a: /* FCVTPS */
10105     case 0x3b: /* FCVTZS */
10106         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
10107         break;
10108     case 0x5a: /* FCVTNU */
10109     case 0x5b: /* FCVTMU */
10110     case 0x5c: /* FCVTAU */
10111     case 0x7a: /* FCVTPU */
10112     case 0x7b: /* FCVTZU */
10113         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
10114         break;
10115     case 0x18: /* FRINTN */
10116     case 0x19: /* FRINTM */
10117     case 0x38: /* FRINTP */
10118     case 0x39: /* FRINTZ */
10119     case 0x58: /* FRINTA */
10120     case 0x79: /* FRINTI */
10121         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
10122         break;
10123     case 0x59: /* FRINTX */
10124         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
10125         break;
10126     case 0x1e: /* FRINT32Z */
10127     case 0x5e: /* FRINT32X */
10128         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
10129         break;
10130     case 0x1f: /* FRINT64Z */
10131     case 0x5f: /* FRINT64X */
10132         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
10133         break;
10134     default:
10135         g_assert_not_reached();
10136     }
10137 }
10138 
10139 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
10140                                    bool is_scalar, bool is_u, bool is_q,
10141                                    int size, int rn, int rd)
10142 {
10143     bool is_double = (size == MO_64);
10144     TCGv_ptr fpst;
10145 
10146     if (!fp_access_check(s)) {
10147         return;
10148     }
10149 
10150     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
10151 
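    /*
     * FCMLT (zero) and FCMLE (zero) have no helpers of their own: they are
     * implemented by swapping the operands of the FCMGT/FCMGE helpers,
     * since x < 0 is the same test as 0 > x.
     */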
10152     if (is_double) {
10153         TCGv_i64 tcg_op = tcg_temp_new_i64();
10154         TCGv_i64 tcg_zero = tcg_constant_i64(0);
10155         TCGv_i64 tcg_res = tcg_temp_new_i64();
10156         NeonGenTwoDoubleOpFn *genfn;
10157         bool swap = false;
10158         int pass;
10159 
10160         switch (opcode) {
10161         case 0x2e: /* FCMLT (zero) */
10162             swap = true;
10163             /* fall through */
10164         case 0x2c: /* FCMGT (zero) */
10165             genfn = gen_helper_neon_cgt_f64;
10166             break;
10167         case 0x2d: /* FCMEQ (zero) */
10168             genfn = gen_helper_neon_ceq_f64;
10169             break;
10170         case 0x6d: /* FCMLE (zero) */
10171             swap = true;
10172             /* fall through */
10173         case 0x6c: /* FCMGE (zero) */
10174             genfn = gen_helper_neon_cge_f64;
10175             break;
10176         default:
10177             g_assert_not_reached();
10178         }
10179 
10180         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10181             read_vec_element(s, tcg_op, rn, pass, MO_64);
10182             if (swap) {
10183                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
10184             } else {
10185                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
10186             }
10187             write_vec_element(s, tcg_res, rd, pass, MO_64);
10188         }
10189 
10190         clear_vec_high(s, !is_scalar, rd);
10191     } else {
10192         TCGv_i32 tcg_op = tcg_temp_new_i32();
10193         TCGv_i32 tcg_zero = tcg_constant_i32(0);
10194         TCGv_i32 tcg_res = tcg_temp_new_i32();
10195         NeonGenTwoSingleOpFn *genfn;
10196         bool swap = false;
10197         int pass, maxpasses;
10198 
10199         if (size == MO_16) {
10200             switch (opcode) {
10201             case 0x2e: /* FCMLT (zero) */
10202                 swap = true;
10203                 /* fall through */
10204             case 0x2c: /* FCMGT (zero) */
10205                 genfn = gen_helper_advsimd_cgt_f16;
10206                 break;
10207             case 0x2d: /* FCMEQ (zero) */
10208                 genfn = gen_helper_advsimd_ceq_f16;
10209                 break;
10210             case 0x6d: /* FCMLE (zero) */
10211                 swap = true;
10212                 /* fall through */
10213             case 0x6c: /* FCMGE (zero) */
10214                 genfn = gen_helper_advsimd_cge_f16;
10215                 break;
10216             default:
10217                 g_assert_not_reached();
10218             }
10219         } else {
10220             switch (opcode) {
10221             case 0x2e: /* FCMLT (zero) */
10222                 swap = true;
10223                 /* fall through */
10224             case 0x2c: /* FCMGT (zero) */
10225                 genfn = gen_helper_neon_cgt_f32;
10226                 break;
10227             case 0x2d: /* FCMEQ (zero) */
10228                 genfn = gen_helper_neon_ceq_f32;
10229                 break;
10230             case 0x6d: /* FCMLE (zero) */
10231                 swap = true;
10232                 /* fall through */
10233             case 0x6c: /* FCMGE (zero) */
10234                 genfn = gen_helper_neon_cge_f32;
10235                 break;
10236             default:
10237                 g_assert_not_reached();
10238             }
10239         }
10240 
10241         if (is_scalar) {
10242             maxpasses = 1;
10243         } else {
10244             int vector_size = 8 << is_q;
10245             maxpasses = vector_size >> size;
10246         }
10247 
10248         for (pass = 0; pass < maxpasses; pass++) {
10249             read_vec_element_i32(s, tcg_op, rn, pass, size);
10250             if (swap) {
10251                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
10252             } else {
10253                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
10254             }
10255             if (is_scalar) {
10256                 write_fp_sreg(s, rd, tcg_res);
10257             } else {
10258                 write_vec_element_i32(s, tcg_res, rd, pass, size);
10259             }
10260         }
10261 
10262         if (!is_scalar) {
10263             clear_vec_high(s, is_q, rd);
10264         }
10265     }
10266 }
10267 
10268 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
10269                                     bool is_scalar, bool is_u, bool is_q,
10270                                     int size, int rn, int rd)
10271 {
10272     bool is_double = (size == 3);
10273     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10274 
10275     if (is_double) {
10276         TCGv_i64 tcg_op = tcg_temp_new_i64();
10277         TCGv_i64 tcg_res = tcg_temp_new_i64();
10278         int pass;
10279 
10280         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10281             read_vec_element(s, tcg_op, rn, pass, MO_64);
10282             switch (opcode) {
10283             case 0x3d: /* FRECPE */
10284                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
10285                 break;
10286             case 0x3f: /* FRECPX */
10287                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
10288                 break;
10289             case 0x7d: /* FRSQRTE */
10290                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
10291                 break;
10292             default:
10293                 g_assert_not_reached();
10294             }
10295             write_vec_element(s, tcg_res, rd, pass, MO_64);
10296         }
10297         clear_vec_high(s, !is_scalar, rd);
10298     } else {
10299         TCGv_i32 tcg_op = tcg_temp_new_i32();
10300         TCGv_i32 tcg_res = tcg_temp_new_i32();
10301         int pass, maxpasses;
10302 
10303         if (is_scalar) {
10304             maxpasses = 1;
10305         } else {
10306             maxpasses = is_q ? 4 : 2;
10307         }
10308 
10309         for (pass = 0; pass < maxpasses; pass++) {
10310             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10311 
10312             switch (opcode) {
10313             case 0x3c: /* URECPE */
10314                 gen_helper_recpe_u32(tcg_res, tcg_op);
10315                 break;
10316             case 0x3d: /* FRECPE */
10317                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
10318                 break;
10319             case 0x3f: /* FRECPX */
10320                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
10321                 break;
10322             case 0x7d: /* FRSQRTE */
10323                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
10324                 break;
10325             default:
10326                 g_assert_not_reached();
10327             }
10328 
10329             if (is_scalar) {
10330                 write_fp_sreg(s, rd, tcg_res);
10331             } else {
10332                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10333             }
10334         }
10335         if (!is_scalar) {
10336             clear_vec_high(s, is_q, rd);
10337         }
10338     }
10339 }
10340 
10341 static void handle_2misc_narrow(DisasContext *s, bool scalar,
10342                                 int opcode, bool u, bool is_q,
10343                                 int size, int rn, int rd)
10344 {
10345     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
10346      * in the source becomes a size element in the destination).
10347      */
10348     int pass;
10349     TCGv_i32 tcg_res[2];
10350     int destelt = is_q ? 2 : 0;
10351     int passes = scalar ? 1 : 2;
10352 
10353     if (scalar) {
10354         tcg_res[1] = tcg_constant_i32(0);
10355     }
10356 
10357     for (pass = 0; pass < passes; pass++) {
10358         TCGv_i64 tcg_op = tcg_temp_new_i64();
10359         NeonGenNarrowFn *genfn = NULL;
10360         NeonGenNarrowEnvFn *genenvfn = NULL;
10361 
10362         if (scalar) {
10363             read_vec_element(s, tcg_op, rn, pass, size + 1);
10364         } else {
10365             read_vec_element(s, tcg_op, rn, pass, MO_64);
10366         }
10367         tcg_res[pass] = tcg_temp_new_i32();
10368 
10369         switch (opcode) {
10370         case 0x12: /* XTN, SQXTUN */
10371         {
10372             static NeonGenNarrowFn * const xtnfns[3] = {
10373                 gen_helper_neon_narrow_u8,
10374                 gen_helper_neon_narrow_u16,
10375                 tcg_gen_extrl_i64_i32,
10376             };
10377             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
10378                 gen_helper_neon_unarrow_sat8,
10379                 gen_helper_neon_unarrow_sat16,
10380                 gen_helper_neon_unarrow_sat32,
10381             };
10382             if (u) {
10383                 genenvfn = sqxtunfns[size];
10384             } else {
10385                 genfn = xtnfns[size];
10386             }
10387             break;
10388         }
10389         case 0x14: /* SQXTN, UQXTN */
10390         {
10391             static NeonGenNarrowEnvFn * const fns[3][2] = {
10392                 { gen_helper_neon_narrow_sat_s8,
10393                   gen_helper_neon_narrow_sat_u8 },
10394                 { gen_helper_neon_narrow_sat_s16,
10395                   gen_helper_neon_narrow_sat_u16 },
10396                 { gen_helper_neon_narrow_sat_s32,
10397                   gen_helper_neon_narrow_sat_u32 },
10398             };
10399             genenvfn = fns[size][u];
10400             break;
10401         }
10402         case 0x16: /* FCVTN, FCVTN2 */
10403             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
10404             if (size == 2) {
10405                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env);
10406             } else {
10407                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
10408                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
10409                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10410                 TCGv_i32 ahp = get_ahp_flag();
10411 
10412                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
10413                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
10414                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
10415                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
10416             }
10417             break;
10418         case 0x36: /* BFCVTN, BFCVTN2 */
10419             {
10420                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10421                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
10422             }
10423             break;
10424         case 0x56:  /* FCVTXN, FCVTXN2 */
10425             /* 64 bit to 32 bit float conversion
10426              * with von Neumann rounding (round to odd)
10427              */
10428             assert(size == 2);
10429             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env);
10430             break;
10431         default:
10432             g_assert_not_reached();
10433         }
10434 
10435         if (genfn) {
10436             genfn(tcg_res[pass], tcg_op);
10437         } else if (genenvfn) {
10438             genenvfn(tcg_res[pass], tcg_env, tcg_op);
10439         }
10440     }
10441 
10442     for (pass = 0; pass < 2; pass++) {
10443         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
10444     }
10445     clear_vec_high(s, is_q, rd);
10446 }
10447 
10448 /* AdvSIMD scalar two reg misc
10449  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
10450  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10451  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10452  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10453  */
10454 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
10455 {
10456     int rd = extract32(insn, 0, 5);
10457     int rn = extract32(insn, 5, 5);
10458     int opcode = extract32(insn, 12, 5);
10459     int size = extract32(insn, 22, 2);
10460     bool u = extract32(insn, 29, 1);
10461     bool is_fcvt = false;
10462     int rmode;
10463     TCGv_i32 tcg_rmode;
10464     TCGv_ptr tcg_fpstatus;
10465 
10466     switch (opcode) {
10467     case 0x7: /* SQABS / SQNEG */
10468         break;
10469     case 0xa: /* CMLT */
10470         if (u) {
10471             unallocated_encoding(s);
10472             return;
10473         }
10474         /* fall through */
10475     case 0x8: /* CMGT, CMGE */
10476     case 0x9: /* CMEQ, CMLE */
10477     case 0xb: /* ABS, NEG */
10478         if (size != 3) {
10479             unallocated_encoding(s);
10480             return;
10481         }
10482         break;
10483     case 0x12: /* SQXTUN */
10484         if (!u) {
10485             unallocated_encoding(s);
10486             return;
10487         }
10488         /* fall through */
10489     case 0x14: /* SQXTN, UQXTN */
10490         if (size == 3) {
10491             unallocated_encoding(s);
10492             return;
10493         }
10494         if (!fp_access_check(s)) {
10495             return;
10496         }
10497         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10498         return;
10499     case 0xc ... 0xf:
10500     case 0x16 ... 0x1d:
10501     case 0x1f:
10502         /* Floating point: U, size[1] and opcode indicate operation;
10503          * size[0] indicates single or double precision.
10504          */
10505         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10506         size = extract32(size, 0, 1) ? 3 : 2;
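        /*
         * For example FCMGT (zero) has U = 0, size = 1x, opcode = 0xc and
         * becomes 0x2c here, while FCVTNU has U = 1, size = 0x, opcode = 0x1a
         * and becomes 0x5a.
         */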
10507         switch (opcode) {
10508         case 0x2c: /* FCMGT (zero) */
10509         case 0x2d: /* FCMEQ (zero) */
10510         case 0x2e: /* FCMLT (zero) */
10511         case 0x6c: /* FCMGE (zero) */
10512         case 0x6d: /* FCMLE (zero) */
10513             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10514             return;
10515         case 0x1d: /* SCVTF */
10516         case 0x5d: /* UCVTF */
10517         {
10518             bool is_signed = (opcode == 0x1d);
10519             if (!fp_access_check(s)) {
10520                 return;
10521             }
10522             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10523             return;
10524         }
10525         case 0x3d: /* FRECPE */
10526         case 0x3f: /* FRECPX */
10527         case 0x7d: /* FRSQRTE */
10528             if (!fp_access_check(s)) {
10529                 return;
10530             }
10531             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10532             return;
10533         case 0x1a: /* FCVTNS */
10534         case 0x1b: /* FCVTMS */
10535         case 0x3a: /* FCVTPS */
10536         case 0x3b: /* FCVTZS */
10537         case 0x5a: /* FCVTNU */
10538         case 0x5b: /* FCVTMU */
10539         case 0x7a: /* FCVTPU */
10540         case 0x7b: /* FCVTZU */
10541             is_fcvt = true;
10542             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10543             break;
10544         case 0x1c: /* FCVTAS */
10545         case 0x5c: /* FCVTAU */
10546             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10547             is_fcvt = true;
10548             rmode = FPROUNDING_TIEAWAY;
10549             break;
10550         case 0x56: /* FCVTXN, FCVTXN2 */
10551             if (size == 2) {
10552                 unallocated_encoding(s);
10553                 return;
10554             }
10555             if (!fp_access_check(s)) {
10556                 return;
10557             }
10558             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10559             return;
10560         default:
10561             unallocated_encoding(s);
10562             return;
10563         }
10564         break;
10565     default:
10566     case 0x3: /* USQADD / SUQADD */
10567         unallocated_encoding(s);
10568         return;
10569     }
10570 
10571     if (!fp_access_check(s)) {
10572         return;
10573     }
10574 
10575     if (is_fcvt) {
10576         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10577         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10578     } else {
10579         tcg_fpstatus = NULL;
10580         tcg_rmode = NULL;
10581     }
10582 
10583     if (size == 3) {
10584         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10585         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10586 
10587         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10588         write_fp_dreg(s, rd, tcg_rd);
10589     } else {
10590         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10591         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10592 
10593         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10594 
10595         switch (opcode) {
10596         case 0x7: /* SQABS, SQNEG */
10597         {
10598             NeonGenOneOpEnvFn *genfn;
10599             static NeonGenOneOpEnvFn * const fns[3][2] = {
10600                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10601                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10602                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10603             };
10604             genfn = fns[size][u];
10605             genfn(tcg_rd, tcg_env, tcg_rn);
10606             break;
10607         }
10608         case 0x1a: /* FCVTNS */
10609         case 0x1b: /* FCVTMS */
10610         case 0x1c: /* FCVTAS */
10611         case 0x3a: /* FCVTPS */
10612         case 0x3b: /* FCVTZS */
10613             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10614                                  tcg_fpstatus);
10615             break;
10616         case 0x5a: /* FCVTNU */
10617         case 0x5b: /* FCVTMU */
10618         case 0x5c: /* FCVTAU */
10619         case 0x7a: /* FCVTPU */
10620         case 0x7b: /* FCVTZU */
10621             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10622                                  tcg_fpstatus);
10623             break;
10624         default:
10625             g_assert_not_reached();
10626         }
10627 
10628         write_fp_sreg(s, rd, tcg_rd);
10629     }
10630 
10631     if (is_fcvt) {
10632         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10633     }
10634 }
10635 
10636 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10637 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10638                                  int immh, int immb, int opcode, int rn, int rd)
10639 {
10640     int size = 32 - clz32(immh) - 1;
10641     int immhb = immh << 3 | immb;
10642     int shift = 2 * (8 << size) - immhb;
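    /*
     * For the right-shift forms the count is 2 * esize - immh:immb, e.g.
     * (illustrative values) immh:immb = 0b0001:101 (13) encodes byte
     * elements shifted right by 16 - 13 = 3; counts run from 1 up to esize.
     */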
10643     GVecGen2iFn *gvec_fn;
10644 
10645     if (extract32(immh, 3, 1) && !is_q) {
10646         unallocated_encoding(s);
10647         return;
10648     }
10649     tcg_debug_assert(size <= 3);
10650 
10651     if (!fp_access_check(s)) {
10652         return;
10653     }
10654 
10655     switch (opcode) {
10656     case 0x02: /* SSRA / USRA (accumulate) */
10657         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10658         break;
10659 
10660     case 0x08: /* SRI */
10661         gvec_fn = gen_gvec_sri;
10662         break;
10663 
10664     case 0x00: /* SSHR / USHR */
10665         if (is_u) {
10666             if (shift == 8 << size) {
10667                 /* A shift count equal to the element size produces zero.  */
10668                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10669                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10670                 return;
10671             }
10672             gvec_fn = tcg_gen_gvec_shri;
10673         } else {
10674             /* A shift count equal to the element size produces all sign bits.  */
10675             if (shift == 8 << size) {
10676                 shift -= 1;
10677             }
10678             gvec_fn = tcg_gen_gvec_sari;
10679         }
10680         break;
10681 
10682     case 0x04: /* SRSHR / URSHR (rounding) */
10683         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10684         break;
10685 
10686     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10687         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10688         break;
10689 
10690     default:
10691         g_assert_not_reached();
10692     }
10693 
10694     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10695 }
10696 
10697 /* SHL/SLI - Vector shift left */
10698 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10699                                  int immh, int immb, int opcode, int rn, int rd)
10700 {
10701     int size = 32 - clz32(immh) - 1;
10702     int immhb = immh << 3 | immb;
10703     int shift = immhb - (8 << size);
10704 
10705     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10706     assert(size >= 0 && size <= 3);
10707 
10708     if (extract32(immh, 3, 1) && !is_q) {
10709         unallocated_encoding(s);
10710         return;
10711     }
10712 
10713     if (!fp_access_check(s)) {
10714         return;
10715     }
10716 
10717     if (insert) {
10718         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10719     } else {
10720         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10721     }
10722 }
10723 
10724 /* USHLL/SHLL - Vector shift left with widening */
10725 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10726                                  int immh, int immb, int opcode, int rn, int rd)
10727 {
10728     int size = 32 - clz32(immh) - 1;
10729     int immhb = immh << 3 | immb;
10730     int shift = immhb - (8 << size);
10731     int dsize = 64;
10732     int esize = 8 << size;
10733     int elements = dsize/esize;
10734     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10735     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10736     int i;
10737 
10738     if (size >= 3) {
10739         unallocated_encoding(s);
10740         return;
10741     }
10742 
10743     if (!fp_access_check(s)) {
10744         return;
10745     }
10746 
10747     /* For the LL variants the store is larger than the load,
10748      * so if rd == rn we would overwrite parts of our input.
10749      * Load everything up front and use shifts in the main loop instead.
10750      */
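    /*
     * Each iteration shifts the packed source so that element i sits at the
     * bottom, sign- or zero-extends it (the !is_u bit selects the signed
     * extension in ext_and_shift_reg), applies the left shift and stores a
     * double-width (size + 1) destination element.
     */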
10751     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10752 
10753     for (i = 0; i < elements; i++) {
10754         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10755         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10756         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10757         write_vec_element(s, tcg_rd, rd, i, size + 1);
10758     }
10759     clear_vec_high(s, true, rd);
10760 }
10761 
10762 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10763 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10764                                  int immh, int immb, int opcode, int rn, int rd)
10765 {
10766     int immhb = immh << 3 | immb;
10767     int size = 32 - clz32(immh) - 1;
10768     int dsize = 64;
10769     int esize = 8 << size;
10770     int elements = dsize/esize;
10771     int shift = (2 * esize) - immhb;
10772     bool round = extract32(opcode, 0, 1);
10773     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10774     TCGv_i64 tcg_round;
10775     int i;
10776 
10777     if (extract32(immh, 3, 1)) {
10778         unallocated_encoding(s);
10779         return;
10780     }
10781 
10782     if (!fp_access_check(s)) {
10783         return;
10784     }
10785 
10786     tcg_rn = tcg_temp_new_i64();
10787     tcg_rd = tcg_temp_new_i64();
10788     tcg_final = tcg_temp_new_i64();
10789     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10790 
10791     if (round) {
10792         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10793     } else {
10794         tcg_round = NULL;
10795     }
10796 
10797     for (i = 0; i < elements; i++) {
10798         read_vec_element(s, tcg_rn, rn, i, size+1);
10799         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10800                                 false, true, size+1, shift);
10801 
10802         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10803     }
10804 
10805     if (!is_q) {
10806         write_vec_element(s, tcg_final, rd, 0, MO_64);
10807     } else {
10808         write_vec_element(s, tcg_final, rd, 1, MO_64);
10809     }
10810 
10811     clear_vec_high(s, is_q, rd);
10812 }
10813 
10814 
10815 /* AdvSIMD shift by immediate
10816  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10817  * +---+---+---+-------------+------+------+--------+---+------+------+
10818  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10819  * +---+---+---+-------------+------+------+--------+---+------+------+
10820  */
10821 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10822 {
10823     int rd = extract32(insn, 0, 5);
10824     int rn = extract32(insn, 5, 5);
10825     int opcode = extract32(insn, 11, 5);
10826     int immb = extract32(insn, 16, 3);
10827     int immh = extract32(insn, 19, 4);
10828     bool is_u = extract32(insn, 29, 1);
10829     bool is_q = extract32(insn, 30, 1);
10830 
10831     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10832     assert(immh != 0);
10833 
10834     switch (opcode) {
10835     case 0x08: /* SRI */
10836         if (!is_u) {
10837             unallocated_encoding(s);
10838             return;
10839         }
10840         /* fall through */
10841     case 0x00: /* SSHR / USHR */
10842     case 0x02: /* SSRA / USRA (accumulate) */
10843     case 0x04: /* SRSHR / URSHR (rounding) */
10844     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10845         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10846         break;
10847     case 0x0a: /* SHL / SLI */
10848         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10849         break;
10850     case 0x10: /* SHRN */
10851     case 0x11: /* RSHRN / SQRSHRUN */
10852         if (is_u) {
10853             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10854                                    opcode, rn, rd);
10855         } else {
10856             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10857         }
10858         break;
10859     case 0x12: /* SQSHRN / UQSHRN */
10860     case 0x13: /* SQRSHRN / UQRSHRN */
10861         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10862                                opcode, rn, rd);
10863         break;
10864     case 0x14: /* SSHLL / USHLL */
10865         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10866         break;
10867     case 0x1c: /* SCVTF / UCVTF */
10868         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10869                                      opcode, rn, rd);
10870         break;
10871     case 0xc: /* SQSHLU */
10872         if (!is_u) {
10873             unallocated_encoding(s);
10874             return;
10875         }
10876         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10877         break;
10878     case 0xe: /* SQSHL, UQSHL */
10879         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10880         break;
10881     case 0x1f: /* FCVTZS / FCVTZU */
10882         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10883         return;
10884     default:
10885         unallocated_encoding(s);
10886         return;
10887     }
10888 }
10889 
10890 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
10891                                   int size, int rn, int rd)
10892 {
10893     /* Handle 2-reg-misc ops which are widening (so each size element
10894      * in the source becomes a 2*size element in the destination).
10895      * The only instruction like this is FCVTL.
10896      */
10897     int pass;
10898 
10899     if (size == 3) {
10900         /* 32 -> 64 bit fp conversion */
10901         TCGv_i64 tcg_res[2];
10902         int srcelt = is_q ? 2 : 0;
10903 
10904         for (pass = 0; pass < 2; pass++) {
10905             TCGv_i32 tcg_op = tcg_temp_new_i32();
10906             tcg_res[pass] = tcg_temp_new_i64();
10907 
10908             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
10909             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
10910         }
10911         for (pass = 0; pass < 2; pass++) {
10912             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10913         }
10914     } else {
10915         /* 16 -> 32 bit fp conversion */
10916         int srcelt = is_q ? 4 : 0;
10917         TCGv_i32 tcg_res[4];
10918         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10919         TCGv_i32 ahp = get_ahp_flag();
10920 
10921         for (pass = 0; pass < 4; pass++) {
10922             tcg_res[pass] = tcg_temp_new_i32();
10923 
10924             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
10925             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10926                                            fpst, ahp);
10927         }
10928         for (pass = 0; pass < 4; pass++) {
10929             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10930         }
10931     }
10932 }
10933 
10934 static void handle_rev(DisasContext *s, int opcode, bool u,
10935                        bool is_q, int size, int rn, int rd)
10936 {
10937     int op = (opcode << 1) | u;
10938     int opsz = op + size;
10939     int grp_size = 3 - opsz;
10940     int dsize = is_q ? 128 : 64;
10941     int i;
10942 
10943     if (opsz >= 3) {
10944         unallocated_encoding(s);
10945         return;
10946     }
10947 
10948     if (!fp_access_check(s)) {
10949         return;
10950     }
10951 
10952     if (size == 0) {
10953         /* Special case bytes, use bswap op on each group of elements */
10954         int groups = dsize / (8 << grp_size);
10955 
10956         for (i = 0; i < groups; i++) {
10957             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
10958 
10959             read_vec_element(s, tcg_tmp, rn, i, grp_size);
10960             switch (grp_size) {
10961             case MO_16:
10962                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
10963                 break;
10964             case MO_32:
10965                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
10966                 break;
10967             case MO_64:
10968                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
10969                 break;
10970             default:
10971                 g_assert_not_reached();
10972             }
10973             write_vec_element(s, tcg_tmp, rd, i, grp_size);
10974         }
10975         clear_vec_high(s, is_q, rd);
10976     } else {
10977         int revmask = (1 << grp_size) - 1;
10978         int esize = 8 << size;
10979         int elements = dsize / esize;
10980         TCGv_i64 tcg_rn = tcg_temp_new_i64();
10981         TCGv_i64 tcg_rd[2];
10982 
10983         for (i = 0; i < 2; i++) {
10984             tcg_rd[i] = tcg_temp_new_i64();
10985             tcg_gen_movi_i64(tcg_rd[i], 0);
10986         }
10987 
10988         for (i = 0; i < elements; i++) {
10989             int e_rev = (i & 0xf) ^ revmask;
10990             int w = (e_rev * esize) / 64;
10991             int o = (e_rev * esize) % 64;
10992 
10993             read_vec_element(s, tcg_rn, rn, i, size);
10994             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
10995         }
10996 
10997         for (i = 0; i < 2; i++) {
10998             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
10999         }
11000         clear_vec_high(s, true, rd);
11001     }
11002 }
11003 
11004 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11005                                   bool is_q, int size, int rn, int rd)
11006 {
11007     /* Implement the pairwise operations from 2-misc:
11008      * SADDLP, UADDLP, SADALP, UADALP.
11009      * These all add pairs of elements in the input to produce a
11010      * double-width result element in the output (possibly accumulating).
11011      */
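    /*
     * For example SADDLP on byte elements produces halfword results, each
     * the sign-extended sum of an adjacent pair of source bytes; the
     * SADALP/UADALP accumulating forms then add that sum into the existing
     * destination element.
     */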
11012     bool accum = (opcode == 0x6);
11013     int maxpass = is_q ? 2 : 1;
11014     int pass;
11015     TCGv_i64 tcg_res[2];
11016 
11017     if (size == 2) {
11018         /* 32 + 32 -> 64 op */
11019         MemOp memop = size + (u ? 0 : MO_SIGN);
11020 
11021         for (pass = 0; pass < maxpass; pass++) {
11022             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11023             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11024 
11025             tcg_res[pass] = tcg_temp_new_i64();
11026 
11027             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11028             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11029             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11030             if (accum) {
11031                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11032                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11033             }
11034         }
11035     } else {
11036         for (pass = 0; pass < maxpass; pass++) {
11037             TCGv_i64 tcg_op = tcg_temp_new_i64();
11038             NeonGenOne64OpFn *genfn;
11039             static NeonGenOne64OpFn * const fns[2][2] = {
11040                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11041                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11042             };
11043 
11044             genfn = fns[size][u];
11045 
11046             tcg_res[pass] = tcg_temp_new_i64();
11047 
11048             read_vec_element(s, tcg_op, rn, pass, MO_64);
11049             genfn(tcg_res[pass], tcg_op);
11050 
11051             if (accum) {
11052                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11053                 if (size == 0) {
11054                     gen_helper_neon_addl_u16(tcg_res[pass],
11055                                              tcg_res[pass], tcg_op);
11056                 } else {
11057                     gen_helper_neon_addl_u32(tcg_res[pass],
11058                                              tcg_res[pass], tcg_op);
11059                 }
11060             }
11061         }
11062     }
11063     if (!is_q) {
11064         tcg_res[1] = tcg_constant_i64(0);
11065     }
11066     for (pass = 0; pass < 2; pass++) {
11067         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11068     }
11069 }
11070 
11071 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11072 {
11073     /* Implement SHLL and SHLL2 */
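    /*
     * Each source element is widened to 2 * esize bits and then shifted
     * left by esize; SHLL2 (is_q) takes its inputs from the upper half of
     * the source register.
     */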
11074     int pass;
11075     int part = is_q ? 2 : 0;
11076     TCGv_i64 tcg_res[2];
11077 
11078     for (pass = 0; pass < 2; pass++) {
11079         static NeonGenWidenFn * const widenfns[3] = {
11080             gen_helper_neon_widen_u8,
11081             gen_helper_neon_widen_u16,
11082             tcg_gen_extu_i32_i64,
11083         };
11084         NeonGenWidenFn *widenfn = widenfns[size];
11085         TCGv_i32 tcg_op = tcg_temp_new_i32();
11086 
11087         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11088         tcg_res[pass] = tcg_temp_new_i64();
11089         widenfn(tcg_res[pass], tcg_op);
11090         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11091     }
11092 
11093     for (pass = 0; pass < 2; pass++) {
11094         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11095     }
11096 }
11097 
11098 /* AdvSIMD two reg misc
11099  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11100  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11101  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11102  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11103  */
11104 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11105 {
11106     int size = extract32(insn, 22, 2);
11107     int opcode = extract32(insn, 12, 5);
11108     bool u = extract32(insn, 29, 1);
11109     bool is_q = extract32(insn, 30, 1);
11110     int rn = extract32(insn, 5, 5);
11111     int rd = extract32(insn, 0, 5);
11112     bool need_fpstatus = false;
11113     int rmode = -1;
11114     TCGv_i32 tcg_rmode;
11115     TCGv_ptr tcg_fpstatus;
11116 
11117     switch (opcode) {
11118     case 0x0: /* REV64, REV32 */
11119     case 0x1: /* REV16 */
11120         handle_rev(s, opcode, u, is_q, size, rn, rd);
11121         return;
11122     case 0x5: /* CNT, NOT, RBIT */
11123         if (u && size == 0) {
11124             /* NOT */
11125             break;
11126         } else if (u && size == 1) {
11127             /* RBIT */
11128             break;
11129         } else if (!u && size == 0) {
11130             /* CNT */
11131             break;
11132         }
11133         unallocated_encoding(s);
11134         return;
11135     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11136     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11137         if (size == 3) {
11138             unallocated_encoding(s);
11139             return;
11140         }
11141         if (!fp_access_check(s)) {
11142             return;
11143         }
11144 
11145         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11146         return;
11147     case 0x4: /* CLS, CLZ */
11148         if (size == 3) {
11149             unallocated_encoding(s);
11150             return;
11151         }
11152         break;
11153     case 0x2: /* SADDLP, UADDLP */
11154     case 0x6: /* SADALP, UADALP */
11155         if (size == 3) {
11156             unallocated_encoding(s);
11157             return;
11158         }
11159         if (!fp_access_check(s)) {
11160             return;
11161         }
11162         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11163         return;
11164     case 0x13: /* SHLL, SHLL2 */
11165         if (u == 0 || size == 3) {
11166             unallocated_encoding(s);
11167             return;
11168         }
11169         if (!fp_access_check(s)) {
11170             return;
11171         }
11172         handle_shll(s, is_q, size, rn, rd);
11173         return;
11174     case 0xa: /* CMLT */
11175         if (u == 1) {
11176             unallocated_encoding(s);
11177             return;
11178         }
11179         /* fall through */
11180     case 0x8: /* CMGT, CMGE */
11181     case 0x9: /* CMEQ, CMLE */
11182     case 0xb: /* ABS, NEG */
11183         if (size == 3 && !is_q) {
11184             unallocated_encoding(s);
11185             return;
11186         }
11187         break;
11188     case 0x7: /* SQABS, SQNEG */
11189         if (size == 3 && !is_q) {
11190             unallocated_encoding(s);
11191             return;
11192         }
11193         break;
11194     case 0xc ... 0xf:
11195     case 0x16 ... 0x1f:
11196     {
11197         /* Floating point: U, size[1] and opcode indicate operation;
11198          * size[0] indicates single or double precision.
11199          */
11200         int is_double = extract32(size, 0, 1);
11201         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11202         size = is_double ? 3 : 2;
11203         switch (opcode) {
11204         case 0x2f: /* FABS */
11205         case 0x6f: /* FNEG */
11206             if (size == 3 && !is_q) {
11207                 unallocated_encoding(s);
11208                 return;
11209             }
11210             break;
11211         case 0x1d: /* SCVTF */
11212         case 0x5d: /* UCVTF */
11213         {
11214             bool is_signed = (opcode == 0x1d);
11215             int elements = is_double ? 2 : is_q ? 4 : 2;
11216             if (is_double && !is_q) {
11217                 unallocated_encoding(s);
11218                 return;
11219             }
11220             if (!fp_access_check(s)) {
11221                 return;
11222             }
11223             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
11224             return;
11225         }
11226         case 0x2c: /* FCMGT (zero) */
11227         case 0x2d: /* FCMEQ (zero) */
11228         case 0x2e: /* FCMLT (zero) */
11229         case 0x6c: /* FCMGE (zero) */
11230         case 0x6d: /* FCMLE (zero) */
11231             if (size == 3 && !is_q) {
11232                 unallocated_encoding(s);
11233                 return;
11234             }
11235             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
11236             return;
11237         case 0x7f: /* FSQRT */
11238             if (size == 3 && !is_q) {
11239                 unallocated_encoding(s);
11240                 return;
11241             }
11242             break;
11243         case 0x1a: /* FCVTNS */
11244         case 0x1b: /* FCVTMS */
11245         case 0x3a: /* FCVTPS */
11246         case 0x3b: /* FCVTZS */
11247         case 0x5a: /* FCVTNU */
11248         case 0x5b: /* FCVTMU */
11249         case 0x7a: /* FCVTPU */
11250         case 0x7b: /* FCVTZU */
11251             need_fpstatus = true;
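                  /* Opcode bits [5] and [0] select the N/M/P/Z variant; the
                   * shuffle below turns them into the matching FPROUNDING_*
                   * value (to nearest, -inf, +inf, toward zero).  */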
11252             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11253             if (size == 3 && !is_q) {
11254                 unallocated_encoding(s);
11255                 return;
11256             }
11257             break;
11258         case 0x5c: /* FCVTAU */
11259         case 0x1c: /* FCVTAS */
11260             need_fpstatus = true;
11261             rmode = FPROUNDING_TIEAWAY;
11262             if (size == 3 && !is_q) {
11263                 unallocated_encoding(s);
11264                 return;
11265             }
11266             break;
11267         case 0x3c: /* URECPE */
11268             if (size == 3) {
11269                 unallocated_encoding(s);
11270                 return;
11271             }
11272             /* fall through */
11273         case 0x3d: /* FRECPE */
11274         case 0x7d: /* FRSQRTE */
11275             if (size == 3 && !is_q) {
11276                 unallocated_encoding(s);
11277                 return;
11278             }
11279             if (!fp_access_check(s)) {
11280                 return;
11281             }
11282             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11283             return;
11284         case 0x56: /* FCVTXN, FCVTXN2 */
11285             if (size == 2) {
11286                 unallocated_encoding(s);
11287                 return;
11288             }
11289             /* fall through */
11290         case 0x16: /* FCVTN, FCVTN2 */
11291             /* handle_2misc_narrow does a 2*size -> size operation, but these
11292              * instructions encode the source size rather than dest size.
11293              */
11294             if (!fp_access_check(s)) {
11295                 return;
11296             }
11297             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11298             return;
11299         case 0x36: /* BFCVTN, BFCVTN2 */
11300             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
11301                 unallocated_encoding(s);
11302                 return;
11303             }
11304             if (!fp_access_check(s)) {
11305                 return;
11306             }
11307             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11308             return;
11309         case 0x17: /* FCVTL, FCVTL2 */
11310             if (!fp_access_check(s)) {
11311                 return;
11312             }
11313             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11314             return;
11315         case 0x18: /* FRINTN */
11316         case 0x19: /* FRINTM */
11317         case 0x38: /* FRINTP */
11318         case 0x39: /* FRINTZ */
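                  /* As for the FCVT* group above, opcode bits [5] and [0]
                   * select the N/M/P/Z rounding mode.  */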
11319             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11320             /* fall through */
11321         case 0x59: /* FRINTX */
11322         case 0x79: /* FRINTI */
11323             need_fpstatus = true;
11324             if (size == 3 && !is_q) {
11325                 unallocated_encoding(s);
11326                 return;
11327             }
11328             break;
11329         case 0x58: /* FRINTA */
11330             rmode = FPROUNDING_TIEAWAY;
11331             need_fpstatus = true;
11332             if (size == 3 && !is_q) {
11333                 unallocated_encoding(s);
11334                 return;
11335             }
11336             break;
11337         case 0x7c: /* URSQRTE */
11338             if (size == 3) {
11339                 unallocated_encoding(s);
11340                 return;
11341             }
11342             break;
11343         case 0x1e: /* FRINT32Z */
11344         case 0x1f: /* FRINT64Z */
11345             rmode = FPROUNDING_ZERO;
11346             /* fall through */
11347         case 0x5e: /* FRINT32X */
11348         case 0x5f: /* FRINT64X */
11349             need_fpstatus = true;
11350             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
11351                 unallocated_encoding(s);
11352                 return;
11353             }
11354             break;
11355         default:
11356             unallocated_encoding(s);
11357             return;
11358         }
11359         break;
11360     }
11361     default:
11362     case 0x3: /* SUQADD, USQADD */
11363         unallocated_encoding(s);
11364         return;
11365     }
11366 
11367     if (!fp_access_check(s)) {
11368         return;
11369     }
11370 
11371     if (need_fpstatus || rmode >= 0) {
11372         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
11373     } else {
11374         tcg_fpstatus = NULL;
11375     }
11376     if (rmode >= 0) {
11377         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
11378     } else {
11379         tcg_rmode = NULL;
11380     }
11381 
11382     switch (opcode) {
11383     case 0x5:
11384         if (u && size == 0) { /* NOT */
11385             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
11386             return;
11387         }
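              /* RBIT and CNT are handled in the byte-element loop below.  */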
11388         break;
11389     case 0x8: /* CMGT, CMGE */
11390         if (u) {
11391             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
11392         } else {
11393             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
11394         }
11395         return;
11396     case 0x9: /* CMEQ, CMLE */
11397         if (u) {
11398             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
11399         } else {
11400             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
11401         }
11402         return;
11403     case 0xa: /* CMLT */
11404         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
11405         return;
11406     case 0xb:
11407         if (u) { /* ABS, NEG */
11408             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
11409         } else {
11410             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
11411         }
11412         return;
11413     }
11414 
11415     if (size == 3) {
11416         /* All 64-bit element operations can be shared with scalar 2misc */
11417         int pass;
11418 
11419         /* Coverity claims (size == 3 && !is_q) has been eliminated
11420          * from all paths leading to here.
11421          */
11422         tcg_debug_assert(is_q);
11423         for (pass = 0; pass < 2; pass++) {
11424             TCGv_i64 tcg_op = tcg_temp_new_i64();
11425             TCGv_i64 tcg_res = tcg_temp_new_i64();
11426 
11427             read_vec_element(s, tcg_op, rn, pass, MO_64);
11428 
11429             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
11430                             tcg_rmode, tcg_fpstatus);
11431 
11432             write_vec_element(s, tcg_res, rd, pass, MO_64);
11433         }
11434     } else {
11435         int pass;
11436 
11437         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11438             TCGv_i32 tcg_op = tcg_temp_new_i32();
11439             TCGv_i32 tcg_res = tcg_temp_new_i32();
11440 
11441             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
11442 
11443             if (size == 2) {
11444                 /* Special cases for 32 bit elements */
11445                 switch (opcode) {
11446                 case 0x4: /* CLS */
11447                     if (u) {
11448                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
11449                     } else {
11450                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
11451                     }
11452                     break;
11453                 case 0x7: /* SQABS, SQNEG */
11454                     if (u) {
11455                         gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
11456                     } else {
11457                         gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
11458                     }
11459                     break;
11460                 case 0x2f: /* FABS */
11461                     gen_vfp_abss(tcg_res, tcg_op);
11462                     break;
11463                 case 0x6f: /* FNEG */
11464                     gen_vfp_negs(tcg_res, tcg_op);
11465                     break;
11466                 case 0x7f: /* FSQRT */
11467                     gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
11468                     break;
11469                 case 0x1a: /* FCVTNS */
11470                 case 0x1b: /* FCVTMS */
11471                 case 0x1c: /* FCVTAS */
11472                 case 0x3a: /* FCVTPS */
11473                 case 0x3b: /* FCVTZS */
11474                     gen_helper_vfp_tosls(tcg_res, tcg_op,
11475                                          tcg_constant_i32(0), tcg_fpstatus);
11476                     break;
11477                 case 0x5a: /* FCVTNU */
11478                 case 0x5b: /* FCVTMU */
11479                 case 0x5c: /* FCVTAU */
11480                 case 0x7a: /* FCVTPU */
11481                 case 0x7b: /* FCVTZU */
11482                     gen_helper_vfp_touls(tcg_res, tcg_op,
11483                                          tcg_constant_i32(0), tcg_fpstatus);
11484                     break;
11485                 case 0x18: /* FRINTN */
11486                 case 0x19: /* FRINTM */
11487                 case 0x38: /* FRINTP */
11488                 case 0x39: /* FRINTZ */
11489                 case 0x58: /* FRINTA */
11490                 case 0x79: /* FRINTI */
11491                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
11492                     break;
11493                 case 0x59: /* FRINTX */
11494                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
11495                     break;
11496                 case 0x7c: /* URSQRTE */
11497                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
11498                     break;
11499                 case 0x1e: /* FRINT32Z */
11500                 case 0x5e: /* FRINT32X */
11501                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
11502                     break;
11503                 case 0x1f: /* FRINT64Z */
11504                 case 0x5f: /* FRINT64X */
11505                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
11506                     break;
11507                 default:
11508                     g_assert_not_reached();
11509                 }
11510             } else {
11511                 /* Use helpers for 8 and 16 bit elements */
11512                 switch (opcode) {
11513                 case 0x5: /* CNT, RBIT */
11514                     /* For these two insns size is part of the opcode specifier
11515                      * (handled earlier); they always operate on byte elements.
11516                      */
11517                     if (u) {
11518                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
11519                     } else {
11520                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
11521                     }
11522                     break;
11523                 case 0x7: /* SQABS, SQNEG */
11524                 {
11525                     NeonGenOneOpEnvFn *genfn;
11526                     static NeonGenOneOpEnvFn * const fns[2][2] = {
11527                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
11528                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
11529                     };
11530                     genfn = fns[size][u];
11531                     genfn(tcg_res, tcg_env, tcg_op);
11532                     break;
11533                 }
11534                 case 0x4: /* CLS, CLZ */
11535                     if (u) {
11536                         if (size == 0) {
11537                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
11538                         } else {
11539                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
11540                         }
11541                     } else {
11542                         if (size == 0) {
11543                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
11544                         } else {
11545                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
11546                         }
11547                     }
11548                     break;
11549                 default:
11550                     g_assert_not_reached();
11551                 }
11552             }
11553 
11554             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11555         }
11556     }
11557     clear_vec_high(s, is_q, rd);
11558 
11559     if (tcg_rmode) {
11560         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
11561     }
11562 }
11563 
11564 /* AdvSIMD [scalar] two register miscellaneous (FP16)
11565  *
11566  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
11567  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11568  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11569  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11570  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
11571  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
11572  *
11573  * This actually covers two groups where scalar access is governed by
11574  * bit 28. Several of the instructions (the float-to-integral ones)
11575  * exist only in the vector form and are unallocated for the scalar
11576  * decode. Also, in the scalar decode Q is always 1.
11577  */
11578 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
11579 {
11580     int fpop, opcode, a, u;
11581     int rn, rd;
11582     bool is_q;
11583     bool is_scalar;
11584     bool only_in_vector = false;
11585 
11586     int pass;
11587     TCGv_i32 tcg_rmode = NULL;
11588     TCGv_ptr tcg_fpstatus = NULL;
11589     bool need_fpst = true;
11590     int rmode = -1;
11591 
11592     if (!dc_isar_feature(aa64_fp16, s)) {
11593         unallocated_encoding(s);
11594         return;
11595     }
11596 
11597     rd = extract32(insn, 0, 5);
11598     rn = extract32(insn, 5, 5);
11599 
11600     a = extract32(insn, 23, 1);
11601     u = extract32(insn, 29, 1);
11602     is_scalar = extract32(insn, 28, 1);
11603     is_q = extract32(insn, 30, 1);
11604 
11605     opcode = extract32(insn, 12, 5);
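          /* Build fpop as u:a:opcode<4:0> so the case values below line up
           * with the merged opcode used by the non-FP16 decoder above.  */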
11606     fpop = deposit32(opcode, 5, 1, a);
11607     fpop = deposit32(fpop, 6, 1, u);
11608 
11609     switch (fpop) {
11610     case 0x1d: /* SCVTF */
11611     case 0x5d: /* UCVTF */
11612     {
11613         int elements;
11614 
11615         if (is_scalar) {
11616             elements = 1;
11617         } else {
11618             elements = (is_q ? 8 : 4);
11619         }
11620 
11621         if (!fp_access_check(s)) {
11622             return;
11623         }
11624         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
11625         return;
11626     }
11628     case 0x2c: /* FCMGT (zero) */
11629     case 0x2d: /* FCMEQ (zero) */
11630     case 0x2e: /* FCMLT (zero) */
11631     case 0x6c: /* FCMGE (zero) */
11632     case 0x6d: /* FCMLE (zero) */
11633         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
11634         return;
11635     case 0x3d: /* FRECPE */
11636     case 0x3f: /* FRECPX */
11637         break;
11638     case 0x18: /* FRINTN */
11639         only_in_vector = true;
11640         rmode = FPROUNDING_TIEEVEN;
11641         break;
11642     case 0x19: /* FRINTM */
11643         only_in_vector = true;
11644         rmode = FPROUNDING_NEGINF;
11645         break;
11646     case 0x38: /* FRINTP */
11647         only_in_vector = true;
11648         rmode = FPROUNDING_POSINF;
11649         break;
11650     case 0x39: /* FRINTZ */
11651         only_in_vector = true;
11652         rmode = FPROUNDING_ZERO;
11653         break;
11654     case 0x58: /* FRINTA */
11655         only_in_vector = true;
11656         rmode = FPROUNDING_TIEAWAY;
11657         break;
11658     case 0x59: /* FRINTX */
11659     case 0x79: /* FRINTI */
11660         only_in_vector = true;
11661         /* current rounding mode */
11662         break;
11663     case 0x1a: /* FCVTNS */
11664         rmode = FPROUNDING_TIEEVEN;
11665         break;
11666     case 0x1b: /* FCVTMS */
11667         rmode = FPROUNDING_NEGINF;
11668         break;
11669     case 0x1c: /* FCVTAS */
11670         rmode = FPROUNDING_TIEAWAY;
11671         break;
11672     case 0x3a: /* FCVTPS */
11673         rmode = FPROUNDING_POSINF;
11674         break;
11675     case 0x3b: /* FCVTZS */
11676         rmode = FPROUNDING_ZERO;
11677         break;
11678     case 0x5a: /* FCVTNU */
11679         rmode = FPROUNDING_TIEEVEN;
11680         break;
11681     case 0x5b: /* FCVTMU */
11682         rmode = FPROUNDING_NEGINF;
11683         break;
11684     case 0x5c: /* FCVTAU */
11685         rmode = FPROUNDING_TIEAWAY;
11686         break;
11687     case 0x7a: /* FCVTPU */
11688         rmode = FPROUNDING_POSINF;
11689         break;
11690     case 0x7b: /* FCVTZU */
11691         rmode = FPROUNDING_ZERO;
11692         break;
11693     case 0x2f: /* FABS */
11694     case 0x6f: /* FNEG */
11695         need_fpst = false;
11696         break;
11697     case 0x7d: /* FRSQRTE */
11698     case 0x7f: /* FSQRT (vector) */
11699         break;
11700     default:
11701         unallocated_encoding(s);
11702         return;
11703     }
11704 
11705 
11706     /* Check additional constraints for the scalar encoding */
11707     if (is_scalar) {
11708         if (!is_q) {
11709             unallocated_encoding(s);
11710             return;
11711         }
11712         /* FRINTxx is only in the vector form */
11713         if (only_in_vector) {
11714             unallocated_encoding(s);
11715             return;
11716         }
11717     }
11718 
11719     if (!fp_access_check(s)) {
11720         return;
11721     }
11722 
11723     if (rmode >= 0 || need_fpst) {
11724         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
11725     }
11726 
11727     if (rmode >= 0) {
11728         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
11729     }
11730 
11731     if (is_scalar) {
11732         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
11733         TCGv_i32 tcg_res = tcg_temp_new_i32();
11734 
11735         switch (fpop) {
11736         case 0x1a: /* FCVTNS */
11737         case 0x1b: /* FCVTMS */
11738         case 0x1c: /* FCVTAS */
11739         case 0x3a: /* FCVTPS */
11740         case 0x3b: /* FCVTZS */
11741             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11742             break;
11743         case 0x3d: /* FRECPE */
11744             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11745             break;
11746         case 0x3f: /* FRECPX */
11747             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
11748             break;
11749         case 0x5a: /* FCVTNU */
11750         case 0x5b: /* FCVTMU */
11751         case 0x5c: /* FCVTAU */
11752         case 0x7a: /* FCVTPU */
11753         case 0x7b: /* FCVTZU */
11754             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11755             break;
11756         case 0x6f: /* FNEG */
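                  /* FNEG: flip the f16 sign bit (bit 15).  */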
11757             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11758             break;
11759         case 0x7d: /* FRSQRTE */
11760             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11761             break;
11762         default:
11763             g_assert_not_reached();
11764         }
11765 
11766         /* limit any sign extension going on */
11767         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
11768         write_fp_sreg(s, rd, tcg_res);
11769     } else {
11770         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
11771             TCGv_i32 tcg_op = tcg_temp_new_i32();
11772             TCGv_i32 tcg_res = tcg_temp_new_i32();
11773 
11774             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
11775 
11776             switch (fpop) {
11777             case 0x1a: /* FCVTNS */
11778             case 0x1b: /* FCVTMS */
11779             case 0x1c: /* FCVTAS */
11780             case 0x3a: /* FCVTPS */
11781             case 0x3b: /* FCVTZS */
11782                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11783                 break;
11784             case 0x3d: /* FRECPE */
11785                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11786                 break;
11787             case 0x5a: /* FCVTNU */
11788             case 0x5b: /* FCVTMU */
11789             case 0x5c: /* FCVTAU */
11790             case 0x7a: /* FCVTPU */
11791             case 0x7b: /* FCVTZU */
11792                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11793                 break;
11794             case 0x18: /* FRINTN */
11795             case 0x19: /* FRINTM */
11796             case 0x38: /* FRINTP */
11797             case 0x39: /* FRINTZ */
11798             case 0x58: /* FRINTA */
11799             case 0x79: /* FRINTI */
11800                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
11801                 break;
11802             case 0x59: /* FRINTX */
11803                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
11804                 break;
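                  /* FABS and FNEG only manipulate the f16 sign bit, so
                   * they do not need fpstatus.  */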
11805             case 0x2f: /* FABS */
11806                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
11807                 break;
11808             case 0x6f: /* FNEG */
11809                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11810                 break;
11811             case 0x7d: /* FRSQRTE */
11812                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11813                 break;
11814             case 0x7f: /* FSQRT */
11815                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
11816                 break;
11817             default:
11818                 g_assert_not_reached();
11819             }
11820 
11821             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11822         }
11823 
11824         clear_vec_high(s, is_q, rd);
11825     }
11826 
11827     if (tcg_rmode) {
11828         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
11829     }
11830 }
11831 
11832 /* C3.6 Data processing - SIMD, inc Crypto
11833  *
11834  * As the decode gets a little complex we are using a table based
11835  * approach for this part of the decode.
11836  */
11837 static const AArch64DecodeTable data_proc_simd[] = {
11838     /* pattern  ,  mask     ,  fn                        */
11839     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11840     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11841     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11842     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11843     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11844     { 0x0e000000, 0xbf208c00, disas_simd_tb },
11845     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11846     { 0x2e000000, 0xbf208400, disas_simd_ext },
11847     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11848     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11849     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
11850     { 0x00000000, 0x00000000, NULL }
11851 };
11852 
11853 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11854 {
11855     /* Note that this is called with all non-FP cases from
11856      * table C3-6 so it must UNDEF for entries not specifically
11857      * allocated to instructions in that table.
11858      */
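          /* The table is scanned in order; the all-zero sentinel entry
           * matches anything left over and yields fn == NULL.  */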
11859     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11860     if (fn) {
11861         fn(s, insn);
11862     } else {
11863         unallocated_encoding(s);
11864     }
11865 }
11866 
11867 /* C3.6 Data processing - SIMD and floating point */
11868 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11869 {
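          /* Bit 28 set with bit 30 clear selects the floating-point
           * data-processing group; everything else here is SIMD/crypto.  */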
11870     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11871         disas_data_proc_fp(s, insn);
11872     } else {
11873         /* SIMD, including crypto */
11874         disas_data_proc_simd(s, insn);
11875     }
11876 }
11877 
11878 static bool trans_OK(DisasContext *s, arg_OK *a)
11879 {
11880     return true;
11881 }
11882 
11883 static bool trans_FAIL(DisasContext *s, arg_OK *a)
11884 {
11885     s->is_nonstreaming = true;
11886     return true;
11887 }
11888 
11889 /**
11890  * btype_destination_ok:
11891  * @insn: The instruction at the branch destination
11892  * @bt: SCTLR_ELx.BT
11893  * @btype: PSTATE.BTYPE, and is non-zero
11894  *
11895  * On a guarded page, there are a limited number of insns
11896  * that may be present at the branch target:
11897  *   - branch target identifiers,
11898  *   - paciasp, pacibsp,
11899  *   - BRK insn
11900  *   - HLT insn
11901  * Anything else causes a Branch Target Exception.
11902  *
11903  * Return true if the branch is compatible, false to raise BTITRAP.
11904  */
11905 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
11906 {
11907     if ((insn & 0xfffff01fu) == 0xd503201fu) {
11908         /* HINT space */
11909         switch (extract32(insn, 5, 7)) {
11910         case 0b011001: /* PACIASP */
11911         case 0b011011: /* PACIBSP */
11912             /*
11913              * If SCTLR_ELx.BT, then PACI*SP are not compatible
11914              * with btype == 3.  Otherwise all btype are ok.
11915              */
11916             return !bt || btype != 3;
11917         case 0b100000: /* BTI */
11918             /* Not compatible with any btype.  */
11919             return false;
11920         case 0b100010: /* BTI c */
11921             /* Not compatible with btype == 3 */
11922             return btype != 3;
11923         case 0b100100: /* BTI j */
11924             /* Not compatible with btype == 2 */
11925             return btype != 2;
11926         case 0b100110: /* BTI jc */
11927             /* Compatible with any btype.  */
11928             return true;
11929         }
11930     } else {
11931         switch (insn & 0xffe0001fu) {
11932         case 0xd4200000u: /* BRK */
11933         case 0xd4400000u: /* HLT */
11934             /* Give priority to the breakpoint exception.  */
11935             return true;
11936         }
11937     }
11938     return false;
11939 }
11940 
11941 /* C3.1 A64 instruction index by encoding */
11942 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
11943 {
11944     switch (extract32(insn, 25, 4)) {
11945     case 0x5:
11946     case 0xd:      /* Data processing - register */
11947         disas_data_proc_reg(s, insn);
11948         break;
11949     case 0x7:
11950     case 0xf:      /* Data processing - SIMD and floating point */
11951         disas_data_proc_simd_fp(s, insn);
11952         break;
11953     default:
11954         unallocated_encoding(s);
11955         break;
11956     }
11957 }
11958 
11959 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
11960                                           CPUState *cpu)
11961 {
11962     DisasContext *dc = container_of(dcbase, DisasContext, base);
11963     CPUARMState *env = cpu_env(cpu);
11964     ARMCPU *arm_cpu = env_archcpu(env);
11965     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
11966     int bound, core_mmu_idx;
11967 
11968     dc->isar = &arm_cpu->isar;
11969     dc->condjmp = 0;
11970     dc->pc_save = dc->base.pc_first;
11971     dc->aarch64 = true;
11972     dc->thumb = false;
11973     dc->sctlr_b = 0;
11974     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
11975     dc->condexec_mask = 0;
11976     dc->condexec_cond = 0;
11977     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
11978     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
11979     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
11980     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
11981     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
11982     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11983 #if !defined(CONFIG_USER_ONLY)
11984     dc->user = (dc->current_el == 0);
11985 #endif
11986     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
11987     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
11988     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
11989     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
11990     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
11991     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
11992     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
11993     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
11994     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
11995     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
11996     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
11997     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
11998     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
11999     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
12000     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
12001     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
12002     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
12003     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
12004     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
12005     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
12006     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
12007     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
12008     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
12009     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
12010     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
12011     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
12012     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
12013     dc->vec_len = 0;
12014     dc->vec_stride = 0;
12015     dc->cp_regs = arm_cpu->cp_regs;
12016     dc->features = env->features;
12017     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
12018     dc->gm_blocksize = arm_cpu->gm_blocksize;
12019 
12020 #ifdef CONFIG_USER_ONLY
12021     /* In sve_probe_page, we assume TBI is enabled. */
12022     tcg_debug_assert(dc->tbid & 1);
12023 #endif
12024 
12025     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
12026 
12027     /* Single step state. The code-generation logic here is:
12028      *  SS_ACTIVE == 0:
12029      *   generate code with no special handling for single-stepping (except
12030      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
12031      *   this happens anyway because those changes are all system register or
12032      *   PSTATE writes).
12033      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
12034      *   emit code for one insn
12035      *   emit code to clear PSTATE.SS
12036      *   emit code to generate software step exception for completed step
12037      *   end TB (as usual for having generated an exception)
12038      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
12039      *   emit code to generate a software step exception
12040      *   end the TB
12041      */
12042     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
12043     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
12044     dc->is_ldex = false;
12045 
12046     /* Bound the number of insns to execute to those left on the page.  */
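          /* (pc_first | TARGET_PAGE_MASK) sets every bit above the in-page
           * offset, so negating it gives the bytes remaining in the page;
           * dividing by 4 turns that into an insn count.  */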
12047     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
12048 
12049     /* If architectural single step active, limit to 1.  */
12050     if (dc->ss_active) {
12051         bound = 1;
12052     }
12053     dc->base.max_insns = MIN(dc->base.max_insns, bound);
12054 }
12055 
12056 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
12057 {
12058 }
12059 
12060 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
12061 {
12062     DisasContext *dc = container_of(dcbase, DisasContext, base);
12063     target_ulong pc_arg = dc->base.pc_next;
12064 
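          /* With CF_PCREL, record only the offset within the page; the TB
           * is not tied to a single virtual page.  */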
12065     if (tb_cflags(dcbase->tb) & CF_PCREL) {
12066         pc_arg &= ~TARGET_PAGE_MASK;
12067     }
12068     tcg_gen_insn_start(pc_arg, 0, 0);
12069     dc->insn_start_updated = false;
12070 }
12071 
12072 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
12073 {
12074     DisasContext *s = container_of(dcbase, DisasContext, base);
12075     CPUARMState *env = cpu_env(cpu);
12076     uint64_t pc = s->base.pc_next;
12077     uint32_t insn;
12078 
12079     /* Singlestep exceptions have the highest priority. */
12080     if (s->ss_active && !s->pstate_ss) {
12081         /* Singlestep state is Active-pending.
12082          * If we're in this state at the start of a TB then either
12083          *  a) we just took an exception to an EL which is being debugged
12084          *     and this is the first insn in the exception handler
12085          *  b) debug exceptions were masked and we just unmasked them
12086          *     without changing EL (eg by clearing PSTATE.D)
12087          * In either case we're going to take a swstep exception in the
12088          * "did not step an insn" case, and so the syndrome ISV and EX
12089          * bits should be zero.
12090          */
12091         assert(s->base.num_insns == 1);
12092         gen_swstep_exception(s, 0, 0);
12093         s->base.is_jmp = DISAS_NORETURN;
12094         s->base.pc_next = pc + 4;
12095         return;
12096     }
12097 
12098     if (pc & 3) {
12099         /*
12100          * PC alignment fault.  This has priority over the instruction abort
12101          * that we would receive from a translation fault via arm_ldl_code.
12102          * This should only be possible after an indirect branch, at the
12103          * start of the TB.
12104          */
12105         assert(s->base.num_insns == 1);
12106         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
12107         s->base.is_jmp = DISAS_NORETURN;
12108         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
12109         return;
12110     }
12111 
12112     s->pc_curr = pc;
12113     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
12114     s->insn = insn;
12115     s->base.pc_next = pc + 4;
12116 
12117     s->fp_access_checked = false;
12118     s->sve_access_checked = false;
12119 
12120     if (s->pstate_il) {
12121         /*
12122          * Illegal execution state. This has priority over BTI
12123          * exceptions, but comes after instruction abort exceptions.
12124          */
12125         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
12126         return;
12127     }
12128 
12129     if (dc_isar_feature(aa64_bti, s)) {
12130         if (s->base.num_insns == 1) {
12131             /* First insn can have btype set to non-zero.  */
12132             tcg_debug_assert(s->btype >= 0);
12133 
12134             /*
12135              * Note that the Branch Target Exception has fairly high
12136              * priority -- below debugging exceptions but above most
12137              * everything else.  This allows us to handle this now
12138              * instead of waiting until the insn is otherwise decoded.
12139              *
12140              * We can check all but the guarded page check here;
12141              * defer the latter to a helper.
12142              */
12143             if (s->btype != 0
12144                 && !btype_destination_ok(insn, s->bt, s->btype)) {
12145                 gen_helper_guarded_page_check(tcg_env);
12146             }
12147         } else {
12148             /* Not the first insn: btype must be 0.  */
12149             tcg_debug_assert(s->btype == 0);
12150         }
12151     }
12152 
12153     s->is_nonstreaming = false;
12154     if (s->sme_trap_nonstreaming) {
12155         disas_sme_fa64(s, insn);
12156     }
12157 
12158     if (!disas_a64(s, insn) &&
12159         !disas_sme(s, insn) &&
12160         !disas_sve(s, insn)) {
12161         disas_a64_legacy(s, insn);
12162     }
12163 
12164     /*
12165      * After execution of most insns, btype is reset to 0.
12166      * Note that we set btype == -1 when the insn sets btype.
12167      */
12168     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
12169         reset_btype(s);
12170     }
12171 }
12172 
12173 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
12174 {
12175     DisasContext *dc = container_of(dcbase, DisasContext, base);
12176 
12177     if (unlikely(dc->ss_active)) {
12178         /* Note that this means single stepping WFI doesn't halt the CPU.
12179          * For conditional branch insns this is harmless unreachable code as
12180          * gen_goto_tb() has already handled emitting the debug exception
12181          * (and thus a tb-jump is not possible when singlestepping).
12182          */
12183         switch (dc->base.is_jmp) {
12184         default:
12185             gen_a64_update_pc(dc, 4);
12186             /* fall through */
12187         case DISAS_EXIT:
12188         case DISAS_JUMP:
12189             gen_step_complete_exception(dc);
12190             break;
12191         case DISAS_NORETURN:
12192             break;
12193         }
12194     } else {
12195         switch (dc->base.is_jmp) {
12196         case DISAS_NEXT:
12197         case DISAS_TOO_MANY:
12198             gen_goto_tb(dc, 1, 4);
12199             break;
12200         default:
12201         case DISAS_UPDATE_EXIT:
12202             gen_a64_update_pc(dc, 4);
12203             /* fall through */
12204         case DISAS_EXIT:
12205             tcg_gen_exit_tb(NULL, 0);
12206             break;
12207         case DISAS_UPDATE_NOCHAIN:
12208             gen_a64_update_pc(dc, 4);
12209             /* fall through */
12210         case DISAS_JUMP:
12211             tcg_gen_lookup_and_goto_ptr();
12212             break;
12213         case DISAS_NORETURN:
12214         case DISAS_SWI:
12215             break;
12216         case DISAS_WFE:
12217             gen_a64_update_pc(dc, 4);
12218             gen_helper_wfe(tcg_env);
12219             break;
12220         case DISAS_YIELD:
12221             gen_a64_update_pc(dc, 4);
12222             gen_helper_yield(tcg_env);
12223             break;
12224         case DISAS_WFI:
12225             /*
12226              * This is a special case because we don't want to just halt
12227              * the CPU if trying to debug across a WFI.
12228              */
12229             gen_a64_update_pc(dc, 4);
12230             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
12231             /*
12232              * The helper doesn't necessarily throw an exception, but we
12233              * must go back to the main loop to check for interrupts anyway.
12234              */
12235             tcg_gen_exit_tb(NULL, 0);
12236             break;
12237         }
12238     }
12239 }
12240 
12241 const TranslatorOps aarch64_translator_ops = {
12242     .init_disas_context = aarch64_tr_init_disas_context,
12243     .tb_start           = aarch64_tr_tb_start,
12244     .insn_start         = aarch64_tr_insn_start,
12245     .translate_insn     = aarch64_tr_translate_insn,
12246     .tb_stop            = aarch64_tr_tb_stop,
12247 };
12248