xref: /openbmc/qemu/target/arm/tcg/translate-a64.c (revision 7d87775f)
1 /*
2  *  AArch64 translation
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "exec/exec-all.h"
22 #include "translate.h"
23 #include "translate-a64.h"
24 #include "qemu/log.h"
25 #include "arm_ldst.h"
26 #include "semihosting/semihost.h"
27 #include "cpregs.h"
28 
/*
 * TCG globals for the 32 general purpose registers and the program
 * counter.  They are created in a64_translate_init() and are backed by
 * the xregs[] and pc fields of CPUARMState.
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/*
 * Load/store exclusive handling: TCG global backed by the exclusive_high
 * field of CPUARMState (also created in a64_translate_init()).
 */
static TCGv_i64 cpu_exclusive_high;
34 
/*
 * Names given to the TCG globals for the general purpose registers,
 * indexed by register number (a64_translate_init() passes regnames[i]
 * as the name of cpu_X[i]).  Index 30 is named "lr" and index 31 "sp".
 *
 * The table is never modified, so both the strings and the array of
 * pointers are declared const.
 */
static const char * const regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};
41 
/*
 * Shift kinds, with explicitly assigned values 0..3.
 * NOTE(review): the values presumably mirror the shift-type field of the
 * instruction encoding -- confirm against the decoder before renumbering.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,     /* logical shift left */
    A64_SHIFT_TYPE_LSR = 1,     /* logical shift right */
    A64_SHIFT_TYPE_ASR = 2,     /* arithmetic shift right */
    A64_SHIFT_TYPE_ROR = 3      /* rotate right */
};
48 
49 /*
50  * Helpers for extracting complex instruction fields
51  */
52 
53 /*
54  * For load/store with an unsigned 12 bit immediate scaled by the element
55  * size. The input has the immediate field in bits [14:3] and the element
56  * size in [2:0].
57  */
58 static int uimm_scaled(DisasContext *s, int x)
59 {
60     unsigned imm = x >> 3;
61     unsigned scale = extract32(x, 0, 3);
62     return imm << scale;
63 }
64 
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66 static int scale_by_log2_tag_granule(DisasContext *s, int x)
67 {
68     return x << LOG2_TAG_GRANULE;
69 }
70 
71 /*
72  * Include the generated decoders.
73  */
74 
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
77 
/*
 * Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */

/* Handler taking the translation context and the raw 32-bit instruction. */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/*
 * One entry of a decode table.
 * NOTE(review): presumably an entry matches when (insn & mask) == pattern,
 * in which case disas_fn is invoked -- the lookup code is not visible
 * here, so confirm against the table walker.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
88 
89 /* initialize TCG globals.  */
90 void a64_translate_init(void)
91 {
92     int i;
93 
94     cpu_pc = tcg_global_mem_new_i64(tcg_env,
95                                     offsetof(CPUARMState, pc),
96                                     "pc");
97     for (i = 0; i < 32; i++) {
98         cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
99                                           offsetof(CPUARMState, xregs[i]),
100                                           regnames[i]);
101     }
102 
103     cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
104         offsetof(CPUARMState, exclusive_high), "exclusive_high");
105 }
106 
107 /*
108  * Return the core mmu_idx to use for A64 load/store insns which
109  * have a "unprivileged load/store" variant. Those insns access
110  * EL0 if executed from an EL which has control over EL0 (usually
111  * EL1) but behave like normal loads and stores if executed from
112  * elsewhere (eg EL3).
113  *
114  * @unpriv : true for the unprivileged encoding; false for the
115  *           normal encoding (in which case we will return the same
116  *           thing as get_mem_index().
117  */
118 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
119 {
120     /*
121      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
122      * which is the usual mmu_idx for this cpu state.
123      */
124     ARMMMUIdx useridx = s->mmu_idx;
125 
126     if (unpriv && s->unpriv) {
127         /*
128          * We have pre-computed the condition for AccType_UNPRIV.
129          * Therefore we should never get here with a mmu_idx for
130          * which we do not know the corresponding user mmu_idx.
131          */
132         switch (useridx) {
133         case ARMMMUIdx_E10_1:
134         case ARMMMUIdx_E10_1_PAN:
135             useridx = ARMMMUIdx_E10_0;
136             break;
137         case ARMMMUIdx_E20_2:
138         case ARMMMUIdx_E20_2_PAN:
139             useridx = ARMMMUIdx_E20_0;
140             break;
141         default:
142             g_assert_not_reached();
143         }
144     }
145     return arm_to_core_mmu_idx(useridx);
146 }
147 
148 static void set_btype_raw(int val)
149 {
150     tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
151                    offsetof(CPUARMState, btype));
152 }
153 
154 static void set_btype(DisasContext *s, int val)
155 {
156     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
157     tcg_debug_assert(val >= 1 && val <= 3);
158     set_btype_raw(val);
159     s->btype = -1;
160 }
161 
162 static void reset_btype(DisasContext *s)
163 {
164     if (s->btype != 0) {
165         set_btype_raw(0);
166         s->btype = 0;
167     }
168 }
169 
170 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
171 {
172     assert(s->pc_save != -1);
173     if (tb_cflags(s->base.tb) & CF_PCREL) {
174         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
175     } else {
176         tcg_gen_movi_i64(dest, s->pc_curr + diff);
177     }
178 }
179 
180 void gen_a64_update_pc(DisasContext *s, target_long diff)
181 {
182     gen_pc_plus_diff(s, cpu_pc, diff);
183     s->pc_save = s->pc_curr + diff;
184 }
185 
186 /*
187  * Handle Top Byte Ignore (TBI) bits.
188  *
189  * If address tagging is enabled via the TCR TBI bits:
190  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
191  *    then the address is zero-extended, clearing bits [63:56]
192  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
193  *    and TBI1 controls addresses with bit 55 == 1.
194  *    If the appropriate TBI bit is set for the address then
195  *    the address is sign-extended from bit 55 into bits [63:56]
196  *
197  * Here We have concatenated TBI{1,0} into tbi.
198  */
199 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
200                                 TCGv_i64 src, int tbi)
201 {
202     if (tbi == 0) {
203         /* Load unmodified address */
204         tcg_gen_mov_i64(dst, src);
205     } else if (!regime_has_2_ranges(s->mmu_idx)) {
206         /* Force tag byte to all zero */
207         tcg_gen_extract_i64(dst, src, 0, 56);
208     } else {
209         /* Sign-extend from bit 55.  */
210         tcg_gen_sextract_i64(dst, src, 0, 56);
211 
212         switch (tbi) {
213         case 1:
214             /* tbi0 but !tbi1: only use the extension if positive */
215             tcg_gen_and_i64(dst, dst, src);
216             break;
217         case 2:
218             /* !tbi0 but tbi1: only use the extension if negative */
219             tcg_gen_or_i64(dst, dst, src);
220             break;
221         case 3:
222             /* tbi0 and tbi1: always use the extension */
223             break;
224         default:
225             g_assert_not_reached();
226         }
227     }
228 }
229 
230 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
231 {
232     /*
233      * If address tagging is enabled for instructions via the TCR TBI bits,
234      * then loading an address into the PC will clear out any tag.
235      */
236     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
237     s->pc_save = -1;
238 }
239 
240 /*
241  * Handle MTE and/or TBI.
242  *
243  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
244  * for the tag to be present in the FAR_ELx register.  But for user-only
245  * mode we do not have a TLB with which to implement this, so we must
246  * remove the top byte now.
247  *
248  * Always return a fresh temporary that we can increment independently
249  * of the write-back address.
250  */
251 
252 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
253 {
254     TCGv_i64 clean = tcg_temp_new_i64();
255 #ifdef CONFIG_USER_ONLY
256     gen_top_byte_ignore(s, clean, addr, s->tbid);
257 #else
258     tcg_gen_mov_i64(clean, addr);
259 #endif
260     return clean;
261 }
262 
263 /* Insert a zero tag into src, with the result at dst. */
264 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
265 {
266     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
267 }
268 
269 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
270                              MMUAccessType acc, int log2_size)
271 {
272     gen_helper_probe_access(tcg_env, ptr,
273                             tcg_constant_i32(acc),
274                             tcg_constant_i32(get_mem_index(s)),
275                             tcg_constant_i32(1 << log2_size));
276 }
277 
278 /*
279  * For MTE, check a single logical or atomic access.  This probes a single
280  * address, the exact one specified.  The size and alignment of the access
281  * is not relevant to MTE, per se, but watchpoints do require the size,
282  * and we want to recognize those before making any other changes to state.
283  */
284 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
285                                       bool is_write, bool tag_checked,
286                                       MemOp memop, bool is_unpriv,
287                                       int core_idx)
288 {
289     if (tag_checked && s->mte_active[is_unpriv]) {
290         TCGv_i64 ret;
291         int desc = 0;
292 
293         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
294         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
295         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
296         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
297         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
298         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
299 
300         ret = tcg_temp_new_i64();
301         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
302 
303         return ret;
304     }
305     return clean_data_tbi(s, addr);
306 }
307 
308 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
309                         bool tag_checked, MemOp memop)
310 {
311     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
312                                  false, get_mem_index(s));
313 }
314 
315 /*
316  * For MTE, check multiple logical sequential accesses.
317  */
318 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
319                         bool tag_checked, int total_size, MemOp single_mop)
320 {
321     if (tag_checked && s->mte_active[0]) {
322         TCGv_i64 ret;
323         int desc = 0;
324 
325         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
326         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
327         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
328         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
329         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
330         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
331 
332         ret = tcg_temp_new_i64();
333         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
334 
335         return ret;
336     }
337     return clean_data_tbi(s, addr);
338 }
339 
340 /*
341  * Generate the special alignment check that applies to AccType_ATOMIC
342  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
343  * naturally aligned, but it must not cross a 16-byte boundary.
344  * See AArch64.CheckAlignment().
345  */
346 static void check_lse2_align(DisasContext *s, int rn, int imm,
347                              bool is_write, MemOp mop)
348 {
349     TCGv_i32 tmp;
350     TCGv_i64 addr;
351     TCGLabel *over_label;
352     MMUAccessType type;
353     int mmu_idx;
354 
355     tmp = tcg_temp_new_i32();
356     tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
357     tcg_gen_addi_i32(tmp, tmp, imm & 15);
358     tcg_gen_andi_i32(tmp, tmp, 15);
359     tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
360 
361     over_label = gen_new_label();
362     tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
363 
364     addr = tcg_temp_new_i64();
365     tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
366 
367     type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD,
368     mmu_idx = get_mem_index(s);
369     gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
370                                 tcg_constant_i32(mmu_idx));
371 
372     gen_set_label(over_label);
373 
374 }
375 
376 /* Handle the alignment check for AccType_ATOMIC instructions. */
377 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
378 {
379     MemOp size = mop & MO_SIZE;
380 
381     if (size == MO_8) {
382         return mop;
383     }
384 
385     /*
386      * If size == MO_128, this is a LDXP, and the operation is single-copy
387      * atomic for each doubleword, not the entire quadword; it still must
388      * be quadword aligned.
389      */
390     if (size == MO_128) {
391         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
392                                    MO_ATOM_IFALIGN_PAIR);
393     }
394     if (dc_isar_feature(aa64_lse2, s)) {
395         check_lse2_align(s, rn, 0, true, mop);
396     } else {
397         mop |= MO_ALIGN;
398     }
399     return finalize_memop(s, mop);
400 }
401 
402 /* Handle the alignment check for AccType_ORDERED instructions. */
403 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
404                                  bool is_write, MemOp mop)
405 {
406     MemOp size = mop & MO_SIZE;
407 
408     if (size == MO_8) {
409         return mop;
410     }
411     if (size == MO_128) {
412         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
413                                    MO_ATOM_IFALIGN_PAIR);
414     }
415     if (!dc_isar_feature(aa64_lse2, s)) {
416         mop |= MO_ALIGN;
417     } else if (!s->naa) {
418         check_lse2_align(s, rn, imm, is_write, mop);
419     }
420     return finalize_memop(s, mop);
421 }
422 
/*
 * A condition-code test expressed on a 64-bit value, as filled in by
 * a64_test_cc(): @cond is the TCG comparison to perform and @value is
 * the 64-bit operand it applies to.
 */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
427 
428 static void a64_test_cc(DisasCompare64 *c64, int cc)
429 {
430     DisasCompare c32;
431 
432     arm_test_cc(&c32, cc);
433 
434     /*
435      * Sign-extend the 32-bit value so that the GE/LT comparisons work
436      * properly.  The NE/EQ comparisons are also fine with this choice.
437       */
438     c64->cond = c32.cond;
439     c64->value = tcg_temp_new_i64();
440     tcg_gen_ext_i32_i64(c64->value, c32.value);
441 }
442 
443 static void gen_rebuild_hflags(DisasContext *s)
444 {
445     gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
446 }
447 
448 static void gen_exception_internal(int excp)
449 {
450     assert(excp_is_internal(excp));
451     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
452 }
453 
454 static void gen_exception_internal_insn(DisasContext *s, int excp)
455 {
456     gen_a64_update_pc(s, 0);
457     gen_exception_internal(excp);
458     s->base.is_jmp = DISAS_NORETURN;
459 }
460 
461 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
462 {
463     gen_a64_update_pc(s, 0);
464     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
465     s->base.is_jmp = DISAS_NORETURN;
466 }
467 
468 static void gen_step_complete_exception(DisasContext *s)
469 {
470     /* We just completed step of an insn. Move from Active-not-pending
471      * to Active-pending, and then also take the swstep exception.
472      * This corresponds to making the (IMPDEF) choice to prioritize
473      * swstep exceptions over asynchronous exceptions taken to an exception
474      * level where debug is disabled. This choice has the advantage that
475      * we do not need to maintain internal state corresponding to the
476      * ISV/EX syndrome bits between completion of the step and generation
477      * of the exception, and our syndrome information is always correct.
478      */
479     gen_ss_advance(s);
480     gen_swstep_exception(s, 1, s->is_ldex);
481     s->base.is_jmp = DISAS_NORETURN;
482 }
483 
484 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
485 {
486     if (s->ss_active) {
487         return false;
488     }
489     return translator_use_goto_tb(&s->base, dest);
490 }
491 
492 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
493 {
494     if (use_goto_tb(s, s->pc_curr + diff)) {
495         /*
496          * For pcrel, the pc must always be up-to-date on entry to
497          * the linked TB, so that it can use simple additions for all
498          * further adjustments.  For !pcrel, the linked TB is compiled
499          * to know its full virtual address, so we can delay the
500          * update to pc to the unlinked path.  A long chain of links
501          * can thus avoid many updates to the PC.
502          */
503         if (tb_cflags(s->base.tb) & CF_PCREL) {
504             gen_a64_update_pc(s, diff);
505             tcg_gen_goto_tb(n);
506         } else {
507             tcg_gen_goto_tb(n);
508             gen_a64_update_pc(s, diff);
509         }
510         tcg_gen_exit_tb(s->base.tb, n);
511         s->base.is_jmp = DISAS_NORETURN;
512     } else {
513         gen_a64_update_pc(s, diff);
514         if (s->ss_active) {
515             gen_step_complete_exception(s);
516         } else {
517             tcg_gen_lookup_and_goto_ptr();
518             s->base.is_jmp = DISAS_NORETURN;
519         }
520     }
521 }
522 
523 /*
524  * Register access functions
525  *
526  * These functions are used for directly accessing a register in where
527  * changes to the final register value are likely to be made. If you
528  * need to use a register for temporary calculation (e.g. index type
529  * operations) use the read_* form.
530  *
531  * B1.2.1 Register mappings
532  *
533  * In instruction register encoding 31 can refer to ZR (zero register) or
534  * the SP (stack pointer) depending on context. In QEMU's case we map SP
535  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
536  * This is the point of the _sp forms.
537  */
538 TCGv_i64 cpu_reg(DisasContext *s, int reg)
539 {
540     if (reg == 31) {
541         TCGv_i64 t = tcg_temp_new_i64();
542         tcg_gen_movi_i64(t, 0);
543         return t;
544     } else {
545         return cpu_X[reg];
546     }
547 }
548 
549 /* register access for when 31 == SP */
550 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
551 {
552     return cpu_X[reg];
553 }
554 
555 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
556  * representing the register contents. This TCGv is an auto-freed
557  * temporary so it need not be explicitly freed, and may be modified.
558  */
559 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
560 {
561     TCGv_i64 v = tcg_temp_new_i64();
562     if (reg != 31) {
563         if (sf) {
564             tcg_gen_mov_i64(v, cpu_X[reg]);
565         } else {
566             tcg_gen_ext32u_i64(v, cpu_X[reg]);
567         }
568     } else {
569         tcg_gen_movi_i64(v, 0);
570     }
571     return v;
572 }
573 
574 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
575 {
576     TCGv_i64 v = tcg_temp_new_i64();
577     if (sf) {
578         tcg_gen_mov_i64(v, cpu_X[reg]);
579     } else {
580         tcg_gen_ext32u_i64(v, cpu_X[reg]);
581     }
582     return v;
583 }
584 
585 /* Return the offset into CPUARMState of a slice (from
586  * the least significant end) of FP register Qn (ie
587  * Dn, Sn, Hn or Bn).
588  * (Note that this is not the same mapping as for A32; see cpu.h)
589  */
590 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
591 {
592     return vec_reg_offset(s, regno, 0, size);
593 }
594 
595 /* Offset of the high half of the 128 bit vector Qn */
596 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
597 {
598     return vec_reg_offset(s, regno, 1, MO_64);
599 }
600 
601 /* Convenience accessors for reading and writing single and double
602  * FP registers. Writing clears the upper parts of the associated
603  * 128 bit vector register, as required by the architecture.
604  * Note that unlike the GP register accessors, the values returned
605  * by the read functions must be manually freed.
606  */
607 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
608 {
609     TCGv_i64 v = tcg_temp_new_i64();
610 
611     tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
612     return v;
613 }
614 
615 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
616 {
617     TCGv_i32 v = tcg_temp_new_i32();
618 
619     tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
620     return v;
621 }
622 
623 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
624 {
625     TCGv_i32 v = tcg_temp_new_i32();
626 
627     tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
628     return v;
629 }
630 
631 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
632  * If SVE is not enabled, then there are only 128 bits in the vector.
633  */
634 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
635 {
636     unsigned ofs = fp_reg_offset(s, rd, MO_64);
637     unsigned vsz = vec_full_reg_size(s);
638 
639     /* Nop move, with side effect of clearing the tail. */
640     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
641 }
642 
643 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
644 {
645     unsigned ofs = fp_reg_offset(s, reg, MO_64);
646 
647     tcg_gen_st_i64(v, tcg_env, ofs);
648     clear_vec_high(s, false, reg);
649 }
650 
651 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
652 {
653     TCGv_i64 tmp = tcg_temp_new_i64();
654 
655     tcg_gen_extu_i32_i64(tmp, v);
656     write_fp_dreg(s, reg, tmp);
657 }
658 
659 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
660 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
661                          GVecGen2Fn *gvec_fn, int vece)
662 {
663     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
664             is_q ? 16 : 8, vec_full_reg_size(s));
665 }
666 
667 /* Expand a 2-operand + immediate AdvSIMD vector operation using
668  * an expander function.
669  */
670 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
671                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
672 {
673     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
674             imm, is_q ? 16 : 8, vec_full_reg_size(s));
675 }
676 
677 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
678 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
679                          GVecGen3Fn *gvec_fn, int vece)
680 {
681     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
682             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
683 }
684 
685 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
686 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
687                          int rx, GVecGen4Fn *gvec_fn, int vece)
688 {
689     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
690             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
691             is_q ? 16 : 8, vec_full_reg_size(s));
692 }
693 
694 /* Expand a 2-operand operation using an out-of-line helper.  */
695 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
696                              int rn, int data, gen_helper_gvec_2 *fn)
697 {
698     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
699                        vec_full_reg_offset(s, rn),
700                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
701 }
702 
703 /* Expand a 3-operand operation using an out-of-line helper.  */
704 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
705                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
706 {
707     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
708                        vec_full_reg_offset(s, rn),
709                        vec_full_reg_offset(s, rm),
710                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
711 }
712 
713 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
714  * an out-of-line helper.
715  */
716 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
717                               int rm, bool is_fp16, int data,
718                               gen_helper_gvec_3_ptr *fn)
719 {
720     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
721     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
722                        vec_full_reg_offset(s, rn),
723                        vec_full_reg_offset(s, rm), fpst,
724                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
725 }
726 
727 /* Expand a 4-operand operation using an out-of-line helper.  */
728 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
729                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
730 {
731     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
732                        vec_full_reg_offset(s, rn),
733                        vec_full_reg_offset(s, rm),
734                        vec_full_reg_offset(s, ra),
735                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
736 }
737 
738 /*
739  * Expand a 4-operand operation using an out-of-line helper that takes
740  * a pointer to the CPU env.
741  */
742 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
743                              int rm, int ra, int data,
744                              gen_helper_gvec_4_ptr *fn)
745 {
746     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
747                        vec_full_reg_offset(s, rn),
748                        vec_full_reg_offset(s, rm),
749                        vec_full_reg_offset(s, ra),
750                        tcg_env,
751                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
752 }
753 
754 /*
755  * Expand a 4-operand + fpstatus pointer + simd data value operation using
756  * an out-of-line helper.
757  */
758 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
759                               int rm, int ra, bool is_fp16, int data,
760                               gen_helper_gvec_4_ptr *fn)
761 {
762     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
763     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
764                        vec_full_reg_offset(s, rn),
765                        vec_full_reg_offset(s, rm),
766                        vec_full_reg_offset(s, ra), fpst,
767                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
768 }
769 
770 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
771  * than the 32 bit equivalent.
772  */
773 static inline void gen_set_NZ64(TCGv_i64 result)
774 {
775     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
776     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
777 }
778 
779 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
780 static inline void gen_logic_CC(int sf, TCGv_i64 result)
781 {
782     if (sf) {
783         gen_set_NZ64(result);
784     } else {
785         tcg_gen_extrl_i64_i32(cpu_ZF, result);
786         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
787     }
788     tcg_gen_movi_i32(cpu_CF, 0);
789     tcg_gen_movi_i32(cpu_VF, 0);
790 }
791 
792 /* dest = T0 + T1; compute C, N, V and Z flags */
793 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
794 {
795     TCGv_i64 result, flag, tmp;
796     result = tcg_temp_new_i64();
797     flag = tcg_temp_new_i64();
798     tmp = tcg_temp_new_i64();
799 
800     tcg_gen_movi_i64(tmp, 0);
801     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
802 
803     tcg_gen_extrl_i64_i32(cpu_CF, flag);
804 
805     gen_set_NZ64(result);
806 
807     tcg_gen_xor_i64(flag, result, t0);
808     tcg_gen_xor_i64(tmp, t0, t1);
809     tcg_gen_andc_i64(flag, flag, tmp);
810     tcg_gen_extrh_i64_i32(cpu_VF, flag);
811 
812     tcg_gen_mov_i64(dest, result);
813 }
814 
815 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
816 {
817     TCGv_i32 t0_32 = tcg_temp_new_i32();
818     TCGv_i32 t1_32 = tcg_temp_new_i32();
819     TCGv_i32 tmp = tcg_temp_new_i32();
820 
821     tcg_gen_movi_i32(tmp, 0);
822     tcg_gen_extrl_i64_i32(t0_32, t0);
823     tcg_gen_extrl_i64_i32(t1_32, t1);
824     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
825     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
826     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
827     tcg_gen_xor_i32(tmp, t0_32, t1_32);
828     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
829     tcg_gen_extu_i32_i64(dest, cpu_NF);
830 }
831 
832 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
833 {
834     if (sf) {
835         gen_add64_CC(dest, t0, t1);
836     } else {
837         gen_add32_CC(dest, t0, t1);
838     }
839 }
840 
841 /* dest = T0 - T1; compute C, N, V and Z flags */
842 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
843 {
844     /* 64 bit arithmetic */
845     TCGv_i64 result, flag, tmp;
846 
847     result = tcg_temp_new_i64();
848     flag = tcg_temp_new_i64();
849     tcg_gen_sub_i64(result, t0, t1);
850 
851     gen_set_NZ64(result);
852 
853     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
854     tcg_gen_extrl_i64_i32(cpu_CF, flag);
855 
856     tcg_gen_xor_i64(flag, result, t0);
857     tmp = tcg_temp_new_i64();
858     tcg_gen_xor_i64(tmp, t0, t1);
859     tcg_gen_and_i64(flag, flag, tmp);
860     tcg_gen_extrh_i64_i32(cpu_VF, flag);
861     tcg_gen_mov_i64(dest, result);
862 }
863 
864 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
865 {
866     /* 32 bit arithmetic */
867     TCGv_i32 t0_32 = tcg_temp_new_i32();
868     TCGv_i32 t1_32 = tcg_temp_new_i32();
869     TCGv_i32 tmp;
870 
871     tcg_gen_extrl_i64_i32(t0_32, t0);
872     tcg_gen_extrl_i64_i32(t1_32, t1);
873     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
874     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
875     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
876     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
877     tmp = tcg_temp_new_i32();
878     tcg_gen_xor_i32(tmp, t0_32, t1_32);
879     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
880     tcg_gen_extu_i32_i64(dest, cpu_NF);
881 }
882 
883 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
884 {
885     if (sf) {
886         gen_sub64_CC(dest, t0, t1);
887     } else {
888         gen_sub32_CC(dest, t0, t1);
889     }
890 }
891 
892 /* dest = T0 + T1 + CF; do not compute flags. */
893 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
894 {
895     TCGv_i64 flag = tcg_temp_new_i64();
896     tcg_gen_extu_i32_i64(flag, cpu_CF);
897     tcg_gen_add_i64(dest, t0, t1);
898     tcg_gen_add_i64(dest, dest, flag);
899 
900     if (!sf) {
901         tcg_gen_ext32u_i64(dest, dest);
902     }
903 }
904 
905 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
906 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
907 {
908     if (sf) {
909         TCGv_i64 result = tcg_temp_new_i64();
910         TCGv_i64 cf_64 = tcg_temp_new_i64();
911         TCGv_i64 vf_64 = tcg_temp_new_i64();
912         TCGv_i64 tmp = tcg_temp_new_i64();
913         TCGv_i64 zero = tcg_constant_i64(0);
914 
915         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
916         tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
917         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
918         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
919         gen_set_NZ64(result);
920 
921         tcg_gen_xor_i64(vf_64, result, t0);
922         tcg_gen_xor_i64(tmp, t0, t1);
923         tcg_gen_andc_i64(vf_64, vf_64, tmp);
924         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
925 
926         tcg_gen_mov_i64(dest, result);
927     } else {
928         TCGv_i32 t0_32 = tcg_temp_new_i32();
929         TCGv_i32 t1_32 = tcg_temp_new_i32();
930         TCGv_i32 tmp = tcg_temp_new_i32();
931         TCGv_i32 zero = tcg_constant_i32(0);
932 
933         tcg_gen_extrl_i64_i32(t0_32, t0);
934         tcg_gen_extrl_i64_i32(t1_32, t1);
935         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
936         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
937 
938         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
939         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
940         tcg_gen_xor_i32(tmp, t0_32, t1_32);
941         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
942         tcg_gen_extu_i32_i64(dest, cpu_NF);
943     }
944 }
945 
946 /*
947  * Load/Store generators
948  */
949 
950 /*
951  * Store from GPR register to memory.
952  */
953 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
954                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
955                              bool iss_valid,
956                              unsigned int iss_srt,
957                              bool iss_sf, bool iss_ar)
958 {
959     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
960 
961     if (iss_valid) {
962         uint32_t syn;
963 
964         syn = syn_data_abort_with_iss(0,
965                                       (memop & MO_SIZE),
966                                       false,
967                                       iss_srt,
968                                       iss_sf,
969                                       iss_ar,
970                                       0, 0, 0, 0, 0, false);
971         disas_set_insn_syndrome(s, syn);
972     }
973 }
974 
975 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
976                       TCGv_i64 tcg_addr, MemOp memop,
977                       bool iss_valid,
978                       unsigned int iss_srt,
979                       bool iss_sf, bool iss_ar)
980 {
981     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
982                      iss_valid, iss_srt, iss_sf, iss_ar);
983 }
984 
985 /*
986  * Load from memory to GPR register
987  */
988 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
989                              MemOp memop, bool extend, int memidx,
990                              bool iss_valid, unsigned int iss_srt,
991                              bool iss_sf, bool iss_ar)
992 {
993     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
994 
995     if (extend && (memop & MO_SIGN)) {
996         g_assert((memop & MO_SIZE) <= MO_32);
997         tcg_gen_ext32u_i64(dest, dest);
998     }
999 
1000     if (iss_valid) {
1001         uint32_t syn;
1002 
1003         syn = syn_data_abort_with_iss(0,
1004                                       (memop & MO_SIZE),
1005                                       (memop & MO_SIGN) != 0,
1006                                       iss_srt,
1007                                       iss_sf,
1008                                       iss_ar,
1009                                       0, 0, 0, 0, 0, false);
1010         disas_set_insn_syndrome(s, syn);
1011     }
1012 }
1013 
1014 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1015                       MemOp memop, bool extend,
1016                       bool iss_valid, unsigned int iss_srt,
1017                       bool iss_sf, bool iss_ar)
1018 {
1019     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1020                      iss_valid, iss_srt, iss_sf, iss_ar);
1021 }
1022 
1023 /*
1024  * Store from FP register to memory
1025  */
1026 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1027 {
1028     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1029     TCGv_i64 tmplo = tcg_temp_new_i64();
1030 
1031     tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1032 
1033     if ((mop & MO_SIZE) < MO_128) {
1034         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1035     } else {
1036         TCGv_i64 tmphi = tcg_temp_new_i64();
1037         TCGv_i128 t16 = tcg_temp_new_i128();
1038 
1039         tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1040         tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1041 
1042         tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1043     }
1044 }
1045 
1046 /*
1047  * Load from memory to FP register
1048  */
1049 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1050 {
1051     /* This always zero-extends and writes to a full 128 bit wide vector */
1052     TCGv_i64 tmplo = tcg_temp_new_i64();
1053     TCGv_i64 tmphi = NULL;
1054 
1055     if ((mop & MO_SIZE) < MO_128) {
1056         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1057     } else {
1058         TCGv_i128 t16 = tcg_temp_new_i128();
1059 
1060         tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1061 
1062         tmphi = tcg_temp_new_i64();
1063         tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1064     }
1065 
1066     tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1067 
1068     if (tmphi) {
1069         tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1070     }
1071     clear_vec_high(s, tmphi != NULL, destidx);
1072 }
1073 
1074 /*
1075  * Vector load/store helpers.
1076  *
1077  * The principal difference between this and a FP load is that we don't
1078  * zero extend as we are filling a partial chunk of the vector register.
1079  * These functions don't support 128 bit loads/stores, which would be
1080  * normal load/store operations.
1081  *
1082  * The _i32 versions are useful when operating on 32 bit quantities
1083  * (eg for floating point single or using Neon helper functions).
1084  */
1085 
1086 /* Get value of an element within a vector register */
1087 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1088                              int element, MemOp memop)
1089 {
1090     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1091     switch ((unsigned)memop) {
1092     case MO_8:
1093         tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1094         break;
1095     case MO_16:
1096         tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1097         break;
1098     case MO_32:
1099         tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1100         break;
1101     case MO_8|MO_SIGN:
1102         tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1103         break;
1104     case MO_16|MO_SIGN:
1105         tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1106         break;
1107     case MO_32|MO_SIGN:
1108         tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1109         break;
1110     case MO_64:
1111     case MO_64|MO_SIGN:
1112         tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1113         break;
1114     default:
1115         g_assert_not_reached();
1116     }
1117 }
1118 
1119 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1120                                  int element, MemOp memop)
1121 {
1122     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1123     switch (memop) {
1124     case MO_8:
1125         tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1126         break;
1127     case MO_16:
1128         tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1129         break;
1130     case MO_8|MO_SIGN:
1131         tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1132         break;
1133     case MO_16|MO_SIGN:
1134         tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1135         break;
1136     case MO_32:
1137     case MO_32|MO_SIGN:
1138         tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1139         break;
1140     default:
1141         g_assert_not_reached();
1142     }
1143 }
1144 
1145 /* Set value of an element within a vector register */
1146 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1147                               int element, MemOp memop)
1148 {
1149     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1150     switch (memop) {
1151     case MO_8:
1152         tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1153         break;
1154     case MO_16:
1155         tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1156         break;
1157     case MO_32:
1158         tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1159         break;
1160     case MO_64:
1161         tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1162         break;
1163     default:
1164         g_assert_not_reached();
1165     }
1166 }
1167 
1168 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1169                                   int destidx, int element, MemOp memop)
1170 {
1171     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1172     switch (memop) {
1173     case MO_8:
1174         tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1175         break;
1176     case MO_16:
1177         tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1178         break;
1179     case MO_32:
1180         tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1181         break;
1182     default:
1183         g_assert_not_reached();
1184     }
1185 }
1186 
1187 /* Store from vector register to memory */
1188 static void do_vec_st(DisasContext *s, int srcidx, int element,
1189                       TCGv_i64 tcg_addr, MemOp mop)
1190 {
1191     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1192 
1193     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1194     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1195 }
1196 
1197 /* Load from memory to vector register */
1198 static void do_vec_ld(DisasContext *s, int destidx, int element,
1199                       TCGv_i64 tcg_addr, MemOp mop)
1200 {
1201     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1202 
1203     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1204     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1205 }
1206 
1207 /* Check that FP/Neon access is enabled. If it is, return
1208  * true. If not, emit code to generate an appropriate exception,
1209  * and return false; the caller should not emit any code for
1210  * the instruction. Note that this check must happen after all
1211  * unallocated-encoding checks (otherwise the syndrome information
1212  * for the resulting exception will be incorrect).
1213  */
1214 static bool fp_access_check_only(DisasContext *s)
1215 {
1216     if (s->fp_excp_el) {
1217         assert(!s->fp_access_checked);
1218         s->fp_access_checked = true;
1219 
1220         gen_exception_insn_el(s, 0, EXCP_UDEF,
1221                               syn_fp_access_trap(1, 0xe, false, 0),
1222                               s->fp_excp_el);
1223         return false;
1224     }
1225     s->fp_access_checked = true;
1226     return true;
1227 }
1228 
1229 static bool fp_access_check(DisasContext *s)
1230 {
1231     if (!fp_access_check_only(s)) {
1232         return false;
1233     }
1234     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1235         gen_exception_insn(s, 0, EXCP_UDEF,
1236                            syn_smetrap(SME_ET_Streaming, false));
1237         return false;
1238     }
1239     return true;
1240 }
1241 
1242 /*
1243  * Check that SVE access is enabled.  If it is, return true.
1244  * If not, emit code to generate an appropriate exception and return false.
1245  * This function corresponds to CheckSVEEnabled().
1246  */
1247 bool sve_access_check(DisasContext *s)
1248 {
1249     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1250         assert(dc_isar_feature(aa64_sme, s));
1251         if (!sme_sm_enabled_check(s)) {
1252             goto fail_exit;
1253         }
1254     } else if (s->sve_excp_el) {
1255         gen_exception_insn_el(s, 0, EXCP_UDEF,
1256                               syn_sve_access_trap(), s->sve_excp_el);
1257         goto fail_exit;
1258     }
1259     s->sve_access_checked = true;
1260     return fp_access_check(s);
1261 
1262  fail_exit:
1263     /* Assert that we only raise one exception per instruction. */
1264     assert(!s->sve_access_checked);
1265     s->sve_access_checked = true;
1266     return false;
1267 }
1268 
1269 /*
1270  * Check that SME access is enabled, raise an exception if not.
1271  * Note that this function corresponds to CheckSMEAccess and is
1272  * only used directly for cpregs.
1273  */
1274 static bool sme_access_check(DisasContext *s)
1275 {
1276     if (s->sme_excp_el) {
1277         gen_exception_insn_el(s, 0, EXCP_UDEF,
1278                               syn_smetrap(SME_ET_AccessTrap, false),
1279                               s->sme_excp_el);
1280         return false;
1281     }
1282     return true;
1283 }
1284 
1285 /* This function corresponds to CheckSMEEnabled. */
1286 bool sme_enabled_check(DisasContext *s)
1287 {
1288     /*
1289      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1290      * to be zero when fp_excp_el has priority.  This is because we need
1291      * sme_excp_el by itself for cpregs access checks.
1292      */
1293     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1294         s->fp_access_checked = true;
1295         return sme_access_check(s);
1296     }
1297     return fp_access_check_only(s);
1298 }
1299 
1300 /* Common subroutine for CheckSMEAnd*Enabled. */
1301 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1302 {
1303     if (!sme_enabled_check(s)) {
1304         return false;
1305     }
1306     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1307         gen_exception_insn(s, 0, EXCP_UDEF,
1308                            syn_smetrap(SME_ET_NotStreaming, false));
1309         return false;
1310     }
1311     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1312         gen_exception_insn(s, 0, EXCP_UDEF,
1313                            syn_smetrap(SME_ET_InactiveZA, false));
1314         return false;
1315     }
1316     return true;
1317 }
1318 
1319 /*
1320  * Expanders for AdvSIMD translation functions.
1321  */
1322 
1323 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1324                             gen_helper_gvec_2 *fn)
1325 {
1326     if (!a->q && a->esz == MO_64) {
1327         return false;
1328     }
1329     if (fp_access_check(s)) {
1330         gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1331     }
1332     return true;
1333 }
1334 
1335 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1336                             gen_helper_gvec_3 *fn)
1337 {
1338     if (!a->q && a->esz == MO_64) {
1339         return false;
1340     }
1341     if (fp_access_check(s)) {
1342         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1343     }
1344     return true;
1345 }
1346 
1347 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1348 {
1349     if (!a->q && a->esz == MO_64) {
1350         return false;
1351     }
1352     if (fp_access_check(s)) {
1353         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1354     }
1355     return true;
1356 }
1357 
1358 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1359 {
1360     if (a->esz == MO_64) {
1361         return false;
1362     }
1363     if (fp_access_check(s)) {
1364         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1365     }
1366     return true;
1367 }
1368 
1369 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1370 {
1371     if (a->esz == MO_8) {
1372         return false;
1373     }
1374     return do_gvec_fn3_no64(s, a, fn);
1375 }
1376 
1377 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1378 {
1379     if (!a->q && a->esz == MO_64) {
1380         return false;
1381     }
1382     if (fp_access_check(s)) {
1383         gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1384     }
1385     return true;
1386 }
1387 
1388 /*
1389  * This utility function is for doing register extension with an
1390  * optional shift. You will likely want to pass a temporary for the
1391  * destination register. See DecodeRegExtend() in the ARM ARM.
1392  */
1393 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1394                               int option, unsigned int shift)
1395 {
1396     int extsize = extract32(option, 0, 2);
1397     bool is_signed = extract32(option, 2, 1);
1398 
1399     tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1400     tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1401 }
1402 
1403 static inline void gen_check_sp_alignment(DisasContext *s)
1404 {
1405     /* The AArch64 architecture mandates that (if enabled via PSTATE
1406      * or SCTLR bits) there is a check that SP is 16-aligned on every
1407      * SP-relative load or store (with an exception generated if it is not).
1408      * In line with general QEMU practice regarding misaligned accesses,
1409      * we omit these checks for the sake of guest program performance.
1410      * This function is provided as a hook so we can more easily add these
1411      * checks in future (possibly as a "favour catching guest program bugs
1412      * over speed" user selectable option).
1413      */
1414 }
1415 
1416 /*
1417  * This provides a simple table based table lookup decoder. It is
1418  * intended to be used when the relevant bits for decode are too
1419  * awkwardly placed and switch/if based logic would be confusing and
1420  * deeply nested. Since it's a linear search through the table, tables
1421  * should be kept small.
1422  *
1423  * It returns the first handler where insn & mask == pattern, or
1424  * NULL if there is no match.
1425  * The table is terminated by an empty mask (i.e. 0)
1426  */
1427 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1428                                                uint32_t insn)
1429 {
1430     const AArch64DecodeTable *tptr = table;
1431 
1432     while (tptr->mask) {
1433         if ((insn & tptr->mask) == tptr->pattern) {
1434             return tptr->disas_fn;
1435         }
1436         tptr++;
1437     }
1438     return NULL;
1439 }
1440 
1441 /*
1442  * The instruction disassembly implemented here matches
1443  * the instruction encoding classifications in chapter C4
1444  * of the ARM Architecture Reference Manual (DDI0487B_a);
1445  * classification names and decode diagrams here should generally
1446  * match up with those in the manual.
1447  */
1448 
1449 static bool trans_B(DisasContext *s, arg_i *a)
1450 {
1451     reset_btype(s);
1452     gen_goto_tb(s, 0, a->imm);
1453     return true;
1454 }
1455 
1456 static bool trans_BL(DisasContext *s, arg_i *a)
1457 {
1458     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1459     reset_btype(s);
1460     gen_goto_tb(s, 0, a->imm);
1461     return true;
1462 }
1463 
1464 
1465 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1466 {
1467     DisasLabel match;
1468     TCGv_i64 tcg_cmp;
1469 
1470     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1471     reset_btype(s);
1472 
1473     match = gen_disas_label(s);
1474     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1475                         tcg_cmp, 0, match.label);
1476     gen_goto_tb(s, 0, 4);
1477     set_disas_label(s, match);
1478     gen_goto_tb(s, 1, a->imm);
1479     return true;
1480 }
1481 
1482 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1483 {
1484     DisasLabel match;
1485     TCGv_i64 tcg_cmp;
1486 
1487     tcg_cmp = tcg_temp_new_i64();
1488     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1489 
1490     reset_btype(s);
1491 
1492     match = gen_disas_label(s);
1493     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1494                         tcg_cmp, 0, match.label);
1495     gen_goto_tb(s, 0, 4);
1496     set_disas_label(s, match);
1497     gen_goto_tb(s, 1, a->imm);
1498     return true;
1499 }
1500 
1501 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1502 {
1503     /* BC.cond is only present with FEAT_HBC */
1504     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1505         return false;
1506     }
1507     reset_btype(s);
1508     if (a->cond < 0x0e) {
1509         /* genuinely conditional branches */
1510         DisasLabel match = gen_disas_label(s);
1511         arm_gen_test_cc(a->cond, match.label);
1512         gen_goto_tb(s, 0, 4);
1513         set_disas_label(s, match);
1514         gen_goto_tb(s, 1, a->imm);
1515     } else {
1516         /* 0xe and 0xf are both "always" conditions */
1517         gen_goto_tb(s, 0, a->imm);
1518     }
1519     return true;
1520 }
1521 
1522 static void set_btype_for_br(DisasContext *s, int rn)
1523 {
1524     if (dc_isar_feature(aa64_bti, s)) {
1525         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1526         if (rn == 16 || rn == 17) {
1527             set_btype(s, 1);
1528         } else {
1529             TCGv_i64 pc = tcg_temp_new_i64();
1530             gen_pc_plus_diff(s, pc, 0);
1531             gen_helper_guarded_page_br(tcg_env, pc);
1532             s->btype = -1;
1533         }
1534     }
1535 }
1536 
1537 static void set_btype_for_blr(DisasContext *s)
1538 {
1539     if (dc_isar_feature(aa64_bti, s)) {
1540         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1541         set_btype(s, 2);
1542     }
1543 }
1544 
1545 static bool trans_BR(DisasContext *s, arg_r *a)
1546 {
1547     set_btype_for_br(s, a->rn);
1548     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1549     s->base.is_jmp = DISAS_JUMP;
1550     return true;
1551 }
1552 
1553 static bool trans_BLR(DisasContext *s, arg_r *a)
1554 {
1555     TCGv_i64 dst = cpu_reg(s, a->rn);
1556     TCGv_i64 lr = cpu_reg(s, 30);
1557     if (dst == lr) {
1558         TCGv_i64 tmp = tcg_temp_new_i64();
1559         tcg_gen_mov_i64(tmp, dst);
1560         dst = tmp;
1561     }
1562     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1563     gen_a64_set_pc(s, dst);
1564     set_btype_for_blr(s);
1565     s->base.is_jmp = DISAS_JUMP;
1566     return true;
1567 }
1568 
1569 static bool trans_RET(DisasContext *s, arg_r *a)
1570 {
1571     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1572     s->base.is_jmp = DISAS_JUMP;
1573     return true;
1574 }
1575 
1576 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1577                                    TCGv_i64 modifier, bool use_key_a)
1578 {
1579     TCGv_i64 truedst;
1580     /*
1581      * Return the branch target for a BRAA/RETA/etc, which is either
1582      * just the destination dst, or that value with the pauth check
1583      * done and the code removed from the high bits.
1584      */
1585     if (!s->pauth_active) {
1586         return dst;
1587     }
1588 
1589     truedst = tcg_temp_new_i64();
1590     if (use_key_a) {
1591         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1592     } else {
1593         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1594     }
1595     return truedst;
1596 }
1597 
1598 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1599 {
1600     TCGv_i64 dst;
1601 
1602     if (!dc_isar_feature(aa64_pauth, s)) {
1603         return false;
1604     }
1605 
1606     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1607     set_btype_for_br(s, a->rn);
1608     gen_a64_set_pc(s, dst);
1609     s->base.is_jmp = DISAS_JUMP;
1610     return true;
1611 }
1612 
1613 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1614 {
1615     TCGv_i64 dst, lr;
1616 
1617     if (!dc_isar_feature(aa64_pauth, s)) {
1618         return false;
1619     }
1620 
1621     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1622     lr = cpu_reg(s, 30);
1623     if (dst == lr) {
1624         TCGv_i64 tmp = tcg_temp_new_i64();
1625         tcg_gen_mov_i64(tmp, dst);
1626         dst = tmp;
1627     }
1628     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1629     gen_a64_set_pc(s, dst);
1630     set_btype_for_blr(s);
1631     s->base.is_jmp = DISAS_JUMP;
1632     return true;
1633 }
1634 
1635 static bool trans_RETA(DisasContext *s, arg_reta *a)
1636 {
1637     TCGv_i64 dst;
1638 
1639     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1640     gen_a64_set_pc(s, dst);
1641     s->base.is_jmp = DISAS_JUMP;
1642     return true;
1643 }
1644 
1645 static bool trans_BRA(DisasContext *s, arg_bra *a)
1646 {
1647     TCGv_i64 dst;
1648 
1649     if (!dc_isar_feature(aa64_pauth, s)) {
1650         return false;
1651     }
1652     dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m);
1653     gen_a64_set_pc(s, dst);
1654     set_btype_for_br(s, a->rn);
1655     s->base.is_jmp = DISAS_JUMP;
1656     return true;
1657 }
1658 
1659 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1660 {
1661     TCGv_i64 dst, lr;
1662 
1663     if (!dc_isar_feature(aa64_pauth, s)) {
1664         return false;
1665     }
1666     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1667     lr = cpu_reg(s, 30);
1668     if (dst == lr) {
1669         TCGv_i64 tmp = tcg_temp_new_i64();
1670         tcg_gen_mov_i64(tmp, dst);
1671         dst = tmp;
1672     }
1673     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1674     gen_a64_set_pc(s, dst);
1675     set_btype_for_blr(s);
1676     s->base.is_jmp = DISAS_JUMP;
1677     return true;
1678 }
1679 
1680 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1681 {
1682     TCGv_i64 dst;
1683 
1684     if (s->current_el == 0) {
1685         return false;
1686     }
1687     if (s->trap_eret) {
1688         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1689         return true;
1690     }
1691     dst = tcg_temp_new_i64();
1692     tcg_gen_ld_i64(dst, tcg_env,
1693                    offsetof(CPUARMState, elr_el[s->current_el]));
1694 
1695     translator_io_start(&s->base);
1696 
1697     gen_helper_exception_return(tcg_env, dst);
1698     /* Must exit loop to check un-masked IRQs */
1699     s->base.is_jmp = DISAS_EXIT;
1700     return true;
1701 }
1702 
1703 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1704 {
1705     TCGv_i64 dst;
1706 
1707     if (!dc_isar_feature(aa64_pauth, s)) {
1708         return false;
1709     }
1710     if (s->current_el == 0) {
1711         return false;
1712     }
1713     /* The FGT trap takes precedence over an auth trap. */
1714     if (s->trap_eret) {
1715         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1716         return true;
1717     }
1718     dst = tcg_temp_new_i64();
1719     tcg_gen_ld_i64(dst, tcg_env,
1720                    offsetof(CPUARMState, elr_el[s->current_el]));
1721 
1722     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1723 
1724     translator_io_start(&s->base);
1725 
1726     gen_helper_exception_return(tcg_env, dst);
1727     /* Must exit loop to check un-masked IRQs */
1728     s->base.is_jmp = DISAS_EXIT;
1729     return true;
1730 }
1731 
1732 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1733 {
1734     return true;
1735 }
1736 
1737 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1738 {
1739     /*
1740      * When running in MTTCG we don't generate jumps to the yield and
1741      * WFE helpers as it won't affect the scheduling of other vCPUs.
1742      * If we wanted to more completely model WFE/SEV so we don't busy
1743      * spin unnecessarily we would need to do something more involved.
1744      */
1745     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1746         s->base.is_jmp = DISAS_YIELD;
1747     }
1748     return true;
1749 }
1750 
1751 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1752 {
1753     s->base.is_jmp = DISAS_WFI;
1754     return true;
1755 }
1756 
1757 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1758 {
1759     /*
1760      * When running in MTTCG we don't generate jumps to the yield and
1761      * WFE helpers as it won't affect the scheduling of other vCPUs.
1762      * If we wanted to more completely model WFE/SEV so we don't busy
1763      * spin unnecessarily we would need to do something more involved.
1764      */
1765     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1766         s->base.is_jmp = DISAS_WFE;
1767     }
1768     return true;
1769 }
1770 
1771 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1772 {
1773     if (!dc_isar_feature(aa64_wfxt, s)) {
1774         return false;
1775     }
1776 
1777     /*
1778      * Because we need to pass the register value to the helper,
1779      * it's easier to emit the code now, unlike trans_WFI which
1780      * defers it to aarch64_tr_tb_stop(). That means we need to
1781      * check ss_active so that single-stepping a WFIT doesn't halt.
1782      */
1783     if (s->ss_active) {
1784         /* Act like a NOP under architectural singlestep */
1785         return true;
1786     }
1787 
1788     gen_a64_update_pc(s, 4);
1789     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1790     /* Go back to the main loop to check for interrupts */
1791     s->base.is_jmp = DISAS_EXIT;
1792     return true;
1793 }
1794 
1795 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1796 {
1797     if (!dc_isar_feature(aa64_wfxt, s)) {
1798         return false;
1799     }
1800 
1801     /*
1802      * We rely here on our WFE implementation being a NOP, so we
1803      * don't need to do anything different to handle the WFET timeout
1804      * from what trans_WFE does.
1805      */
1806     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1807         s->base.is_jmp = DISAS_WFE;
1808     }
1809     return true;
1810 }
1811 
1812 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1813 {
1814     if (s->pauth_active) {
1815         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1816     }
1817     return true;
1818 }
1819 
1820 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1821 {
1822     if (s->pauth_active) {
1823         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1824     }
1825     return true;
1826 }
1827 
1828 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1829 {
1830     if (s->pauth_active) {
1831         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1832     }
1833     return true;
1834 }
1835 
1836 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1837 {
1838     if (s->pauth_active) {
1839         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1840     }
1841     return true;
1842 }
1843 
1844 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1845 {
1846     if (s->pauth_active) {
1847         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1848     }
1849     return true;
1850 }
1851 
1852 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1853 {
1854     /* Without RAS, we must implement this as NOP. */
1855     if (dc_isar_feature(aa64_ras, s)) {
1856         /*
1857          * QEMU does not have a source of physical SErrors,
1858          * so we are only concerned with virtual SErrors.
1859          * The pseudocode in the ARM for this case is
1860          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1861          *      AArch64.vESBOperation();
1862          * Most of the condition can be evaluated at translation time.
1863          * Test for EL2 present, and defer test for SEL2 to runtime.
1864          */
1865         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1866             gen_helper_vesb(tcg_env);
1867         }
1868     }
1869     return true;
1870 }
1871 
1872 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1873 {
1874     if (s->pauth_active) {
1875         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1876     }
1877     return true;
1878 }
1879 
1880 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1881 {
1882     if (s->pauth_active) {
1883         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1884     }
1885     return true;
1886 }
1887 
1888 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1889 {
1890     if (s->pauth_active) {
1891         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1892     }
1893     return true;
1894 }
1895 
1896 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1897 {
1898     if (s->pauth_active) {
1899         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1900     }
1901     return true;
1902 }
1903 
1904 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1905 {
1906     if (s->pauth_active) {
1907         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1908     }
1909     return true;
1910 }
1911 
1912 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1913 {
1914     if (s->pauth_active) {
1915         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1916     }
1917     return true;
1918 }
1919 
1920 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1921 {
1922     if (s->pauth_active) {
1923         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1924     }
1925     return true;
1926 }
1927 
1928 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1929 {
1930     if (s->pauth_active) {
1931         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1932     }
1933     return true;
1934 }
1935 
1936 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1937 {
1938     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1939     return true;
1940 }
1941 
1942 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1943 {
1944     /* We handle DSB and DMB the same way */
1945     TCGBar bar;
1946 
1947     switch (a->types) {
1948     case 1: /* MBReqTypes_Reads */
1949         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1950         break;
1951     case 2: /* MBReqTypes_Writes */
1952         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1953         break;
1954     default: /* MBReqTypes_All */
1955         bar = TCG_BAR_SC | TCG_MO_ALL;
1956         break;
1957     }
1958     tcg_gen_mb(bar);
1959     return true;
1960 }
1961 
1962 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1963 {
1964     /*
1965      * We need to break the TB after this insn to execute
1966      * self-modifying code correctly and also to take
1967      * any pending interrupts immediately.
1968      */
1969     reset_btype(s);
1970     gen_goto_tb(s, 0, 4);
1971     return true;
1972 }
1973 
1974 static bool trans_SB(DisasContext *s, arg_SB *a)
1975 {
1976     if (!dc_isar_feature(aa64_sb, s)) {
1977         return false;
1978     }
1979     /*
1980      * TODO: There is no speculation barrier opcode for TCG;
1981      * MB and end the TB instead.
1982      */
1983     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1984     gen_goto_tb(s, 0, 4);
1985     return true;
1986 }
1987 
1988 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
1989 {
1990     if (!dc_isar_feature(aa64_condm_4, s)) {
1991         return false;
1992     }
1993     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1994     return true;
1995 }
1996 
1997 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
1998 {
1999     TCGv_i32 z;
2000 
2001     if (!dc_isar_feature(aa64_condm_5, s)) {
2002         return false;
2003     }
2004 
2005     z = tcg_temp_new_i32();
2006 
2007     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2008 
2009     /*
2010      * (!C & !Z) << 31
2011      * (!(C | Z)) << 31
2012      * ~((C | Z) << 31)
2013      * ~-(C | Z)
2014      * (C | Z) - 1
2015      */
2016     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2017     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2018 
2019     /* !(Z & C) */
2020     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2021     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2022 
2023     /* (!C & Z) << 31 -> -(Z & ~C) */
2024     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2025     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2026 
2027     /* C | Z */
2028     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2029 
2030     return true;
2031 }
2032 
2033 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2034 {
2035     if (!dc_isar_feature(aa64_condm_5, s)) {
2036         return false;
2037     }
2038 
2039     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2040     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2041 
2042     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2043     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2044 
2045     tcg_gen_movi_i32(cpu_NF, 0);
2046     tcg_gen_movi_i32(cpu_VF, 0);
2047 
2048     return true;
2049 }
2050 
2051 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2052 {
2053     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2054         return false;
2055     }
2056     if (a->imm & 1) {
2057         set_pstate_bits(PSTATE_UAO);
2058     } else {
2059         clear_pstate_bits(PSTATE_UAO);
2060     }
2061     gen_rebuild_hflags(s);
2062     s->base.is_jmp = DISAS_TOO_MANY;
2063     return true;
2064 }
2065 
2066 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2067 {
2068     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2069         return false;
2070     }
2071     if (a->imm & 1) {
2072         set_pstate_bits(PSTATE_PAN);
2073     } else {
2074         clear_pstate_bits(PSTATE_PAN);
2075     }
2076     gen_rebuild_hflags(s);
2077     s->base.is_jmp = DISAS_TOO_MANY;
2078     return true;
2079 }
2080 
2081 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2082 {
2083     if (s->current_el == 0) {
2084         return false;
2085     }
2086     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2087     s->base.is_jmp = DISAS_TOO_MANY;
2088     return true;
2089 }
2090 
2091 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2092 {
2093     if (!dc_isar_feature(aa64_ssbs, s)) {
2094         return false;
2095     }
2096     if (a->imm & 1) {
2097         set_pstate_bits(PSTATE_SSBS);
2098     } else {
2099         clear_pstate_bits(PSTATE_SSBS);
2100     }
2101     /* Don't need to rebuild hflags since SSBS is a nop */
2102     s->base.is_jmp = DISAS_TOO_MANY;
2103     return true;
2104 }
2105 
2106 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2107 {
2108     if (!dc_isar_feature(aa64_dit, s)) {
2109         return false;
2110     }
2111     if (a->imm & 1) {
2112         set_pstate_bits(PSTATE_DIT);
2113     } else {
2114         clear_pstate_bits(PSTATE_DIT);
2115     }
2116     /* There's no need to rebuild hflags because DIT is a nop */
2117     s->base.is_jmp = DISAS_TOO_MANY;
2118     return true;
2119 }
2120 
2121 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2122 {
2123     if (dc_isar_feature(aa64_mte, s)) {
2124         /* Full MTE is enabled -- set the TCO bit as directed. */
2125         if (a->imm & 1) {
2126             set_pstate_bits(PSTATE_TCO);
2127         } else {
2128             clear_pstate_bits(PSTATE_TCO);
2129         }
2130         gen_rebuild_hflags(s);
2131         /* Many factors, including TCO, go into MTE_ACTIVE. */
2132         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2133         return true;
2134     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2135         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2136         return true;
2137     } else {
2138         /* Insn not present */
2139         return false;
2140     }
2141 }
2142 
2143 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2144 {
2145     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2146     s->base.is_jmp = DISAS_TOO_MANY;
2147     return true;
2148 }
2149 
2150 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2151 {
2152     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2153     /* Exit the cpu loop to re-evaluate pending IRQs. */
2154     s->base.is_jmp = DISAS_UPDATE_EXIT;
2155     return true;
2156 }
2157 
2158 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2159 {
2160     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2161         return false;
2162     }
2163 
2164     if (a->imm == 0) {
2165         clear_pstate_bits(PSTATE_ALLINT);
2166     } else if (s->current_el > 1) {
2167         set_pstate_bits(PSTATE_ALLINT);
2168     } else {
2169         gen_helper_msr_set_allint_el1(tcg_env);
2170     }
2171 
2172     /* Exit the cpu loop to re-evaluate pending IRQs. */
2173     s->base.is_jmp = DISAS_UPDATE_EXIT;
2174     return true;
2175 }
2176 
2177 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2178 {
2179     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2180         return false;
2181     }
2182     if (sme_access_check(s)) {
2183         int old = s->pstate_sm | (s->pstate_za << 1);
2184         int new = a->imm * 3;
2185 
2186         if ((old ^ new) & a->mask) {
2187             /* At least one bit changes. */
2188             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2189                                 tcg_constant_i32(a->mask));
2190             s->base.is_jmp = DISAS_TOO_MANY;
2191         }
2192     }
2193     return true;
2194 }
2195 
2196 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2197 {
2198     TCGv_i32 tmp = tcg_temp_new_i32();
2199     TCGv_i32 nzcv = tcg_temp_new_i32();
2200 
2201     /* build bit 31, N */
2202     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2203     /* build bit 30, Z */
2204     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2205     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2206     /* build bit 29, C */
2207     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2208     /* build bit 28, V */
2209     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2210     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2211     /* generate result */
2212     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2213 }
2214 
2215 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2216 {
2217     TCGv_i32 nzcv = tcg_temp_new_i32();
2218 
2219     /* take NZCV from R[t] */
2220     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2221 
2222     /* bit 31, N */
2223     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2224     /* bit 30, Z */
2225     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2226     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2227     /* bit 29, C */
2228     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2229     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2230     /* bit 28, V */
2231     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2232     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2233 }
2234 
2235 static void gen_sysreg_undef(DisasContext *s, bool isread,
2236                              uint8_t op0, uint8_t op1, uint8_t op2,
2237                              uint8_t crn, uint8_t crm, uint8_t rt)
2238 {
2239     /*
2240      * Generate code to emit an UNDEF with correct syndrome
2241      * information for a failed system register access.
2242      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2243      * but if FEAT_IDST is implemented then read accesses to registers
2244      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2245      * syndrome.
2246      */
2247     uint32_t syndrome;
2248 
2249     if (isread && dc_isar_feature(aa64_ids, s) &&
2250         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2251         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2252     } else {
2253         syndrome = syn_uncategorized();
2254     }
2255     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2256 }
2257 
/* MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * @s: translation context
 * @isread: true for the read forms, false for the write forms
 * @op0, @op1, @op2, @crn, @crm: encoding fields used to look up the register
 * @rt: index of the general purpose register read or written
 *
 * In order, this function: emits the TIDCP trap check for crn 11/15;
 * UNDEFs (with a LOG_UNIMP message) if no reginfo exists for the
 * encoding; works out any FEAT_NV/NV2 trap or redirection; emits the
 * runtime access check and FP/SVE/SME access checks; and finally emits
 * the access itself, either as one of the ARM_CP_SPECIAL_MASK cases or
 * as a constant, helper call, or direct load/store of the CPU state field.
 */
static void handle_sys(DisasContext *s, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                      crn, crm, op0, op1, op2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    bool need_exit_tb = false;
    bool nv_trap_to_el2 = false;
    bool nv_redirect_reg = false;
    bool skip_fp_access_checks = false;
    bool nv2_mem_redirect = false;
    TCGv_ptr tcg_ri = NULL;
    TCGv_i64 tcg_rt;
    uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);

    if (crn == 11 || crn == 15) {
        /*
         * Check for TIDCP trap, which must take precedence over
         * the UNDEF for "no such register" etc.
         */
        switch (s->current_el) {
        case 0:
            if (dc_isar_feature(aa64_tidcp1, s)) {
                gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
            }
            break;
        case 1:
            gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
            break;
        }
    }

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    if (s->nv2 && ri->nv2_redirect_offset) {
        /*
         * Some registers always redirect to memory; some only do so if
         * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
         * pairs which share an offset; see the table in R_CSRPQ).
         */
        if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
            nv2_mem_redirect = s->nv1;
        } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
            nv2_mem_redirect = !s->nv1;
        } else {
            nv2_mem_redirect = true;
        }
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        /*
         * FEAT_NV/NV2 handling does not do the usual FP access checks
         * for registers only accessible at EL2 (though it *does* do them
         * for registers accessible at EL1).
         */
        skip_fp_access_checks = true;
        if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
            /*
             * This is one of the few EL2 registers which should redirect
             * to the equivalent EL1 register. We do that after running
             * the EL2 register's accessfn.
             */
            nv_redirect_reg = true;
            assert(!nv2_mem_redirect);
        } else if (nv2_mem_redirect) {
            /*
             * NV2 redirect-to-memory takes precedence over trap to EL2 or
             * UNDEF to EL1.
             */
        } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
            /*
             * This register / instruction exists and is an EL2 register, so
             * we must trap to EL2 if accessed in nested virtualization EL1
             * instead of UNDEFing. We'll do that after the usual access checks.
             * (This makes a difference only for a couple of registers like
             * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
             * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
             * an accessfn which does nothing when called from EL1, because
             * the trap-to-EL3 controls which would apply to that register
             * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
             */
            nv_trap_to_el2 = true;
        } else {
            gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
            return;
        }
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        gen_a64_update_pc(s, 0);
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    /* Stop translating this insn if any applicable FP/SVE/SME check fails. */
    if (!skip_fp_access_checks) {
        if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
            return;
        } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
            return;
        } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
            return;
        }
    }

    if (nv_trap_to_el2) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
        return;
    }

    if (nv_redirect_reg) {
        /*
         * FEAT_NV2 redirection of an EL2 register to an EL1 register.
         * Conveniently in all cases the encoding of the EL1 register is
         * identical to the EL2 register except that opc1 is 0.
         * Get the reginfo for the EL1 register to use for the actual access.
         * We don't use the EL1 register's access function, and
         * fine-grained-traps on EL1 also do not apply here.
         */
        key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                 crn, crm, op0, 0, op2);
        ri = get_arm_cp_reginfo(s->cp_regs, key);
        assert(ri);
        assert(cp_access_ok(s->current_el, ri, isread));
        /*
         * We might not have done an update_pc earlier, so check we don't
         * need it. We could support this in future if necessary.
         */
        assert(!(ri->type & ARM_CP_RAISES_EXC));
    }

    if (nv2_mem_redirect) {
        /*
         * This system register is being redirected into an EL2 memory access.
         * This means it is not an IO operation, doesn't change hflags,
         * and need not end the TB, because it has no side effects.
         *
         * The access is 64-bit single copy atomic, guaranteed aligned because
         * of the definition of VNCR_EL2. Its endianness depends on
         * SCTLR_EL2.EE, not on the data endianness of EL1.
         * It is done under either the EL2 translation regime or the EL2&0
         * translation regime, depending on HCR_EL2.E2H. It behaves as if
         * PSTATE.PAN is 0.
         */
        TCGv_i64 ptr = tcg_temp_new_i64();
        MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
        ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
        int memidx = arm_to_core_mmu_idx(armmemidx);
        uint32_t syn;

        mop |= (s->nv2_mem_be ? MO_BE : MO_LE);

        /* Address = cp15.vncr_el2 + the register's offset (flag bits masked off) */
        tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
        tcg_gen_addi_i64(ptr, ptr,
                         (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
        tcg_rt = cpu_reg(s, rt);

        syn = syn_data_abort_vncr(0, !isread, 0);
        disas_set_insn_syndrome(s, syn);
        if (isread) {
            tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
        } else {
            tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
        }
        return;
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        break;
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
    {
        /*
         * Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         * For nested virt we should report EL2.
         */
        int el = s->nv ? 2 : s->current_el;
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, el << 2);
        return;
    }
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        if (s->mte_active[0]) {
            int desc = 0;

            desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
            desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
            desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);

            tcg_rt = tcg_temp_new_i64();
            gen_helper_mte_check_zva(tcg_rt, tcg_env,
                                     tcg_constant_i32(desc), cpu_reg(s, rt));
        } else {
            tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
        }
        gen_helper_dc_zva(tcg_env, tcg_rt);
        return;
    case ARM_CP_DC_GVA:
        {
            TCGv_i64 clean_addr, tag;

            /*
             * DC_GVA, like DC_ZVA, requires that we supply the original
             * pointer for an invalid page.  Probe that address first.
             */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);

            if (s->ata[0]) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
            }
        }
        return;
    case ARM_CP_DC_GZVA:
        {
            TCGv_i64 clean_addr, tag;

            /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_helper_dc_zva(tcg_env, clean_addr);

            if (s->ata[0]) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
            }
        }
        return;
    default:
        g_assert_not_reached();
    }

    if (ri->type & ARM_CP_IO) {
        /* I/O operations must end the TB here (whether read or write) */
        need_exit_tb = translator_io_start(&s->base);
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
        } else {
            tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
        } else {
            tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
        }
    }

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        need_exit_tb = true;
    }
    if (need_exit_tb) {
        s->base.is_jmp = DISAS_UPDATE_EXIT;
    }
}
2587 
2588 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2589 {
2590     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2591     return true;
2592 }
2593 
2594 static bool trans_SVC(DisasContext *s, arg_i *a)
2595 {
2596     /*
2597      * For SVC, HVC and SMC we advance the single-step state
2598      * machine before taking the exception. This is architecturally
2599      * mandated, to ensure that single-stepping a system call
2600      * instruction works properly.
2601      */
2602     uint32_t syndrome = syn_aa64_svc(a->imm);
2603     if (s->fgt_svc) {
2604         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2605         return true;
2606     }
2607     gen_ss_advance(s);
2608     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2609     return true;
2610 }
2611 
2612 static bool trans_HVC(DisasContext *s, arg_i *a)
2613 {
2614     int target_el = s->current_el == 3 ? 3 : 2;
2615 
2616     if (s->current_el == 0) {
2617         unallocated_encoding(s);
2618         return true;
2619     }
2620     /*
2621      * The pre HVC helper handles cases when HVC gets trapped
2622      * as an undefined insn by runtime configuration.
2623      */
2624     gen_a64_update_pc(s, 0);
2625     gen_helper_pre_hvc(tcg_env);
2626     /* Architecture requires ss advance before we do the actual work */
2627     gen_ss_advance(s);
2628     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2629     return true;
2630 }
2631 
2632 static bool trans_SMC(DisasContext *s, arg_i *a)
2633 {
2634     if (s->current_el == 0) {
2635         unallocated_encoding(s);
2636         return true;
2637     }
2638     gen_a64_update_pc(s, 0);
2639     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2640     /* Architecture requires ss advance before we do the actual work */
2641     gen_ss_advance(s);
2642     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2643     return true;
2644 }
2645 
2646 static bool trans_BRK(DisasContext *s, arg_i *a)
2647 {
2648     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2649     return true;
2650 }
2651 
2652 static bool trans_HLT(DisasContext *s, arg_i *a)
2653 {
2654     /*
2655      * HLT. This has two purposes.
2656      * Architecturally, it is an external halting debug instruction.
2657      * Since QEMU doesn't implement external debug, we treat this as
2658      * it is required for halting debug disabled: it will UNDEF.
2659      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2660      */
2661     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2662         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2663     } else {
2664         unallocated_encoding(s);
2665     }
2666     return true;
2667 }
2668 
2669 /*
2670  * Load/Store exclusive instructions are implemented by remembering
2671  * the value/address loaded, and seeing if these are the same
2672  * when the store is performed. This is not actually the architecturally
2673  * mandated semantics, but it works for typical guest code sequences
2674  * and avoids having to monitor regular stores.
2675  *
2676  * The store exclusive uses the atomic cmpxchg primitives to avoid
2677  * races in multi-threaded linux-user and when MTTCG softmmu is
2678  * enabled.
2679  */
2680 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2681                                int size, bool is_pair)
2682 {
2683     int idx = get_mem_index(s);
2684     TCGv_i64 dirty_addr, clean_addr;
2685     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2686 
2687     s->is_ldex = true;
2688     dirty_addr = cpu_reg_sp(s, rn);
2689     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2690 
2691     g_assert(size <= 3);
2692     if (is_pair) {
2693         g_assert(size >= 2);
2694         if (size == 2) {
2695             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2696             if (s->be_data == MO_LE) {
2697                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2698                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2699             } else {
2700                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2701                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2702             }
2703         } else {
2704             TCGv_i128 t16 = tcg_temp_new_i128();
2705 
2706             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2707 
2708             if (s->be_data == MO_LE) {
2709                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2710                                       cpu_exclusive_high, t16);
2711             } else {
2712                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2713                                       cpu_exclusive_val, t16);
2714             }
2715             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2716             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2717         }
2718     } else {
2719         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2720         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2721     }
2722     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2723 }
2724 
2725 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2726                                 int rn, int size, int is_pair)
2727 {
2728     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2729      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2730      *     [addr] = {Rt};
2731      *     if (is_pair) {
2732      *         [addr + datasize] = {Rt2};
2733      *     }
2734      *     {Rd} = 0;
2735      * } else {
2736      *     {Rd} = 1;
2737      * }
2738      * env->exclusive_addr = -1;
2739      */
2740     TCGLabel *fail_label = gen_new_label();
2741     TCGLabel *done_label = gen_new_label();
2742     TCGv_i64 tmp, clean_addr;
2743     MemOp memop;
2744 
2745     /*
2746      * FIXME: We are out of spec here.  We have recorded only the address
2747      * from load_exclusive, not the entire range, and we assume that the
2748      * size of the access on both sides match.  The architecture allows the
2749      * store to be smaller than the load, so long as the stored bytes are
2750      * within the range recorded by the load.
2751      */
2752 
2753     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2754     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2755     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2756 
2757     /*
2758      * The write, and any associated faults, only happen if the virtual
2759      * and physical addresses pass the exclusive monitor check.  These
2760      * faults are exceedingly unlikely, because normally the guest uses
2761      * the exact same address register for the load_exclusive, and we
2762      * would have recognized these faults there.
2763      *
2764      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2765      * unaligned 4-byte write within the range of an aligned 8-byte load.
2766      * With LSE2, the store would need to cross a 16-byte boundary when the
2767      * load did not, which would mean the store is outside the range
2768      * recorded for the monitor, which would have failed a corrected monitor
2769      * check above.  For now, we assume no size change and retain the
2770      * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2771      *
2772      * It is possible to trigger an MTE fault, by performing the load with
2773      * a virtual address with a valid tag and performing the store with the
2774      * same virtual address and a different invalid tag.
2775      */
2776     memop = size + is_pair;
2777     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2778         memop |= MO_ALIGN;
2779     }
2780     memop = finalize_memop(s, memop);
2781     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2782 
2783     tmp = tcg_temp_new_i64();
2784     if (is_pair) {
2785         if (size == 2) {
2786             if (s->be_data == MO_LE) {
2787                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2788             } else {
2789                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2790             }
2791             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2792                                        cpu_exclusive_val, tmp,
2793                                        get_mem_index(s), memop);
2794             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2795         } else {
2796             TCGv_i128 t16 = tcg_temp_new_i128();
2797             TCGv_i128 c16 = tcg_temp_new_i128();
2798             TCGv_i64 a, b;
2799 
2800             if (s->be_data == MO_LE) {
2801                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2802                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2803                                         cpu_exclusive_high);
2804             } else {
2805                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2806                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2807                                         cpu_exclusive_val);
2808             }
2809 
2810             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2811                                         get_mem_index(s), memop);
2812 
2813             a = tcg_temp_new_i64();
2814             b = tcg_temp_new_i64();
2815             if (s->be_data == MO_LE) {
2816                 tcg_gen_extr_i128_i64(a, b, t16);
2817             } else {
2818                 tcg_gen_extr_i128_i64(b, a, t16);
2819             }
2820 
2821             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2822             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2823             tcg_gen_or_i64(tmp, a, b);
2824 
2825             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2826         }
2827     } else {
2828         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2829                                    cpu_reg(s, rt), get_mem_index(s), memop);
2830         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2831     }
2832     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2833     tcg_gen_br(done_label);
2834 
2835     gen_set_label(fail_label);
2836     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2837     gen_set_label(done_label);
2838     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2839 }
2840 
2841 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2842                                  int rn, int size)
2843 {
2844     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2845     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2846     int memidx = get_mem_index(s);
2847     TCGv_i64 clean_addr;
2848     MemOp memop;
2849 
2850     if (rn == 31) {
2851         gen_check_sp_alignment(s);
2852     }
2853     memop = check_atomic_align(s, rn, size);
2854     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2855     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2856                                memidx, memop);
2857 }
2858 
2859 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2860                                       int rn, int size)
2861 {
2862     TCGv_i64 s1 = cpu_reg(s, rs);
2863     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2864     TCGv_i64 t1 = cpu_reg(s, rt);
2865     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2866     TCGv_i64 clean_addr;
2867     int memidx = get_mem_index(s);
2868     MemOp memop;
2869 
2870     if (rn == 31) {
2871         gen_check_sp_alignment(s);
2872     }
2873 
2874     /* This is a single atomic access, despite the "pair". */
2875     memop = check_atomic_align(s, rn, size + 1);
2876     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2877 
2878     if (size == 2) {
2879         TCGv_i64 cmp = tcg_temp_new_i64();
2880         TCGv_i64 val = tcg_temp_new_i64();
2881 
2882         if (s->be_data == MO_LE) {
2883             tcg_gen_concat32_i64(val, t1, t2);
2884             tcg_gen_concat32_i64(cmp, s1, s2);
2885         } else {
2886             tcg_gen_concat32_i64(val, t2, t1);
2887             tcg_gen_concat32_i64(cmp, s2, s1);
2888         }
2889 
2890         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2891 
2892         if (s->be_data == MO_LE) {
2893             tcg_gen_extr32_i64(s1, s2, cmp);
2894         } else {
2895             tcg_gen_extr32_i64(s2, s1, cmp);
2896         }
2897     } else {
2898         TCGv_i128 cmp = tcg_temp_new_i128();
2899         TCGv_i128 val = tcg_temp_new_i128();
2900 
2901         if (s->be_data == MO_LE) {
2902             tcg_gen_concat_i64_i128(val, t1, t2);
2903             tcg_gen_concat_i64_i128(cmp, s1, s2);
2904         } else {
2905             tcg_gen_concat_i64_i128(val, t2, t1);
2906             tcg_gen_concat_i64_i128(cmp, s2, s1);
2907         }
2908 
2909         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2910 
2911         if (s->be_data == MO_LE) {
2912             tcg_gen_extr_i128_i64(s1, s2, cmp);
2913         } else {
2914             tcg_gen_extr_i128_i64(s2, s1, cmp);
2915         }
2916     }
2917 }
2918 
2919 /*
2920  * Compute the ISS.SF bit for syndrome information if an exception
2921  * is taken on a load or store. This indicates whether the instruction
2922  * is accessing a 32-bit or 64-bit register. This logic is derived
2923  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2924  */
2925 static bool ldst_iss_sf(int size, bool sign, bool ext)
2926 {
2927 
2928     if (sign) {
2929         /*
2930          * Signed loads are 64 bit results if we are not going to
2931          * do a zero-extend from 32 to 64 after the load.
2932          * (For a store, sign and ext are always false.)
2933          */
2934         return !ext;
2935     } else {
2936         /* Unsigned loads/stores work at the specified size */
2937         return size == MO_64;
2938     }
2939 }
2940 
2941 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2942 {
2943     if (a->rn == 31) {
2944         gen_check_sp_alignment(s);
2945     }
2946     if (a->lasr) {
2947         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2948     }
2949     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2950     return true;
2951 }
2952 
2953 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2954 {
2955     if (a->rn == 31) {
2956         gen_check_sp_alignment(s);
2957     }
2958     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2959     if (a->lasr) {
2960         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2961     }
2962     return true;
2963 }
2964 
2965 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2966 {
2967     TCGv_i64 clean_addr;
2968     MemOp memop;
2969     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2970 
2971     /*
2972      * StoreLORelease is the same as Store-Release for QEMU, but
2973      * needs the feature-test.
2974      */
2975     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2976         return false;
2977     }
2978     /* Generate ISS for non-exclusive accesses including LASR.  */
2979     if (a->rn == 31) {
2980         gen_check_sp_alignment(s);
2981     }
2982     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2983     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
2984     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2985                                 true, a->rn != 31, memop);
2986     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
2987               iss_sf, a->lasr);
2988     return true;
2989 }
2990 
2991 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
2992 {
2993     TCGv_i64 clean_addr;
2994     MemOp memop;
2995     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2996 
2997     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2998     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2999         return false;
3000     }
3001     /* Generate ISS for non-exclusive accesses including LASR.  */
3002     if (a->rn == 31) {
3003         gen_check_sp_alignment(s);
3004     }
3005     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3006     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3007                                 false, a->rn != 31, memop);
3008     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3009               a->rt, iss_sf, a->lasr);
3010     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3011     return true;
3012 }
3013 
3014 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3015 {
3016     if (a->rn == 31) {
3017         gen_check_sp_alignment(s);
3018     }
3019     if (a->lasr) {
3020         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3021     }
3022     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3023     return true;
3024 }
3025 
3026 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3027 {
3028     if (a->rn == 31) {
3029         gen_check_sp_alignment(s);
3030     }
3031     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3032     if (a->lasr) {
3033         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3034     }
3035     return true;
3036 }
3037 
3038 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3039 {
3040     if (!dc_isar_feature(aa64_atomics, s)) {
3041         return false;
3042     }
3043     if (((a->rt | a->rs) & 1) != 0) {
3044         return false;
3045     }
3046 
3047     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3048     return true;
3049 }
3050 
3051 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3052 {
3053     if (!dc_isar_feature(aa64_atomics, s)) {
3054         return false;
3055     }
3056     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3057     return true;
3058 }
3059 
3060 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3061 {
3062     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3063     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3064     TCGv_i64 clean_addr = tcg_temp_new_i64();
3065     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3066 
3067     gen_pc_plus_diff(s, clean_addr, a->imm);
3068     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3069               false, true, a->rt, iss_sf, false);
3070     return true;
3071 }
3072 
3073 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3074 {
3075     /* Load register (literal), vector version */
3076     TCGv_i64 clean_addr;
3077     MemOp memop;
3078 
3079     if (!fp_access_check(s)) {
3080         return true;
3081     }
3082     memop = finalize_memop_asimd(s, a->sz);
3083     clean_addr = tcg_temp_new_i64();
3084     gen_pc_plus_diff(s, clean_addr, a->imm);
3085     do_fp_ld(s, a->rt, clean_addr, memop);
3086     return true;
3087 }
3088 
3089 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3090                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3091                                  uint64_t offset, bool is_store, MemOp mop)
3092 {
3093     if (a->rn == 31) {
3094         gen_check_sp_alignment(s);
3095     }
3096 
3097     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3098     if (!a->p) {
3099         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3100     }
3101 
3102     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3103                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3104 }
3105 
3106 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3107                                   TCGv_i64 dirty_addr, uint64_t offset)
3108 {
3109     if (a->w) {
3110         if (a->p) {
3111             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3112         }
3113         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3114     }
3115 }
3116 
3117 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3118 {
3119     uint64_t offset = a->imm << a->sz;
3120     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3121     MemOp mop = finalize_memop(s, a->sz);
3122 
3123     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3124     tcg_rt = cpu_reg(s, a->rt);
3125     tcg_rt2 = cpu_reg(s, a->rt2);
3126     /*
3127      * We built mop above for the single logical access -- rebuild it
3128      * now for the paired operation.
3129      *
3130      * With LSE2, non-sign-extending pairs are treated atomically if
3131      * aligned, and if unaligned one of the pair will be completely
3132      * within a 16-byte block and that element will be atomic.
3133      * Otherwise each element is separately atomic.
3134      * In all cases, issue one operation with the correct atomicity.
3135      */
3136     mop = a->sz + 1;
3137     if (s->align_mem) {
3138         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3139     }
3140     mop = finalize_memop_pair(s, mop);
3141     if (a->sz == 2) {
3142         TCGv_i64 tmp = tcg_temp_new_i64();
3143 
3144         if (s->be_data == MO_LE) {
3145             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3146         } else {
3147             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3148         }
3149         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3150     } else {
3151         TCGv_i128 tmp = tcg_temp_new_i128();
3152 
3153         if (s->be_data == MO_LE) {
3154             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3155         } else {
3156             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3157         }
3158         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3159     }
3160     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3161     return true;
3162 }
3163 
3164 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3165 {
3166     uint64_t offset = a->imm << a->sz;
3167     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3168     MemOp mop = finalize_memop(s, a->sz);
3169 
3170     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3171     tcg_rt = cpu_reg(s, a->rt);
3172     tcg_rt2 = cpu_reg(s, a->rt2);
3173 
3174     /*
3175      * We built mop above for the single logical access -- rebuild it
3176      * now for the paired operation.
3177      *
3178      * With LSE2, non-sign-extending pairs are treated atomically if
3179      * aligned, and if unaligned one of the pair will be completely
3180      * within a 16-byte block and that element will be atomic.
3181      * Otherwise each element is separately atomic.
3182      * In all cases, issue one operation with the correct atomicity.
3183      *
3184      * This treats sign-extending loads like zero-extending loads,
3185      * since that reuses the most code below.
3186      */
3187     mop = a->sz + 1;
3188     if (s->align_mem) {
3189         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3190     }
3191     mop = finalize_memop_pair(s, mop);
3192     if (a->sz == 2) {
3193         int o2 = s->be_data == MO_LE ? 32 : 0;
3194         int o1 = o2 ^ 32;
3195 
3196         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3197         if (a->sign) {
3198             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3199             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3200         } else {
3201             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3202             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3203         }
3204     } else {
3205         TCGv_i128 tmp = tcg_temp_new_i128();
3206 
3207         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3208         if (s->be_data == MO_LE) {
3209             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3210         } else {
3211             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3212         }
3213     }
3214     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3215     return true;
3216 }
3217 
3218 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3219 {
3220     uint64_t offset = a->imm << a->sz;
3221     TCGv_i64 clean_addr, dirty_addr;
3222     MemOp mop;
3223 
3224     if (!fp_access_check(s)) {
3225         return true;
3226     }
3227 
3228     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3229     mop = finalize_memop_asimd(s, a->sz);
3230     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3231     do_fp_st(s, a->rt, clean_addr, mop);
3232     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3233     do_fp_st(s, a->rt2, clean_addr, mop);
3234     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3235     return true;
3236 }
3237 
3238 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3239 {
3240     uint64_t offset = a->imm << a->sz;
3241     TCGv_i64 clean_addr, dirty_addr;
3242     MemOp mop;
3243 
3244     if (!fp_access_check(s)) {
3245         return true;
3246     }
3247 
3248     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3249     mop = finalize_memop_asimd(s, a->sz);
3250     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3251     do_fp_ld(s, a->rt, clean_addr, mop);
3252     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3253     do_fp_ld(s, a->rt2, clean_addr, mop);
3254     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3255     return true;
3256 }
3257 
3258 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3259 {
3260     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3261     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3262     MemOp mop;
3263     TCGv_i128 tmp;
3264 
3265     /* STGP only comes in one size. */
3266     tcg_debug_assert(a->sz == MO_64);
3267 
3268     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3269         return false;
3270     }
3271 
3272     if (a->rn == 31) {
3273         gen_check_sp_alignment(s);
3274     }
3275 
3276     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3277     if (!a->p) {
3278         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3279     }
3280 
3281     clean_addr = clean_data_tbi(s, dirty_addr);
3282     tcg_rt = cpu_reg(s, a->rt);
3283     tcg_rt2 = cpu_reg(s, a->rt2);
3284 
3285     /*
3286      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3287      * and one tag operation.  We implement it as one single aligned 16-byte
3288      * memory operation for convenience.  Note that the alignment ensures
3289      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3290      */
3291     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3292 
3293     tmp = tcg_temp_new_i128();
3294     if (s->be_data == MO_LE) {
3295         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3296     } else {
3297         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3298     }
3299     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3300 
3301     /* Perform the tag store, if tag access enabled. */
3302     if (s->ata[0]) {
3303         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3304             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3305         } else {
3306             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3307         }
3308     }
3309 
3310     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3311     return true;
3312 }
3313 
3314 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3315                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3316                                  uint64_t offset, bool is_store, MemOp mop)
3317 {
3318     int memidx;
3319 
3320     if (a->rn == 31) {
3321         gen_check_sp_alignment(s);
3322     }
3323 
3324     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3325     if (!a->p) {
3326         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3327     }
3328     memidx = get_a64_user_mem_index(s, a->unpriv);
3329     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3330                                         a->w || a->rn != 31,
3331                                         mop, a->unpriv, memidx);
3332 }
3333 
3334 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3335                                   TCGv_i64 dirty_addr, uint64_t offset)
3336 {
3337     if (a->w) {
3338         if (a->p) {
3339             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3340         }
3341         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3342     }
3343 }
3344 
3345 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3346 {
3347     bool iss_sf, iss_valid = !a->w;
3348     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3349     int memidx = get_a64_user_mem_index(s, a->unpriv);
3350     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3351 
3352     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3353 
3354     tcg_rt = cpu_reg(s, a->rt);
3355     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3356 
3357     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3358                      iss_valid, a->rt, iss_sf, false);
3359     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3360     return true;
3361 }
3362 
3363 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3364 {
3365     bool iss_sf, iss_valid = !a->w;
3366     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3367     int memidx = get_a64_user_mem_index(s, a->unpriv);
3368     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3369 
3370     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3371 
3372     tcg_rt = cpu_reg(s, a->rt);
3373     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3374 
3375     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3376                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3377     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3378     return true;
3379 }
3380 
3381 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3382 {
3383     TCGv_i64 clean_addr, dirty_addr;
3384     MemOp mop;
3385 
3386     if (!fp_access_check(s)) {
3387         return true;
3388     }
3389     mop = finalize_memop_asimd(s, a->sz);
3390     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3391     do_fp_st(s, a->rt, clean_addr, mop);
3392     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3393     return true;
3394 }
3395 
3396 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3397 {
3398     TCGv_i64 clean_addr, dirty_addr;
3399     MemOp mop;
3400 
3401     if (!fp_access_check(s)) {
3402         return true;
3403     }
3404     mop = finalize_memop_asimd(s, a->sz);
3405     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3406     do_fp_ld(s, a->rt, clean_addr, mop);
3407     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3408     return true;
3409 }
3410 
3411 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3412                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3413                              bool is_store, MemOp memop)
3414 {
3415     TCGv_i64 tcg_rm;
3416 
3417     if (a->rn == 31) {
3418         gen_check_sp_alignment(s);
3419     }
3420     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3421 
3422     tcg_rm = read_cpu_reg(s, a->rm, 1);
3423     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3424 
3425     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3426     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3427 }
3428 
3429 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3430 {
3431     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3432     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3433     MemOp memop;
3434 
3435     if (extract32(a->opt, 1, 1) == 0) {
3436         return false;
3437     }
3438 
3439     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3440     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3441     tcg_rt = cpu_reg(s, a->rt);
3442     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3443               a->ext, true, a->rt, iss_sf, false);
3444     return true;
3445 }
3446 
3447 static bool trans_STR(DisasContext *s, arg_ldst *a)
3448 {
3449     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3450     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3451     MemOp memop;
3452 
3453     if (extract32(a->opt, 1, 1) == 0) {
3454         return false;
3455     }
3456 
3457     memop = finalize_memop(s, a->sz);
3458     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3459     tcg_rt = cpu_reg(s, a->rt);
3460     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3461     return true;
3462 }
3463 
3464 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3465 {
3466     TCGv_i64 clean_addr, dirty_addr;
3467     MemOp memop;
3468 
3469     if (extract32(a->opt, 1, 1) == 0) {
3470         return false;
3471     }
3472 
3473     if (!fp_access_check(s)) {
3474         return true;
3475     }
3476 
3477     memop = finalize_memop_asimd(s, a->sz);
3478     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3479     do_fp_ld(s, a->rt, clean_addr, memop);
3480     return true;
3481 }
3482 
3483 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3484 {
3485     TCGv_i64 clean_addr, dirty_addr;
3486     MemOp memop;
3487 
3488     if (extract32(a->opt, 1, 1) == 0) {
3489         return false;
3490     }
3491 
3492     if (!fp_access_check(s)) {
3493         return true;
3494     }
3495 
3496     memop = finalize_memop_asimd(s, a->sz);
3497     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3498     do_fp_st(s, a->rt, clean_addr, memop);
3499     return true;
3500 }
3501 
3502 
3503 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3504                          int sign, bool invert)
3505 {
3506     MemOp mop = a->sz | sign;
3507     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3508 
3509     if (a->rn == 31) {
3510         gen_check_sp_alignment(s);
3511     }
3512     mop = check_atomic_align(s, a->rn, mop);
3513     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3514                                 a->rn != 31, mop);
3515     tcg_rs = read_cpu_reg(s, a->rs, true);
3516     tcg_rt = cpu_reg(s, a->rt);
3517     if (invert) {
3518         tcg_gen_not_i64(tcg_rs, tcg_rs);
3519     }
3520     /*
3521      * The tcg atomic primitives are all full barriers.  Therefore we
3522      * can ignore the Acquire and Release bits of this instruction.
3523      */
3524     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3525 
3526     if (mop & MO_SIGN) {
3527         switch (a->sz) {
3528         case MO_8:
3529             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3530             break;
3531         case MO_16:
3532             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3533             break;
3534         case MO_32:
3535             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3536             break;
3537         case MO_64:
3538             break;
3539         default:
3540             g_assert_not_reached();
3541         }
3542     }
3543     return true;
3544 }
3545 
3546 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3547 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3548 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3549 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3550 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3551 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3552 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3553 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3554 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3555 
3556 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3557 {
3558     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3559     TCGv_i64 clean_addr;
3560     MemOp mop;
3561 
3562     if (!dc_isar_feature(aa64_atomics, s) ||
3563         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3564         return false;
3565     }
3566     if (a->rn == 31) {
3567         gen_check_sp_alignment(s);
3568     }
3569     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3570     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3571                                 a->rn != 31, mop);
3572     /*
3573      * LDAPR* are a special case because they are a simple load, not a
3574      * fetch-and-do-something op.
3575      * The architectural consistency requirements here are weaker than
3576      * full load-acquire (we only need "load-acquire processor consistent"),
3577      * but we choose to implement them as full LDAQ.
3578      */
3579     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3580               true, a->rt, iss_sf, true);
3581     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3582     return true;
3583 }
3584 
3585 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3586 {
3587     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3588     MemOp memop;
3589 
3590     /* Load with pointer authentication */
3591     if (!dc_isar_feature(aa64_pauth, s)) {
3592         return false;
3593     }
3594 
3595     if (a->rn == 31) {
3596         gen_check_sp_alignment(s);
3597     }
3598     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3599 
3600     if (s->pauth_active) {
3601         if (!a->m) {
3602             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3603                                       tcg_constant_i64(0));
3604         } else {
3605             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3606                                       tcg_constant_i64(0));
3607         }
3608     }
3609 
3610     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3611 
3612     memop = finalize_memop(s, MO_64);
3613 
3614     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3615     clean_addr = gen_mte_check1(s, dirty_addr, false,
3616                                 a->w || a->rn != 31, memop);
3617 
3618     tcg_rt = cpu_reg(s, a->rt);
3619     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3620               /* extend */ false, /* iss_valid */ !a->w,
3621               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3622 
3623     if (a->w) {
3624         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3625     }
3626     return true;
3627 }
3628 
3629 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3630 {
3631     TCGv_i64 clean_addr, dirty_addr;
3632     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3633     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3634 
3635     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3636         return false;
3637     }
3638 
3639     if (a->rn == 31) {
3640         gen_check_sp_alignment(s);
3641     }
3642 
3643     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3644     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3645     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3646     clean_addr = clean_data_tbi(s, dirty_addr);
3647 
3648     /*
3649      * Load-AcquirePC semantics; we implement as the slightly more
3650      * restrictive Load-Acquire.
3651      */
3652     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3653               a->rt, iss_sf, true);
3654     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3655     return true;
3656 }
3657 
3658 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3659 {
3660     TCGv_i64 clean_addr, dirty_addr;
3661     MemOp mop = a->sz;
3662     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3663 
3664     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3665         return false;
3666     }
3667 
3668     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3669 
3670     if (a->rn == 31) {
3671         gen_check_sp_alignment(s);
3672     }
3673 
3674     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3675     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3676     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3677     clean_addr = clean_data_tbi(s, dirty_addr);
3678 
3679     /* Store-Release semantics */
3680     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3681     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3682     return true;
3683 }
3684 
3685 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3686 {
3687     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3688     MemOp endian, align, mop;
3689 
3690     int total;    /* total bytes */
3691     int elements; /* elements per vector */
3692     int r;
3693     int size = a->sz;
3694 
3695     if (!a->p && a->rm != 0) {
3696         /* For non-postindexed accesses the Rm field must be 0 */
3697         return false;
3698     }
3699     if (size == 3 && !a->q && a->selem != 1) {
3700         return false;
3701     }
3702     if (!fp_access_check(s)) {
3703         return true;
3704     }
3705 
3706     if (a->rn == 31) {
3707         gen_check_sp_alignment(s);
3708     }
3709 
3710     /* For our purposes, bytes are always little-endian.  */
3711     endian = s->be_data;
3712     if (size == 0) {
3713         endian = MO_LE;
3714     }
3715 
3716     total = a->rpt * a->selem * (a->q ? 16 : 8);
3717     tcg_rn = cpu_reg_sp(s, a->rn);
3718 
3719     /*
3720      * Issue the MTE check vs the logical repeat count, before we
3721      * promote consecutive little-endian elements below.
3722      */
3723     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3724                                 finalize_memop_asimd(s, size));
3725 
3726     /*
3727      * Consecutive little-endian elements from a single register
3728      * can be promoted to a larger little-endian operation.
3729      */
3730     align = MO_ALIGN;
3731     if (a->selem == 1 && endian == MO_LE) {
3732         align = pow2_align(size);
3733         size = 3;
3734     }
3735     if (!s->align_mem) {
3736         align = 0;
3737     }
3738     mop = endian | size | align;
3739 
3740     elements = (a->q ? 16 : 8) >> size;
3741     tcg_ebytes = tcg_constant_i64(1 << size);
3742     for (r = 0; r < a->rpt; r++) {
3743         int e;
3744         for (e = 0; e < elements; e++) {
3745             int xs;
3746             for (xs = 0; xs < a->selem; xs++) {
3747                 int tt = (a->rt + r + xs) % 32;
3748                 do_vec_ld(s, tt, e, clean_addr, mop);
3749                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3750             }
3751         }
3752     }
3753 
3754     /*
3755      * For non-quad operations, setting a slice of the low 64 bits of
3756      * the register clears the high 64 bits (in the ARM ARM pseudocode
3757      * this is implicit in the fact that 'rval' is a 64 bit wide
3758      * variable).  For quad operations, we might still need to zero
3759      * the high bits of SVE.
3760      */
3761     for (r = 0; r < a->rpt * a->selem; r++) {
3762         int tt = (a->rt + r) % 32;
3763         clear_vec_high(s, a->q, tt);
3764     }
3765 
3766     if (a->p) {
3767         if (a->rm == 31) {
3768             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3769         } else {
3770             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3771         }
3772     }
3773     return true;
3774 }
3775 
3776 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3777 {
3778     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3779     MemOp endian, align, mop;
3780 
3781     int total;    /* total bytes */
3782     int elements; /* elements per vector */
3783     int r;
3784     int size = a->sz;
3785 
3786     if (!a->p && a->rm != 0) {
3787         /* For non-postindexed accesses the Rm field must be 0 */
3788         return false;
3789     }
3790     if (size == 3 && !a->q && a->selem != 1) {
3791         return false;
3792     }
3793     if (!fp_access_check(s)) {
3794         return true;
3795     }
3796 
3797     if (a->rn == 31) {
3798         gen_check_sp_alignment(s);
3799     }
3800 
3801     /* For our purposes, bytes are always little-endian.  */
3802     endian = s->be_data;
3803     if (size == 0) {
3804         endian = MO_LE;
3805     }
3806 
3807     total = a->rpt * a->selem * (a->q ? 16 : 8);
3808     tcg_rn = cpu_reg_sp(s, a->rn);
3809 
3810     /*
3811      * Issue the MTE check vs the logical repeat count, before we
3812      * promote consecutive little-endian elements below.
3813      */
3814     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3815                                 finalize_memop_asimd(s, size));
3816 
3817     /*
3818      * Consecutive little-endian elements from a single register
3819      * can be promoted to a larger little-endian operation.
3820      */
3821     align = MO_ALIGN;
3822     if (a->selem == 1 && endian == MO_LE) {
3823         align = pow2_align(size);
3824         size = 3;
3825     }
3826     if (!s->align_mem) {
3827         align = 0;
3828     }
3829     mop = endian | size | align;
3830 
3831     elements = (a->q ? 16 : 8) >> size;
3832     tcg_ebytes = tcg_constant_i64(1 << size);
3833     for (r = 0; r < a->rpt; r++) {
3834         int e;
3835         for (e = 0; e < elements; e++) {
3836             int xs;
3837             for (xs = 0; xs < a->selem; xs++) {
3838                 int tt = (a->rt + r + xs) % 32;
3839                 do_vec_st(s, tt, e, clean_addr, mop);
3840                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3841             }
3842         }
3843     }
3844 
3845     if (a->p) {
3846         if (a->rm == 31) {
3847             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3848         } else {
3849             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3850         }
3851     }
3852     return true;
3853 }
3854 
3855 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3856 {
3857     int xs, total, rt;
3858     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3859     MemOp mop;
3860 
3861     if (!a->p && a->rm != 0) {
3862         return false;
3863     }
3864     if (!fp_access_check(s)) {
3865         return true;
3866     }
3867 
3868     if (a->rn == 31) {
3869         gen_check_sp_alignment(s);
3870     }
3871 
3872     total = a->selem << a->scale;
3873     tcg_rn = cpu_reg_sp(s, a->rn);
3874 
3875     mop = finalize_memop_asimd(s, a->scale);
3876     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3877                                 total, mop);
3878 
3879     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3880     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3881         do_vec_st(s, rt, a->index, clean_addr, mop);
3882         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3883     }
3884 
3885     if (a->p) {
3886         if (a->rm == 31) {
3887             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3888         } else {
3889             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3890         }
3891     }
3892     return true;
3893 }
3894 
3895 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3896 {
3897     int xs, total, rt;
3898     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3899     MemOp mop;
3900 
3901     if (!a->p && a->rm != 0) {
3902         return false;
3903     }
3904     if (!fp_access_check(s)) {
3905         return true;
3906     }
3907 
3908     if (a->rn == 31) {
3909         gen_check_sp_alignment(s);
3910     }
3911 
3912     total = a->selem << a->scale;
3913     tcg_rn = cpu_reg_sp(s, a->rn);
3914 
3915     mop = finalize_memop_asimd(s, a->scale);
3916     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3917                                 total, mop);
3918 
3919     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3920     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3921         do_vec_ld(s, rt, a->index, clean_addr, mop);
3922         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3923     }
3924 
3925     if (a->p) {
3926         if (a->rm == 31) {
3927             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3928         } else {
3929             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3930         }
3931     }
3932     return true;
3933 }
3934 
3935 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3936 {
3937     int xs, total, rt;
3938     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3939     MemOp mop;
3940 
3941     if (!a->p && a->rm != 0) {
3942         return false;
3943     }
3944     if (!fp_access_check(s)) {
3945         return true;
3946     }
3947 
3948     if (a->rn == 31) {
3949         gen_check_sp_alignment(s);
3950     }
3951 
3952     total = a->selem << a->scale;
3953     tcg_rn = cpu_reg_sp(s, a->rn);
3954 
3955     mop = finalize_memop_asimd(s, a->scale);
3956     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3957                                 total, mop);
3958 
3959     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3960     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3961         /* Load and replicate to all elements */
3962         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3963 
3964         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3965         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3966                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3967         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3968     }
3969 
3970     if (a->p) {
3971         if (a->rm == 31) {
3972             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3973         } else {
3974             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3975         }
3976     }
3977     return true;
3978 }
3979 
3980 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
3981 {
3982     TCGv_i64 addr, clean_addr, tcg_rt;
3983     int size = 4 << s->dcz_blocksize;
3984 
3985     if (!dc_isar_feature(aa64_mte, s)) {
3986         return false;
3987     }
3988     if (s->current_el == 0) {
3989         return false;
3990     }
3991 
3992     if (a->rn == 31) {
3993         gen_check_sp_alignment(s);
3994     }
3995 
3996     addr = read_cpu_reg_sp(s, a->rn, true);
3997     tcg_gen_addi_i64(addr, addr, a->imm);
3998     tcg_rt = cpu_reg(s, a->rt);
3999 
4000     if (s->ata[0]) {
4001         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4002     }
4003     /*
4004      * The non-tags portion of STZGM is mostly like DC_ZVA,
4005      * except the alignment happens before the access.
4006      */
4007     clean_addr = clean_data_tbi(s, addr);
4008     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4009     gen_helper_dc_zva(tcg_env, clean_addr);
4010     return true;
4011 }
4012 
4013 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4014 {
4015     TCGv_i64 addr, clean_addr, tcg_rt;
4016 
4017     if (!dc_isar_feature(aa64_mte, s)) {
4018         return false;
4019     }
4020     if (s->current_el == 0) {
4021         return false;
4022     }
4023 
4024     if (a->rn == 31) {
4025         gen_check_sp_alignment(s);
4026     }
4027 
4028     addr = read_cpu_reg_sp(s, a->rn, true);
4029     tcg_gen_addi_i64(addr, addr, a->imm);
4030     tcg_rt = cpu_reg(s, a->rt);
4031 
4032     if (s->ata[0]) {
4033         gen_helper_stgm(tcg_env, addr, tcg_rt);
4034     } else {
4035         MMUAccessType acc = MMU_DATA_STORE;
4036         int size = 4 << s->gm_blocksize;
4037 
4038         clean_addr = clean_data_tbi(s, addr);
4039         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4040         gen_probe_access(s, clean_addr, acc, size);
4041     }
4042     return true;
4043 }
4044 
4045 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4046 {
4047     TCGv_i64 addr, clean_addr, tcg_rt;
4048 
4049     if (!dc_isar_feature(aa64_mte, s)) {
4050         return false;
4051     }
4052     if (s->current_el == 0) {
4053         return false;
4054     }
4055 
4056     if (a->rn == 31) {
4057         gen_check_sp_alignment(s);
4058     }
4059 
4060     addr = read_cpu_reg_sp(s, a->rn, true);
4061     tcg_gen_addi_i64(addr, addr, a->imm);
4062     tcg_rt = cpu_reg(s, a->rt);
4063 
4064     if (s->ata[0]) {
4065         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4066     } else {
4067         MMUAccessType acc = MMU_DATA_LOAD;
4068         int size = 4 << s->gm_blocksize;
4069 
4070         clean_addr = clean_data_tbi(s, addr);
4071         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4072         gen_probe_access(s, clean_addr, acc, size);
4073         /* The result tags are zeros.  */
4074         tcg_gen_movi_i64(tcg_rt, 0);
4075     }
4076     return true;
4077 }
4078 
4079 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4080 {
4081     TCGv_i64 addr, clean_addr, tcg_rt;
4082 
4083     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4084         return false;
4085     }
4086 
4087     if (a->rn == 31) {
4088         gen_check_sp_alignment(s);
4089     }
4090 
4091     addr = read_cpu_reg_sp(s, a->rn, true);
4092     if (!a->p) {
4093         /* pre-index or signed offset */
4094         tcg_gen_addi_i64(addr, addr, a->imm);
4095     }
4096 
4097     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4098     tcg_rt = cpu_reg(s, a->rt);
4099     if (s->ata[0]) {
4100         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4101     } else {
4102         /*
4103          * Tag access disabled: we must check for aborts on the load
4104          * load from [rn+offset], and then insert a 0 tag into rt.
4105          */
4106         clean_addr = clean_data_tbi(s, addr);
4107         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4108         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4109     }
4110 
4111     if (a->w) {
4112         /* pre-index or post-index */
4113         if (a->p) {
4114             /* post-index */
4115             tcg_gen_addi_i64(addr, addr, a->imm);
4116         }
4117         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4118     }
4119     return true;
4120 }
4121 
4122 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4123 {
4124     TCGv_i64 addr, tcg_rt;
4125 
4126     if (a->rn == 31) {
4127         gen_check_sp_alignment(s);
4128     }
4129 
4130     addr = read_cpu_reg_sp(s, a->rn, true);
4131     if (!a->p) {
4132         /* pre-index or signed offset */
4133         tcg_gen_addi_i64(addr, addr, a->imm);
4134     }
4135     tcg_rt = cpu_reg_sp(s, a->rt);
4136     if (!s->ata[0]) {
4137         /*
4138          * For STG and ST2G, we need to check alignment and probe memory.
4139          * TODO: For STZG and STZ2G, we could rely on the stores below,
4140          * at least for system mode; user-only won't enforce alignment.
4141          */
4142         if (is_pair) {
4143             gen_helper_st2g_stub(tcg_env, addr);
4144         } else {
4145             gen_helper_stg_stub(tcg_env, addr);
4146         }
4147     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4148         if (is_pair) {
4149             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4150         } else {
4151             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4152         }
4153     } else {
4154         if (is_pair) {
4155             gen_helper_st2g(tcg_env, addr, tcg_rt);
4156         } else {
4157             gen_helper_stg(tcg_env, addr, tcg_rt);
4158         }
4159     }
4160 
4161     if (is_zero) {
4162         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4163         TCGv_i64 zero64 = tcg_constant_i64(0);
4164         TCGv_i128 zero128 = tcg_temp_new_i128();
4165         int mem_index = get_mem_index(s);
4166         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4167 
4168         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4169 
4170         /* This is 1 or 2 atomic 16-byte operations. */
4171         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4172         if (is_pair) {
4173             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4174             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4175         }
4176     }
4177 
4178     if (a->w) {
4179         /* pre-index or post-index */
4180         if (a->p) {
4181             /* post-index */
4182             tcg_gen_addi_i64(addr, addr, a->imm);
4183         }
4184         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4185     }
4186     return true;
4187 }
4188 
4189 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4190 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4191 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4192 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4193 
4194 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4195 
4196 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4197                    bool is_setg, SetFn fn)
4198 {
4199     int memidx;
4200     uint32_t syndrome, desc = 0;
4201 
4202     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4203         return false;
4204     }
4205 
4206     /*
4207      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4208      * us to pull this check before the CheckMOPSEnabled() test
4209      * (which we do in the helper function)
4210      */
4211     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4212         a->rd == 31 || a->rn == 31) {
4213         return false;
4214     }
4215 
4216     memidx = get_a64_user_mem_index(s, a->unpriv);
4217 
4218     /*
4219      * We pass option_a == true, matching our implementation;
4220      * we pass wrong_option == false: helper function may set that bit.
4221      */
4222     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4223                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4224 
4225     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4226         /* We may need to do MTE tag checking, so assemble the descriptor */
4227         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4228         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4229         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4230         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4231     }
4232     /* The helper function always needs the memidx even with MTE disabled */
4233     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4234 
4235     /*
4236      * The helper needs the register numbers, but since they're in
4237      * the syndrome anyway, we let it extract them from there rather
4238      * than passing in an extra three integer arguments.
4239      */
4240     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4241     return true;
4242 }
4243 
4244 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4245 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4246 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4247 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4248 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4249 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4250 
4251 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4252 
4253 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4254 {
4255     int rmemidx, wmemidx;
4256     uint32_t syndrome, rdesc = 0, wdesc = 0;
4257     bool wunpriv = extract32(a->options, 0, 1);
4258     bool runpriv = extract32(a->options, 1, 1);
4259 
4260     /*
4261      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4262      * us to pull this check before the CheckMOPSEnabled() test
4263      * (which we do in the helper function)
4264      */
4265     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4266         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4267         return false;
4268     }
4269 
4270     rmemidx = get_a64_user_mem_index(s, runpriv);
4271     wmemidx = get_a64_user_mem_index(s, wunpriv);
4272 
4273     /*
4274      * We pass option_a == true, matching our implementation;
4275      * we pass wrong_option == false: helper function may set that bit.
4276      */
4277     syndrome = syn_mop(false, false, a->options, is_epilogue,
4278                        false, true, a->rd, a->rs, a->rn);
4279 
4280     /* If we need to do MTE tag checking, assemble the descriptors */
4281     if (s->mte_active[runpriv]) {
4282         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4283         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4284     }
4285     if (s->mte_active[wunpriv]) {
4286         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4287         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4288         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4289     }
4290     /* The helper function needs these parts of the descriptor regardless */
4291     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4292     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4293 
4294     /*
4295      * The helper needs the register numbers, but since they're in
4296      * the syndrome anyway, we let it extract them from there rather
4297      * than passing in an extra three integer arguments.
4298      */
4299     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4300        tcg_constant_i32(rdesc));
4301     return true;
4302 }
4303 
4304 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4305 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4306 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4307 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4308 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4309 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4310 
4311 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4312 
4313 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4314                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4315 {
4316     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4317     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4318     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4319 
4320     fn(tcg_rd, tcg_rn, tcg_imm);
4321     if (!a->sf) {
4322         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4323     }
4324     return true;
4325 }
4326 
4327 /*
4328  * PC-rel. addressing
4329  */
4330 
4331 static bool trans_ADR(DisasContext *s, arg_ri *a)
4332 {
4333     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4334     return true;
4335 }
4336 
4337 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4338 {
4339     int64_t offset = (int64_t)a->imm << 12;
4340 
4341     /* The page offset is ok for CF_PCREL. */
4342     offset -= s->pc_curr & 0xfff;
4343     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4344     return true;
4345 }
4346 
/*
 * Add/subtract (immediate)
 *
 * All four forms are generated by gen_rri(); the two integer arguments
 * are its rd_sp and rn_sp flags.  ADD and SUB fetch both Rd and Rn with
 * cpu_reg_sp(), while the flag-setting ADDS and SUBS fetch only Rn that
 * way and select the 64-bit or 32-bit generator according to a->sf.
 */
TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4354 
4355 /*
4356  * Add/subtract (immediate, with tags)
4357  */
4358 
4359 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4360                                       bool sub_op)
4361 {
4362     TCGv_i64 tcg_rn, tcg_rd;
4363     int imm;
4364 
4365     imm = a->uimm6 << LOG2_TAG_GRANULE;
4366     if (sub_op) {
4367         imm = -imm;
4368     }
4369 
4370     tcg_rn = cpu_reg_sp(s, a->rn);
4371     tcg_rd = cpu_reg_sp(s, a->rd);
4372 
4373     if (s->ata[0]) {
4374         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4375                            tcg_constant_i32(imm),
4376                            tcg_constant_i32(a->uimm4));
4377     } else {
4378         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4379         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4380     }
4381     return true;
4382 }
4383 
4384 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4385 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4386 
/*
 * Replicate an e-bit element across a 64-bit value.
 *
 * @mask must hold the element in its bottom @e bits, with all higher
 * bits zero; @e must be non-zero.  The element is copied into every
 * e-bit slot by doubling the populated width until it reaches 64 bits.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
4400 
4401 /*
4402  * Logical (immediate)
4403  */
4404 
4405 /*
4406  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4407  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4408  * value (ie should cause a guest UNDEF exception), and true if they are
4409  * valid, in which case the decoded bit pattern is written to result.
4410  */
4411 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4412                             unsigned int imms, unsigned int immr)
4413 {
4414     uint64_t mask;
4415     unsigned e, levels, s, r;
4416     int len;
4417 
4418     assert(immn < 2 && imms < 64 && immr < 64);
4419 
4420     /* The bit patterns we create here are 64 bit patterns which
4421      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4422      * 64 bits each. Each element contains the same value: a run
4423      * of between 1 and e-1 non-zero bits, rotated within the
4424      * element by between 0 and e-1 bits.
4425      *
4426      * The element size and run length are encoded into immn (1 bit)
4427      * and imms (6 bits) as follows:
4428      * 64 bit elements: immn = 1, imms = <length of run - 1>
4429      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4430      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4431      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4432      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4433      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4434      * Notice that immn = 0, imms = 11111x is the only combination
4435      * not covered by one of the above options; this is reserved.
4436      * Further, <length of run - 1> all-ones is a reserved pattern.
4437      *
4438      * In all cases the rotation is by immr % e (and immr is 6 bits).
4439      */
4440 
4441     /* First determine the element size */
4442     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4443     if (len < 1) {
4444         /* This is the immn == 0, imms == 0x11111x case */
4445         return false;
4446     }
4447     e = 1 << len;
4448 
4449     levels = e - 1;
4450     s = imms & levels;
4451     r = immr & levels;
4452 
4453     if (s == levels) {
4454         /* <length of run - 1> mustn't be all-ones. */
4455         return false;
4456     }
4457 
4458     /* Create the value of one element: s+1 set bits rotated
4459      * by r within the element (which is e bits wide)...
4460      */
4461     mask = MAKE_64BIT_MASK(0, s + 1);
4462     if (r) {
4463         mask = (mask >> r) | (mask << (e - r));
4464         mask &= MAKE_64BIT_MASK(0, e);
4465     }
4466     /* ...then replicate the element over the whole 64 bit value */
4467     mask = bitfield_replicate(mask, e);
4468     *result = mask;
4469     return true;
4470 }
4471 
4472 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4473                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4474 {
4475     TCGv_i64 tcg_rd, tcg_rn;
4476     uint64_t imm;
4477 
4478     /* Some immediate field values are reserved. */
4479     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4480                                 extract32(a->dbm, 0, 6),
4481                                 extract32(a->dbm, 6, 6))) {
4482         return false;
4483     }
4484     if (!a->sf) {
4485         imm &= 0xffffffffull;
4486     }
4487 
4488     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4489     tcg_rn = cpu_reg(s, a->rn);
4490 
4491     fn(tcg_rd, tcg_rn, imm);
4492     if (set_cc) {
4493         gen_logic_CC(a->sf, tcg_rd);
4494     }
4495     if (!a->sf) {
4496         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4497     }
4498     return true;
4499 }
4500 
4501 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4502 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4503 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4504 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4505 
4506 /*
4507  * Move wide (immediate)
4508  */
4509 
4510 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4511 {
4512     int pos = a->hw << 4;
4513     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4514     return true;
4515 }
4516 
4517 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4518 {
4519     int pos = a->hw << 4;
4520     uint64_t imm = a->imm;
4521 
4522     imm = ~(imm << pos);
4523     if (!a->sf) {
4524         imm = (uint32_t)imm;
4525     }
4526     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4527     return true;
4528 }
4529 
4530 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4531 {
4532     int pos = a->hw << 4;
4533     TCGv_i64 tcg_rd, tcg_im;
4534 
4535     tcg_rd = cpu_reg(s, a->rd);
4536     tcg_im = tcg_constant_i64(a->imm);
4537     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4538     if (!a->sf) {
4539         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4540     }
4541     return true;
4542 }
4543 
4544 /*
4545  * Bitfield
4546  */
4547 
4548 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4549 {
4550     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4551     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4552     unsigned int bitsize = a->sf ? 64 : 32;
4553     unsigned int ri = a->immr;
4554     unsigned int si = a->imms;
4555     unsigned int pos, len;
4556 
4557     if (si >= ri) {
4558         /* Wd<s-r:0> = Wn<s:r> */
4559         len = (si - ri) + 1;
4560         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4561         if (!a->sf) {
4562             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4563         }
4564     } else {
4565         /* Wd<32+s-r,32-r> = Wn<s:0> */
4566         len = si + 1;
4567         pos = (bitsize - ri) & (bitsize - 1);
4568 
4569         if (len < ri) {
4570             /*
4571              * Sign extend the destination field from len to fill the
4572              * balance of the word.  Let the deposit below insert all
4573              * of those sign bits.
4574              */
4575             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4576             len = ri;
4577         }
4578 
4579         /*
4580          * We start with zero, and we haven't modified any bits outside
4581          * bitsize, therefore no final zero-extension is unneeded for !sf.
4582          */
4583         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4584     }
4585     return true;
4586 }
4587 
4588 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4589 {
4590     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4591     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4592     unsigned int bitsize = a->sf ? 64 : 32;
4593     unsigned int ri = a->immr;
4594     unsigned int si = a->imms;
4595     unsigned int pos, len;
4596 
4597     tcg_rd = cpu_reg(s, a->rd);
4598     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4599 
4600     if (si >= ri) {
4601         /* Wd<s-r:0> = Wn<s:r> */
4602         len = (si - ri) + 1;
4603         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4604     } else {
4605         /* Wd<32+s-r,32-r> = Wn<s:0> */
4606         len = si + 1;
4607         pos = (bitsize - ri) & (bitsize - 1);
4608         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4609     }
4610     return true;
4611 }
4612 
4613 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4614 {
4615     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4616     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4617     unsigned int bitsize = a->sf ? 64 : 32;
4618     unsigned int ri = a->immr;
4619     unsigned int si = a->imms;
4620     unsigned int pos, len;
4621 
4622     tcg_rd = cpu_reg(s, a->rd);
4623     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4624 
4625     if (si >= ri) {
4626         /* Wd<s-r:0> = Wn<s:r> */
4627         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4628         len = (si - ri) + 1;
4629         pos = 0;
4630     } else {
4631         /* Wd<32+s-r,32-r> = Wn<s:0> */
4632         len = si + 1;
4633         pos = (bitsize - ri) & (bitsize - 1);
4634     }
4635 
4636     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4637     if (!a->sf) {
4638         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4639     }
4640     return true;
4641 }
4642 
4643 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4644 {
4645     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4646 
4647     tcg_rd = cpu_reg(s, a->rd);
4648 
4649     if (unlikely(a->imm == 0)) {
4650         /*
4651          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4652          * so an extract from bit 0 is a special case.
4653          */
4654         if (a->sf) {
4655             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4656         } else {
4657             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4658         }
4659     } else {
4660         tcg_rm = cpu_reg(s, a->rm);
4661         tcg_rn = cpu_reg(s, a->rn);
4662 
4663         if (a->sf) {
4664             /* Specialization to ROR happens in EXTRACT2.  */
4665             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4666         } else {
4667             TCGv_i32 t0 = tcg_temp_new_i32();
4668 
4669             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4670             if (a->rm == a->rn) {
4671                 tcg_gen_rotri_i32(t0, t0, a->imm);
4672             } else {
4673                 TCGv_i32 t1 = tcg_temp_new_i32();
4674                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4675                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4676             }
4677             tcg_gen_extu_i32_i64(tcg_rd, t0);
4678         }
4679     }
4680     return true;
4681 }
4682 
4683 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4684 {
4685     if (fp_access_check(s)) {
4686         int len = (a->len + 1) * 16;
4687 
4688         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4689                            vec_full_reg_offset(s, a->rm), tcg_env,
4690                            a->q ? 16 : 8, vec_full_reg_size(s),
4691                            (len << 6) | (a->tbx << 5) | a->rn,
4692                            gen_helper_simd_tblx);
4693     }
4694     return true;
4695 }
4696 
4697 typedef int simd_permute_idx_fn(int i, int part, int elements);
4698 
4699 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4700                             simd_permute_idx_fn *fn, int part)
4701 {
4702     MemOp esz = a->esz;
4703     int datasize = a->q ? 16 : 8;
4704     int elements = datasize >> esz;
4705     TCGv_i64 tcg_res[2], tcg_ele;
4706 
4707     if (esz == MO_64 && !a->q) {
4708         return false;
4709     }
4710     if (!fp_access_check(s)) {
4711         return true;
4712     }
4713 
4714     tcg_res[0] = tcg_temp_new_i64();
4715     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4716     tcg_ele = tcg_temp_new_i64();
4717 
4718     for (int i = 0; i < elements; i++) {
4719         int o, w, idx;
4720 
4721         idx = fn(i, part, elements);
4722         read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4723                          idx & (elements - 1), esz);
4724 
4725         w = (i << (esz + 3)) / 64;
4726         o = (i << (esz + 3)) % 64;
4727         if (o == 0) {
4728             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4729         } else {
4730             tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4731         }
4732     }
4733 
4734     for (int i = a->q; i >= 0; --i) {
4735         write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4736     }
4737     clear_vec_high(s, a->q, a->rd);
4738     return true;
4739 }
4740 
/*
 * UZP source index: result element i comes from element 2 * i + part
 * of the concatenated sources (even elements for part 0, odd for 1).
 */
static int permute_load_uzp(int i, int part, int elements)
{
    int src = 2 * i;

    src += part;
    return src;
}
4745 
/* UZP1 and UZP2 differ only in the part argument (0 and 1). */
TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4748 
/*
 * TRN source index: even result elements come from the first source,
 * odd ones from the second (offset by elements); within the register
 * the element chosen is the even position of the pair plus part.
 */
static int permute_load_trn(int i, int part, int elements)
{
    int second_src = (i & 1) * elements;
    int pair_base = i & ~1;

    return second_src + pair_base + part;
}
4753 
/* TRN1 and TRN2 differ only in the part argument (0 and 1). */
TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4756 
/*
 * ZIP source index: even result elements come from the first source,
 * odd ones from the second (offset by elements); the element within
 * the register is (part * elements + i) / 2, i.e. part selects the
 * lower or upper half of each source.
 */
static int permute_load_zip(int i, int part, int elements)
{
    int second_src = (i & 1) * elements;
    int within = (part * elements + i) >> 1;

    return second_src + within;
}
4761 
/* ZIP1 and ZIP2 differ only in the part argument (0 and 1). */
TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
4764 
4765 /*
4766  * Cryptographic AES, SHA, SHA512
4767  */
4768 
/*
 * Each line below instantiates the translator for one instruction.
 * The second argument names the ISA feature involved (presumably the
 * instruction is rejected when that feature is absent -- TRANS_FEAT is
 * defined elsewhere); the remaining arguments select the expander and,
 * for the out-of-line forms, a data value of 0 and the helper to call.
 */

/* AES */
TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)

/* SHA1, three-operand forms */
TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)

/* SHA256, three-operand forms */
TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)

/* SHA1 and SHA256, two-operand forms */
TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)

/* SHA512, SHA3 (RAX1), SM3 and SM4, three-operand forms */
TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)

/*
 * SHA512SU0 uses the two-operand expander; SM4E uses the three-operand
 * one, the same as the group above.
 */
TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)

/* SHA3 forms expanded inline through do_gvec_fn4 */
TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4800 
4801 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4802 {
4803     if (!dc_isar_feature(aa64_sm3, s)) {
4804         return false;
4805     }
4806     if (fp_access_check(s)) {
4807         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4808         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4809         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4810         TCGv_i32 tcg_res = tcg_temp_new_i32();
4811         unsigned vsz, dofs;
4812 
4813         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
4814         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
4815         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
4816 
4817         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
4818         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
4819         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
4820         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
4821 
4822         /* Clear the whole register first, then store bits [127:96]. */
4823         vsz = vec_full_reg_size(s);
4824         dofs = vec_full_reg_offset(s, a->rd);
4825         tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
4826         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
4827     }
4828     return true;
4829 }
4830 
4831 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
4832 {
4833     if (fp_access_check(s)) {
4834         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
4835     }
4836     return true;
4837 }
4838 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
4839 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
4840 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
4841 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
4842 
4843 static bool trans_XAR(DisasContext *s, arg_XAR *a)
4844 {
4845     if (!dc_isar_feature(aa64_sha3, s)) {
4846         return false;
4847     }
4848     if (fp_access_check(s)) {
4849         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
4850                      vec_full_reg_offset(s, a->rn),
4851                      vec_full_reg_offset(s, a->rm), a->imm, 16,
4852                      vec_full_reg_size(s));
4853     }
4854     return true;
4855 }
4856 
4857 /*
4858  * Advanced SIMD copy
4859  */
4860 
4861 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
4862 {
4863     unsigned esz = ctz32(imm);
4864     if (esz <= MO_64) {
4865         *pesz = esz;
4866         *pidx = imm >> (esz + 1);
4867         return true;
4868     }
4869     return false;
4870 }
4871 
4872 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
4873 {
4874     MemOp esz;
4875     unsigned idx;
4876 
4877     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4878         return false;
4879     }
4880     if (fp_access_check(s)) {
4881         /*
4882          * This instruction just extracts the specified element and
4883          * zero-extends it into the bottom of the destination register.
4884          */
4885         TCGv_i64 tmp = tcg_temp_new_i64();
4886         read_vec_element(s, tmp, a->rn, idx, esz);
4887         write_fp_dreg(s, a->rd, tmp);
4888     }
4889     return true;
4890 }
4891 
4892 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
4893 {
4894     MemOp esz;
4895     unsigned idx;
4896 
4897     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4898         return false;
4899     }
4900     if (esz == MO_64 && !a->q) {
4901         return false;
4902     }
4903     if (fp_access_check(s)) {
4904         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
4905                              vec_reg_offset(s, a->rn, idx, esz),
4906                              a->q ? 16 : 8, vec_full_reg_size(s));
4907     }
4908     return true;
4909 }
4910 
4911 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
4912 {
4913     MemOp esz;
4914     unsigned idx;
4915 
4916     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4917         return false;
4918     }
4919     if (esz == MO_64 && !a->q) {
4920         return false;
4921     }
4922     if (fp_access_check(s)) {
4923         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4924                              a->q ? 16 : 8, vec_full_reg_size(s),
4925                              cpu_reg(s, a->rn));
4926     }
4927     return true;
4928 }
4929 
4930 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
4931 {
4932     MemOp esz;
4933     unsigned idx;
4934 
4935     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4936         return false;
4937     }
4938     if (is_signed) {
4939         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
4940             return false;
4941         }
4942     } else {
4943         if (esz == MO_64 ? !a->q : a->q) {
4944             return false;
4945         }
4946     }
4947     if (fp_access_check(s)) {
4948         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4949         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
4950         if (is_signed && !a->q) {
4951             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4952         }
4953     }
4954     return true;
4955 }
4956 
4957 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
4958 TRANS(UMOV, do_smov_umov, a, 0)
4959 
4960 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
4961 {
4962     MemOp esz;
4963     unsigned idx;
4964 
4965     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4966         return false;
4967     }
4968     if (fp_access_check(s)) {
4969         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
4970         clear_vec_high(s, true, a->rd);
4971     }
4972     return true;
4973 }
4974 
4975 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
4976 {
4977     MemOp esz;
4978     unsigned didx, sidx;
4979 
4980     if (!decode_esz_idx(a->di, &esz, &didx)) {
4981         return false;
4982     }
4983     sidx = a->si >> esz;
4984     if (fp_access_check(s)) {
4985         TCGv_i64 tmp = tcg_temp_new_i64();
4986 
4987         read_vec_element(s, tmp, a->rn, sidx, esz);
4988         write_vec_element(s, tmp, a->rd, didx, esz);
4989 
4990         /* INS is considered a 128-bit write for SVE. */
4991         clear_vec_high(s, true, a->rd);
4992     }
4993     return true;
4994 }
4995 
4996 /*
4997  * Advanced SIMD three same
4998  */
4999 
5000 typedef struct FPScalar {
5001     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5002     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5003     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5004 } FPScalar;
5005 
5006 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
5007 {
5008     switch (a->esz) {
5009     case MO_64:
5010         if (fp_access_check(s)) {
5011             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5012             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5013             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
5014             write_fp_dreg(s, a->rd, t0);
5015         }
5016         break;
5017     case MO_32:
5018         if (fp_access_check(s)) {
5019             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5020             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5021             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
5022             write_fp_sreg(s, a->rd, t0);
5023         }
5024         break;
5025     case MO_16:
5026         if (!dc_isar_feature(aa64_fp16, s)) {
5027             return false;
5028         }
5029         if (fp_access_check(s)) {
5030             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5031             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5032             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
5033             write_fp_sreg(s, a->rd, t0);
5034         }
5035         break;
5036     default:
5037         return false;
5038     }
5039     return true;
5040 }
5041 
/*
 * Scalar FP arithmetic tables for do_fp3_scalar.  The initializers are
 * positional, so each lists its helpers in FPScalar member order:
 * gen_h, gen_s, gen_d.
 */
static const FPScalar f_scalar_fadd = {
    gen_helper_vfp_addh,
    gen_helper_vfp_adds,
    gen_helper_vfp_addd,
};
TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)

static const FPScalar f_scalar_fsub = {
    gen_helper_vfp_subh,
    gen_helper_vfp_subs,
    gen_helper_vfp_subd,
};
TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)

static const FPScalar f_scalar_fdiv = {
    gen_helper_vfp_divh,
    gen_helper_vfp_divs,
    gen_helper_vfp_divd,
};
TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)

static const FPScalar f_scalar_fmul = {
    gen_helper_vfp_mulh,
    gen_helper_vfp_muls,
    gen_helper_vfp_muld,
};
TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)

/* From here on the half-precision entries use the advsimd_* helpers. */
static const FPScalar f_scalar_fmax = {
    gen_helper_advsimd_maxh,
    gen_helper_vfp_maxs,
    gen_helper_vfp_maxd,
};
TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)

static const FPScalar f_scalar_fmin = {
    gen_helper_advsimd_minh,
    gen_helper_vfp_mins,
    gen_helper_vfp_mind,
};
TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)

static const FPScalar f_scalar_fmaxnm = {
    gen_helper_advsimd_maxnumh,
    gen_helper_vfp_maxnums,
    gen_helper_vfp_maxnumd,
};
TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)

static const FPScalar f_scalar_fminnm = {
    gen_helper_advsimd_minnumh,
    gen_helper_vfp_minnums,
    gen_helper_vfp_minnumd,
};
TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)

static const FPScalar f_scalar_fmulx = {
    gen_helper_advsimd_mulxh,
    gen_helper_vfp_mulxs,
    gen_helper_vfp_mulxd,
};
TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
5104 
/*
 * FNMUL: multiply, then negate the product.  The three generators
 * below do this for half, single and double precision respectively;
 * d is the destination, n and m the sources, s the float-status pointer
 * passed to the multiply helper.
 */
static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
{
    gen_helper_vfp_mulh(d, n, m, s);
    gen_vfp_negh(d, d);
}

static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
{
    gen_helper_vfp_muls(d, n, m, s);
    gen_vfp_negs(d, d);
}

static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
{
    gen_helper_vfp_muld(d, n, m, s);
    gen_vfp_negd(d, d);
}

/* Positional initializer: gen_h, gen_s, gen_d. */
static const FPScalar f_scalar_fnmul = {
    gen_fnmul_h,
    gen_fnmul_s,
    gen_fnmul_d,
};
TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
5129 
/*
 * Scalar FP compare tables for do_fp3_scalar.  Positional initializers
 * in FPScalar member order: gen_h, gen_s, gen_d.
 */
static const FPScalar f_scalar_fcmeq = {
    gen_helper_advsimd_ceq_f16,
    gen_helper_neon_ceq_f32,
    gen_helper_neon_ceq_f64,
};
TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)

static const FPScalar f_scalar_fcmge = {
    gen_helper_advsimd_cge_f16,
    gen_helper_neon_cge_f32,
    gen_helper_neon_cge_f64,
};
TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)

static const FPScalar f_scalar_fcmgt = {
    gen_helper_advsimd_cgt_f16,
    gen_helper_neon_cgt_f32,
    gen_helper_neon_cgt_f64,
};
TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)

static const FPScalar f_scalar_facge = {
    gen_helper_advsimd_acge_f16,
    gen_helper_neon_acge_f32,
    gen_helper_neon_acge_f64,
};
TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)

static const FPScalar f_scalar_facgt = {
    gen_helper_advsimd_acgt_f16,
    gen_helper_neon_acgt_f32,
    gen_helper_neon_acgt_f64,
};
TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
5164 
/*
 * FABD: subtract, then take the absolute value of the difference.
 * The three generators below do this for half, single and double
 * precision respectively; d is the destination, n and m the sources,
 * s the float-status pointer passed to the subtract helper.
 */
static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
{
    gen_helper_vfp_subh(d, n, m, s);
    gen_vfp_absh(d, d);
}

static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
{
    gen_helper_vfp_subs(d, n, m, s);
    gen_vfp_abss(d, d);
}

static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
{
    gen_helper_vfp_subd(d, n, m, s);
    gen_vfp_absd(d, d);
}

/* Positional initializer: gen_h, gen_s, gen_d. */
static const FPScalar f_scalar_fabd = {
    gen_fabd_h,
    gen_fabd_s,
    gen_fabd_d,
};
TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
5189 
/*
 * Scalar FRECPS / FRSQRTS tables for do_fp3_scalar.  Positional
 * initializers in FPScalar member order: gen_h, gen_s, gen_d.
 */
static const FPScalar f_scalar_frecps = {
    gen_helper_recpsf_f16,
    gen_helper_recpsf_f32,
    gen_helper_recpsf_f64,
};
TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)

static const FPScalar f_scalar_frsqrts = {
    gen_helper_rsqrtsf_f16,
    gen_helper_rsqrtsf_f32,
    gen_helper_rsqrtsf_f64,
};
TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
5203 
5204 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5205                 MemOp sgn_n, MemOp sgn_m,
5206                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5207                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5208 {
5209     TCGv_i64 t0, t1, t2, qc;
5210     MemOp esz = a->esz;
5211 
5212     if (!fp_access_check(s)) {
5213         return true;
5214     }
5215 
5216     t0 = tcg_temp_new_i64();
5217     t1 = tcg_temp_new_i64();
5218     t2 = tcg_temp_new_i64();
5219     qc = tcg_temp_new_i64();
5220     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5221     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5222     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5223 
5224     if (esz == MO_64) {
5225         gen_d(t0, qc, t1, t2);
5226     } else {
5227         gen_bhs(t0, qc, t1, t2, esz);
5228         tcg_gen_ext_i64(t0, t0, esz);
5229     }
5230 
5231     write_fp_dreg(s, a->rd, t0);
5232     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5233     return true;
5234 }
5235 
5236 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5237 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5238 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5239 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5240 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5241 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5242 
5243 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5244                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5245 {
5246     if (fp_access_check(s)) {
5247         TCGv_i64 t0 = tcg_temp_new_i64();
5248         TCGv_i64 t1 = tcg_temp_new_i64();
5249 
5250         read_vec_element(s, t0, a->rn, 0, MO_64);
5251         read_vec_element(s, t1, a->rm, 0, MO_64);
5252         fn(t0, t0, t1);
5253         write_fp_dreg(s, a->rd, t0);
5254     }
5255     return true;
5256 }
5257 
5258 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5259 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5260 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5261 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5262 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5263 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5264 
5265 typedef struct ENVScalar2 {
5266     NeonGenTwoOpEnvFn *gen_bhs[3];
5267     NeonGenTwo64OpEnvFn *gen_d;
5268 } ENVScalar2;
5269 
5270 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5271 {
5272     if (!fp_access_check(s)) {
5273         return true;
5274     }
5275     if (a->esz == MO_64) {
5276         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5277         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5278         f->gen_d(t0, tcg_env, t0, t1);
5279         write_fp_dreg(s, a->rd, t0);
5280     } else {
5281         TCGv_i32 t0 = tcg_temp_new_i32();
5282         TCGv_i32 t1 = tcg_temp_new_i32();
5283 
5284         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5285         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5286         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5287         write_fp_sreg(s, a->rd, t0);
5288     }
5289     return true;
5290 }
5291 
/*
 * Saturating (rounding) shift tables for do_env_scalar2.  The braced
 * list fills gen_bhs[] in element-size order (8, 16, 32 bits); the
 * final entry is gen_d for 64-bit elements.
 */
static const ENVScalar2 f_scalar_sqshl = {
    { gen_helper_neon_qshl_s8,
      gen_helper_neon_qshl_s16,
      gen_helper_neon_qshl_s32 },
    gen_helper_neon_qshl_s64,
};
TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)

static const ENVScalar2 f_scalar_uqshl = {
    { gen_helper_neon_qshl_u8,
      gen_helper_neon_qshl_u16,
      gen_helper_neon_qshl_u32 },
    gen_helper_neon_qshl_u64,
};
TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)

static const ENVScalar2 f_scalar_sqrshl = {
    { gen_helper_neon_qrshl_s8,
      gen_helper_neon_qrshl_s16,
      gen_helper_neon_qrshl_s32 },
    gen_helper_neon_qrshl_s64,
};
TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)

static const ENVScalar2 f_scalar_uqrshl = {
    { gen_helper_neon_qrshl_u8,
      gen_helper_neon_qrshl_u16,
      gen_helper_neon_qrshl_u32 },
    gen_helper_neon_qrshl_u64,
};
TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5323 
5324 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5325                               const ENVScalar2 *f)
5326 {
5327     if (a->esz == MO_16 || a->esz == MO_32) {
5328         return do_env_scalar2(s, a, f);
5329     }
5330     return false;
5331 }
5332 
5333 static const ENVScalar2 f_scalar_sqdmulh = {
5334     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5335 };
5336 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5337 
5338 static const ENVScalar2 f_scalar_sqrdmulh = {
5339     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5340 };
5341 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5342 
5343 typedef struct ENVScalar3 {
5344     NeonGenThreeOpEnvFn *gen_hs[2];
5345 } ENVScalar3;
5346 
5347 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5348                               const ENVScalar3 *f)
5349 {
5350     TCGv_i32 t0, t1, t2;
5351 
5352     if (a->esz != MO_16 && a->esz != MO_32) {
5353         return false;
5354     }
5355     if (!fp_access_check(s)) {
5356         return true;
5357     }
5358 
5359     t0 = tcg_temp_new_i32();
5360     t1 = tcg_temp_new_i32();
5361     t2 = tcg_temp_new_i32();
5362     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5363     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5364     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5365     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5366     write_fp_sreg(s, a->rd, t0);
5367     return true;
5368 }
5369 
5370 static const ENVScalar3 f_scalar_sqrdmlah = {
5371     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5372 };
5373 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5374 
5375 static const ENVScalar3 f_scalar_sqrdmlsh = {
5376     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5377 };
5378 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5379 
5380 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5381 {
5382     if (fp_access_check(s)) {
5383         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5384         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5385         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5386         write_fp_dreg(s, a->rd, t0);
5387     }
5388     return true;
5389 }
5390 
5391 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5392 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5393 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5394 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5395 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5396 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5397 
5398 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5399                           gen_helper_gvec_3_ptr * const fns[3])
5400 {
5401     MemOp esz = a->esz;
5402 
5403     switch (esz) {
5404     case MO_64:
5405         if (!a->q) {
5406             return false;
5407         }
5408         break;
5409     case MO_32:
5410         break;
5411     case MO_16:
5412         if (!dc_isar_feature(aa64_fp16, s)) {
5413             return false;
5414         }
5415         break;
5416     default:
5417         return false;
5418     }
5419     if (fp_access_check(s)) {
5420         gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
5421                           esz == MO_16, data, fns[esz - 1]);
5422     }
5423     return true;
5424 }
5425 
/*
 * Vector FP helper tables for do_fp3_vector, which indexes them with
 * esz - 1.  Each table therefore lists its helpers in the order
 * half (_h), single (_s), double (_d).  Every instantiation below
 * passes 0 as the data argument.
 */
static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
    gen_helper_gvec_fadd_h,
    gen_helper_gvec_fadd_s,
    gen_helper_gvec_fadd_d,
};
TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)

static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
    gen_helper_gvec_fsub_h,
    gen_helper_gvec_fsub_s,
    gen_helper_gvec_fsub_d,
};
TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)

static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
    gen_helper_gvec_fdiv_h,
    gen_helper_gvec_fdiv_s,
    gen_helper_gvec_fdiv_d,
};
TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)

static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
    gen_helper_gvec_fmul_h,
    gen_helper_gvec_fmul_s,
    gen_helper_gvec_fmul_d,
};
TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)

static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
    gen_helper_gvec_fmax_h,
    gen_helper_gvec_fmax_s,
    gen_helper_gvec_fmax_d,
};
TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)

static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
    gen_helper_gvec_fmin_h,
    gen_helper_gvec_fmin_s,
    gen_helper_gvec_fmin_d,
};
TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)

static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
    gen_helper_gvec_fmaxnum_h,
    gen_helper_gvec_fmaxnum_s,
    gen_helper_gvec_fmaxnum_d,
};
TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)

static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
    gen_helper_gvec_fminnum_h,
    gen_helper_gvec_fminnum_s,
    gen_helper_gvec_fminnum_d,
};
TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)

static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
    gen_helper_gvec_fmulx_h,
    gen_helper_gvec_fmulx_s,
    gen_helper_gvec_fmulx_d,
};
TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)

/* Multiply-accumulate forms use the gvec vfma/vfms helpers. */
static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
    gen_helper_gvec_vfma_h,
    gen_helper_gvec_vfma_s,
    gen_helper_gvec_vfma_d,
};
TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)

static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
    gen_helper_gvec_vfms_h,
    gen_helper_gvec_vfms_s,
    gen_helper_gvec_vfms_d,
};
TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)

/* Comparisons */
static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
    gen_helper_gvec_fceq_h,
    gen_helper_gvec_fceq_s,
    gen_helper_gvec_fceq_d,
};
TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)

static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
    gen_helper_gvec_fcge_h,
    gen_helper_gvec_fcge_s,
    gen_helper_gvec_fcge_d,
};
TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)

static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
    gen_helper_gvec_fcgt_h,
    gen_helper_gvec_fcgt_s,
    gen_helper_gvec_fcgt_d,
};
TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)

static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
    gen_helper_gvec_facge_h,
    gen_helper_gvec_facge_s,
    gen_helper_gvec_facge_d,
};
TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)

static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
    gen_helper_gvec_facgt_h,
    gen_helper_gvec_facgt_s,
    gen_helper_gvec_facgt_d,
};
TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)

static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
    gen_helper_gvec_fabd_h,
    gen_helper_gvec_fabd_s,
    gen_helper_gvec_fabd_d,
};
TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)

/* Reciprocal and reciprocal-square-root step */
static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
    gen_helper_gvec_recps_h,
    gen_helper_gvec_recps_s,
    gen_helper_gvec_recps_d,
};
TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)

static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
    gen_helper_gvec_rsqrts_h,
    gen_helper_gvec_rsqrts_s,
    gen_helper_gvec_rsqrts_d,
};
TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
5558 
5559 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5560     gen_helper_gvec_faddp_h,
5561     gen_helper_gvec_faddp_s,
5562     gen_helper_gvec_faddp_d,
5563 };
5564 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5565 
5566 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5567     gen_helper_gvec_fmaxp_h,
5568     gen_helper_gvec_fmaxp_s,
5569     gen_helper_gvec_fmaxp_d,
5570 };
5571 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
5572 
5573 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5574     gen_helper_gvec_fminp_h,
5575     gen_helper_gvec_fminp_s,
5576     gen_helper_gvec_fminp_d,
5577 };
5578 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
5579 
5580 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5581     gen_helper_gvec_fmaxnump_h,
5582     gen_helper_gvec_fmaxnump_s,
5583     gen_helper_gvec_fmaxnump_d,
5584 };
5585 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5586 
5587 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5588     gen_helper_gvec_fminnump_h,
5589     gen_helper_gvec_fminnump_s,
5590     gen_helper_gvec_fminnump_d,
5591 };
5592 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5593 
5594 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5595 {
5596     if (fp_access_check(s)) {
5597         int data = (is_2 << 1) | is_s;
5598         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5599                            vec_full_reg_offset(s, a->rn),
5600                            vec_full_reg_offset(s, a->rm), tcg_env,
5601                            a->q ? 16 : 8, vec_full_reg_size(s),
5602                            data, gen_helper_gvec_fmlal_a64);
5603     }
5604     return true;
5605 }
5606 
5607 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5608 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5609 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5610 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
5611 
5612 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
5613 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
5614 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
5615 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
5616 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
5617 
5618 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
5619 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
5620 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
5621 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
5622 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
5623 
5624 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
5625 {
5626     if (fp_access_check(s)) {
5627         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
5628     }
5629     return true;
5630 }
5631 
5632 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
5633 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
5634 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
5635 
5636 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
5637 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
5638 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
5639 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
5640 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
5641 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
5642 
5643 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
5644 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
5645 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
5646 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
5647 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
5648 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
5649 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
5650 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
5651 
5652 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
5653 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
5654 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
5655 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
5656 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
5657 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
5658 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
5659 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
5660 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
5661 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
5662 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
5663 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
5664 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
5665 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
5666 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
5667 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
5668 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
5669 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
5670 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
5671 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
5672 
5673 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
5674 {
5675     if (a->esz == MO_64 && !a->q) {
5676         return false;
5677     }
5678     if (fp_access_check(s)) {
5679         tcg_gen_gvec_cmp(cond, a->esz,
5680                          vec_full_reg_offset(s, a->rd),
5681                          vec_full_reg_offset(s, a->rn),
5682                          vec_full_reg_offset(s, a->rm),
5683                          a->q ? 16 : 8, vec_full_reg_size(s));
5684     }
5685     return true;
5686 }
5687 
5688 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
5689 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
5690 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
5691 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
5692 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
5693 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
5694 
5695 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
5696 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
5697 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
5698 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
5699 
5700 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
5701                           gen_helper_gvec_4 *fn)
5702 {
5703     if (fp_access_check(s)) {
5704         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5705     }
5706     return true;
5707 }
5708 
5709 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
5710                               gen_helper_gvec_4_ptr *fn)
5711 {
5712     if (fp_access_check(s)) {
5713         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5714     }
5715     return true;
5716 }
5717 
5718 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
5719 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
5720 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
5721 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
5722 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
5723 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
5724 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
5725 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
5726 
5727 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
5728 {
5729     if (!dc_isar_feature(aa64_bf16, s)) {
5730         return false;
5731     }
5732     if (fp_access_check(s)) {
5733         /* Q bit selects BFMLALB vs BFMLALT. */
5734         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
5735                           gen_helper_gvec_bfmlal);
5736     }
5737     return true;
5738 }
5739 
5740 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
5741     gen_helper_gvec_fcaddh,
5742     gen_helper_gvec_fcadds,
5743     gen_helper_gvec_fcaddd,
5744 };
5745 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
5746 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
5747 
5748 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
5749 {
5750     gen_helper_gvec_4_ptr *fn;
5751 
5752     if (!dc_isar_feature(aa64_fcma, s)) {
5753         return false;
5754     }
5755     switch (a->esz) {
5756     case MO_64:
5757         if (!a->q) {
5758             return false;
5759         }
5760         fn = gen_helper_gvec_fcmlad;
5761         break;
5762     case MO_32:
5763         fn = gen_helper_gvec_fcmlas;
5764         break;
5765     case MO_16:
5766         if (!dc_isar_feature(aa64_fp16, s)) {
5767             return false;
5768         }
5769         fn = gen_helper_gvec_fcmlah;
5770         break;
5771     default:
5772         return false;
5773     }
5774     if (fp_access_check(s)) {
5775         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
5776                           a->esz == MO_16, a->rot, fn);
5777     }
5778     return true;
5779 }
5780 
/*
 * Widening vector x vector/indexed.
 *
 * These read from the top or bottom half of a 128-bit vector.
 * After widening, optionally accumulate with a 128-bit vector.
 * Implement these inline, as the number of elements is limited
 * and the related SVE and SME operations on larger vectors use
 * even/odd elements instead of top/bottom half.
 *
 * If idx >= 0, operand 2 is indexed, otherwise vector.
 * If acc, operand 0 is loaded with rd.
 */
5793 
5794 /* For low half, iterating up. */
5795 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
5796                             int rd, int rn, int rm, int idx,
5797                             NeonGenTwo64OpFn *fn, bool acc)
5798 {
5799     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
5800     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5801     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5802     MemOp esz = memop & MO_SIZE;
5803     int half = 8 >> esz;
5804     int top_swap, top_half;
5805 
5806     /* There are no 64x64->128 bit operations. */
5807     if (esz >= MO_64) {
5808         return false;
5809     }
5810     if (!fp_access_check(s)) {
5811         return true;
5812     }
5813 
5814     if (idx >= 0) {
5815         read_vec_element(s, tcg_op2, rm, idx, memop);
5816     }
5817 
5818     /*
5819      * For top half inputs, iterate forward; backward for bottom half.
5820      * This means the store to the destination will not occur until
5821      * overlapping input inputs are consumed.
5822      * Use top_swap to conditionally invert the forward iteration index.
5823      */
5824     top_swap = top ? 0 : half - 1;
5825     top_half = top ? half : 0;
5826 
5827     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5828         int elt = elt_fwd ^ top_swap;
5829 
5830         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
5831         if (idx < 0) {
5832             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
5833         }
5834         if (acc) {
5835             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
5836         }
5837         fn(tcg_op0, tcg_op1, tcg_op2);
5838         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
5839     }
5840     clear_vec_high(s, 1, rd);
5841     return true;
5842 }
5843 
5844 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5845 {
5846     TCGv_i64 t = tcg_temp_new_i64();
5847     tcg_gen_mul_i64(t, n, m);
5848     tcg_gen_add_i64(d, d, t);
5849 }
5850 
5851 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5852 {
5853     TCGv_i64 t = tcg_temp_new_i64();
5854     tcg_gen_mul_i64(t, n, m);
5855     tcg_gen_sub_i64(d, d, t);
5856 }
5857 
5858 TRANS(SMULL_v, do_3op_widening,
5859       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5860       tcg_gen_mul_i64, false)
5861 TRANS(UMULL_v, do_3op_widening,
5862       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5863       tcg_gen_mul_i64, false)
5864 TRANS(SMLAL_v, do_3op_widening,
5865       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5866       gen_muladd_i64, true)
5867 TRANS(UMLAL_v, do_3op_widening,
5868       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5869       gen_muladd_i64, true)
5870 TRANS(SMLSL_v, do_3op_widening,
5871       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5872       gen_mulsub_i64, true)
5873 TRANS(UMLSL_v, do_3op_widening,
5874       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5875       gen_mulsub_i64, true)
5876 
5877 TRANS(SMULL_vi, do_3op_widening,
5878       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5879       tcg_gen_mul_i64, false)
5880 TRANS(UMULL_vi, do_3op_widening,
5881       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5882       tcg_gen_mul_i64, false)
5883 TRANS(SMLAL_vi, do_3op_widening,
5884       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5885       gen_muladd_i64, true)
5886 TRANS(UMLAL_vi, do_3op_widening,
5887       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5888       gen_muladd_i64, true)
5889 TRANS(SMLSL_vi, do_3op_widening,
5890       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5891       gen_mulsub_i64, true)
5892 TRANS(UMLSL_vi, do_3op_widening,
5893       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5894       gen_mulsub_i64, true)
5895 
5896 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5897 {
5898     TCGv_i64 t1 = tcg_temp_new_i64();
5899     TCGv_i64 t2 = tcg_temp_new_i64();
5900 
5901     tcg_gen_sub_i64(t1, n, m);
5902     tcg_gen_sub_i64(t2, m, n);
5903     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
5904 }
5905 
5906 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5907 {
5908     TCGv_i64 t1 = tcg_temp_new_i64();
5909     TCGv_i64 t2 = tcg_temp_new_i64();
5910 
5911     tcg_gen_sub_i64(t1, n, m);
5912     tcg_gen_sub_i64(t2, m, n);
5913     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
5914 }
5915 
5916 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5917 {
5918     TCGv_i64 t = tcg_temp_new_i64();
5919     gen_sabd_i64(t, n, m);
5920     tcg_gen_add_i64(d, d, t);
5921 }
5922 
5923 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5924 {
5925     TCGv_i64 t = tcg_temp_new_i64();
5926     gen_uabd_i64(t, n, m);
5927     tcg_gen_add_i64(d, d, t);
5928 }
5929 
5930 TRANS(SADDL_v, do_3op_widening,
5931       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5932       tcg_gen_add_i64, false)
5933 TRANS(UADDL_v, do_3op_widening,
5934       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5935       tcg_gen_add_i64, false)
5936 TRANS(SSUBL_v, do_3op_widening,
5937       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5938       tcg_gen_sub_i64, false)
5939 TRANS(USUBL_v, do_3op_widening,
5940       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5941       tcg_gen_sub_i64, false)
5942 TRANS(SABDL_v, do_3op_widening,
5943       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5944       gen_sabd_i64, false)
5945 TRANS(UABDL_v, do_3op_widening,
5946       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5947       gen_uabd_i64, false)
5948 TRANS(SABAL_v, do_3op_widening,
5949       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5950       gen_saba_i64, true)
5951 TRANS(UABAL_v, do_3op_widening,
5952       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5953       gen_uaba_i64, true)
5954 
5955 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5956 {
5957     tcg_gen_mul_i64(d, n, m);
5958     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
5959 }
5960 
5961 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5962 {
5963     tcg_gen_mul_i64(d, n, m);
5964     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
5965 }
5966 
5967 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5968 {
5969     TCGv_i64 t = tcg_temp_new_i64();
5970 
5971     tcg_gen_mul_i64(t, n, m);
5972     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
5973     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
5974 }
5975 
5976 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5977 {
5978     TCGv_i64 t = tcg_temp_new_i64();
5979 
5980     tcg_gen_mul_i64(t, n, m);
5981     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
5982     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
5983 }
5984 
5985 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5986 {
5987     TCGv_i64 t = tcg_temp_new_i64();
5988 
5989     tcg_gen_mul_i64(t, n, m);
5990     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
5991     tcg_gen_neg_i64(t, t);
5992     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
5993 }
5994 
5995 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5996 {
5997     TCGv_i64 t = tcg_temp_new_i64();
5998 
5999     tcg_gen_mul_i64(t, n, m);
6000     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6001     tcg_gen_neg_i64(t, t);
6002     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6003 }
6004 
6005 TRANS(SQDMULL_v, do_3op_widening,
6006       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6007       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6008 TRANS(SQDMLAL_v, do_3op_widening,
6009       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6010       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6011 TRANS(SQDMLSL_v, do_3op_widening,
6012       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6013       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6014 
6015 TRANS(SQDMULL_vi, do_3op_widening,
6016       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6017       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6018 TRANS(SQDMLAL_vi, do_3op_widening,
6019       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6020       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6021 TRANS(SQDMLSL_vi, do_3op_widening,
6022       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6023       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6024 
6025 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6026                            MemOp sign, bool sub)
6027 {
6028     TCGv_i64 tcg_op0, tcg_op1;
6029     MemOp esz = a->esz;
6030     int half = 8 >> esz;
6031     bool top = a->q;
6032     int top_swap = top ? 0 : half - 1;
6033     int top_half = top ? half : 0;
6034 
6035     /* There are no 64x64->128 bit operations. */
6036     if (esz >= MO_64) {
6037         return false;
6038     }
6039     if (!fp_access_check(s)) {
6040         return true;
6041     }
6042     tcg_op0 = tcg_temp_new_i64();
6043     tcg_op1 = tcg_temp_new_i64();
6044 
6045     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6046         int elt = elt_fwd ^ top_swap;
6047 
6048         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6049         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6050         if (sub) {
6051             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6052         } else {
6053             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6054         }
6055         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6056     }
6057     clear_vec_high(s, 1, a->rd);
6058     return true;
6059 }
6060 
6061 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6062 TRANS(UADDW, do_addsub_wide, a, 0, false)
6063 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6064 TRANS(USUBW, do_addsub_wide, a, 0, true)
6065 
6066 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6067                                  bool sub, bool round)
6068 {
6069     TCGv_i64 tcg_op0, tcg_op1;
6070     MemOp esz = a->esz;
6071     int half = 8 >> esz;
6072     bool top = a->q;
6073     int ebits = 8 << esz;
6074     uint64_t rbit = 1ull << (ebits - 1);
6075     int top_swap, top_half;
6076 
6077     /* There are no 128x128->64 bit operations. */
6078     if (esz >= MO_64) {
6079         return false;
6080     }
6081     if (!fp_access_check(s)) {
6082         return true;
6083     }
6084     tcg_op0 = tcg_temp_new_i64();
6085     tcg_op1 = tcg_temp_new_i64();
6086 
6087     /*
6088      * For top half inputs, iterate backward; forward for bottom half.
6089      * This means the store to the destination will not occur until
6090      * overlapping input inputs are consumed.
6091      */
6092     top_swap = top ? half - 1 : 0;
6093     top_half = top ? half : 0;
6094 
6095     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6096         int elt = elt_fwd ^ top_swap;
6097 
6098         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6099         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6100         if (sub) {
6101             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6102         } else {
6103             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6104         }
6105         if (round) {
6106             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6107         }
6108         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6109         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6110     }
6111     clear_vec_high(s, top, a->rd);
6112     return true;
6113 }
6114 
6115 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6116 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6117 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6118 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6119 
6120 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6121 {
6122     if (fp_access_check(s)) {
6123         /* The Q field specifies lo/hi half input for these insns.  */
6124         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6125     }
6126     return true;
6127 }
6128 
6129 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6130 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6131 
6132 /*
6133  * Advanced SIMD scalar/vector x indexed element
6134  */
6135 
6136 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6137 {
6138     switch (a->esz) {
6139     case MO_64:
6140         if (fp_access_check(s)) {
6141             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6142             TCGv_i64 t1 = tcg_temp_new_i64();
6143 
6144             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6145             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6146             write_fp_dreg(s, a->rd, t0);
6147         }
6148         break;
6149     case MO_32:
6150         if (fp_access_check(s)) {
6151             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6152             TCGv_i32 t1 = tcg_temp_new_i32();
6153 
6154             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6155             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6156             write_fp_sreg(s, a->rd, t0);
6157         }
6158         break;
6159     case MO_16:
6160         if (!dc_isar_feature(aa64_fp16, s)) {
6161             return false;
6162         }
6163         if (fp_access_check(s)) {
6164             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6165             TCGv_i32 t1 = tcg_temp_new_i32();
6166 
6167             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6168             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6169             write_fp_sreg(s, a->rd, t0);
6170         }
6171         break;
6172     default:
6173         g_assert_not_reached();
6174     }
6175     return true;
6176 }
6177 
6178 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6179 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6180 
6181 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6182 {
6183     switch (a->esz) {
6184     case MO_64:
6185         if (fp_access_check(s)) {
6186             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6187             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6188             TCGv_i64 t2 = tcg_temp_new_i64();
6189 
6190             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6191             if (neg) {
6192                 gen_vfp_negd(t1, t1);
6193             }
6194             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6195             write_fp_dreg(s, a->rd, t0);
6196         }
6197         break;
6198     case MO_32:
6199         if (fp_access_check(s)) {
6200             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6201             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6202             TCGv_i32 t2 = tcg_temp_new_i32();
6203 
6204             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6205             if (neg) {
6206                 gen_vfp_negs(t1, t1);
6207             }
6208             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6209             write_fp_sreg(s, a->rd, t0);
6210         }
6211         break;
6212     case MO_16:
6213         if (!dc_isar_feature(aa64_fp16, s)) {
6214             return false;
6215         }
6216         if (fp_access_check(s)) {
6217             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6218             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6219             TCGv_i32 t2 = tcg_temp_new_i32();
6220 
6221             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6222             if (neg) {
6223                 gen_vfp_negh(t1, t1);
6224             }
6225             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6226                                        fpstatus_ptr(FPST_FPCR_F16));
6227             write_fp_sreg(s, a->rd, t0);
6228         }
6229         break;
6230     default:
6231         g_assert_not_reached();
6232     }
6233     return true;
6234 }
6235 
6236 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6237 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6238 
6239 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6240                                   const ENVScalar2 *f)
6241 {
6242     if (a->esz < MO_16 || a->esz > MO_32) {
6243         return false;
6244     }
6245     if (fp_access_check(s)) {
6246         TCGv_i32 t0 = tcg_temp_new_i32();
6247         TCGv_i32 t1 = tcg_temp_new_i32();
6248 
6249         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6250         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6251         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6252         write_fp_sreg(s, a->rd, t0);
6253     }
6254     return true;
6255 }
6256 
6257 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6258 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6259 
6260 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6261                                   const ENVScalar3 *f)
6262 {
6263     if (a->esz < MO_16 || a->esz > MO_32) {
6264         return false;
6265     }
6266     if (fp_access_check(s)) {
6267         TCGv_i32 t0 = tcg_temp_new_i32();
6268         TCGv_i32 t1 = tcg_temp_new_i32();
6269         TCGv_i32 t2 = tcg_temp_new_i32();
6270 
6271         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6272         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6273         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6274         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6275         write_fp_sreg(s, a->rd, t0);
6276     }
6277     return true;
6278 }
6279 
6280 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6281 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6282 
6283 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6284                                           NeonGenTwo64OpFn *fn, bool acc)
6285 {
6286     if (fp_access_check(s)) {
6287         TCGv_i64 t0 = tcg_temp_new_i64();
6288         TCGv_i64 t1 = tcg_temp_new_i64();
6289         TCGv_i64 t2 = tcg_temp_new_i64();
6290         unsigned vsz, dofs;
6291 
6292         if (acc) {
6293             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6294         }
6295         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6296         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6297         fn(t0, t1, t2);
6298 
6299         /* Clear the whole register first, then store scalar. */
6300         vsz = vec_full_reg_size(s);
6301         dofs = vec_full_reg_offset(s, a->rd);
6302         tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
6303         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6304     }
6305     return true;
6306 }
6307 
6308 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6309       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6310 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6311       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6312 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6313       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6314 
6315 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6316                               gen_helper_gvec_3_ptr * const fns[3])
6317 {
6318     MemOp esz = a->esz;
6319 
6320     switch (esz) {
6321     case MO_64:
6322         if (!a->q) {
6323             return false;
6324         }
6325         break;
6326     case MO_32:
6327         break;
6328     case MO_16:
6329         if (!dc_isar_feature(aa64_fp16, s)) {
6330             return false;
6331         }
6332         break;
6333     default:
6334         g_assert_not_reached();
6335     }
6336     if (fp_access_check(s)) {
6337         gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6338                           esz == MO_16, a->idx, fns[esz - 1]);
6339     }
6340     return true;
6341 }
6342 
6343 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6344     gen_helper_gvec_fmul_idx_h,
6345     gen_helper_gvec_fmul_idx_s,
6346     gen_helper_gvec_fmul_idx_d,
6347 };
6348 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6349 
6350 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6351     gen_helper_gvec_fmulx_idx_h,
6352     gen_helper_gvec_fmulx_idx_s,
6353     gen_helper_gvec_fmulx_idx_d,
6354 };
6355 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6356 
6357 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6358 {
6359     static gen_helper_gvec_4_ptr * const fns[3] = {
6360         gen_helper_gvec_fmla_idx_h,
6361         gen_helper_gvec_fmla_idx_s,
6362         gen_helper_gvec_fmla_idx_d,
6363     };
6364     MemOp esz = a->esz;
6365 
6366     switch (esz) {
6367     case MO_64:
6368         if (!a->q) {
6369             return false;
6370         }
6371         break;
6372     case MO_32:
6373         break;
6374     case MO_16:
6375         if (!dc_isar_feature(aa64_fp16, s)) {
6376             return false;
6377         }
6378         break;
6379     default:
6380         g_assert_not_reached();
6381     }
6382     if (fp_access_check(s)) {
6383         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6384                           esz == MO_16, (a->idx << 1) | neg,
6385                           fns[esz - 1]);
6386     }
6387     return true;
6388 }
6389 
6390 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6391 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6392 
6393 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6394 {
6395     if (fp_access_check(s)) {
6396         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6397         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6398                            vec_full_reg_offset(s, a->rn),
6399                            vec_full_reg_offset(s, a->rm), tcg_env,
6400                            a->q ? 16 : 8, vec_full_reg_size(s),
6401                            data, gen_helper_gvec_fmlal_idx_a64);
6402     }
6403     return true;
6404 }
6405 
6406 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6407 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6408 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6409 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6410 
6411 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6412                                gen_helper_gvec_3 * const fns[2])
6413 {
6414     assert(a->esz == MO_16 || a->esz == MO_32);
6415     if (fp_access_check(s)) {
6416         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6417     }
6418     return true;
6419 }
6420 
6421 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6422     gen_helper_gvec_mul_idx_h,
6423     gen_helper_gvec_mul_idx_s,
6424 };
6425 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6426 
6427 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6428 {
6429     static gen_helper_gvec_4 * const fns[2][2] = {
6430         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6431         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6432     };
6433 
6434     assert(a->esz == MO_16 || a->esz == MO_32);
6435     if (fp_access_check(s)) {
6436         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6437                          a->idx, fns[a->esz - 1][sub]);
6438     }
6439     return true;
6440 }
6441 
6442 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6443 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6444 
6445 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6446                                   gen_helper_gvec_4 * const fns[2])
6447 {
6448     assert(a->esz == MO_16 || a->esz == MO_32);
6449     if (fp_access_check(s)) {
6450         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6451                            vec_full_reg_offset(s, a->rn),
6452                            vec_full_reg_offset(s, a->rm),
6453                            offsetof(CPUARMState, vfp.qc),
6454                            a->q ? 16 : 8, vec_full_reg_size(s),
6455                            a->idx, fns[a->esz - 1]);
6456     }
6457     return true;
6458 }
6459 
6460 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6461     gen_helper_neon_sqdmulh_idx_h,
6462     gen_helper_neon_sqdmulh_idx_s,
6463 };
6464 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6465 
6466 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6467     gen_helper_neon_sqrdmulh_idx_h,
6468     gen_helper_neon_sqrdmulh_idx_s,
6469 };
6470 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6471 
6472 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6473     gen_helper_neon_sqrdmlah_idx_h,
6474     gen_helper_neon_sqrdmlah_idx_s,
6475 };
6476 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6477            f_vector_idx_sqrdmlah)
6478 
6479 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6480     gen_helper_neon_sqrdmlsh_idx_h,
6481     gen_helper_neon_sqrdmlsh_idx_s,
6482 };
6483 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6484            f_vector_idx_sqrdmlsh)
6485 
6486 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6487                               gen_helper_gvec_4 *fn)
6488 {
6489     if (fp_access_check(s)) {
6490         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6491     }
6492     return true;
6493 }
6494 
6495 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6496                                   gen_helper_gvec_4_ptr *fn)
6497 {
6498     if (fp_access_check(s)) {
6499         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6500     }
6501     return true;
6502 }
6503 
6504 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6505 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6506 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6507            gen_helper_gvec_sudot_idx_b)
6508 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6509            gen_helper_gvec_usdot_idx_b)
6510 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6511            gen_helper_gvec_bfdot_idx)
6512 
6513 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6514 {
6515     if (!dc_isar_feature(aa64_bf16, s)) {
6516         return false;
6517     }
6518     if (fp_access_check(s)) {
6519         /* Q bit selects BFMLALB vs BFMLALT. */
6520         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
6521                           (a->idx << 1) | a->q,
6522                           gen_helper_gvec_bfmlal_idx);
6523     }
6524     return true;
6525 }
6526 
6527 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6528 {
6529     gen_helper_gvec_4_ptr *fn;
6530 
6531     if (!dc_isar_feature(aa64_fcma, s)) {
6532         return false;
6533     }
6534     switch (a->esz) {
6535     case MO_16:
6536         if (!dc_isar_feature(aa64_fp16, s)) {
6537             return false;
6538         }
6539         fn = gen_helper_gvec_fcmlah_idx;
6540         break;
6541     case MO_32:
6542         fn = gen_helper_gvec_fcmlas_idx;
6543         break;
6544     default:
6545         g_assert_not_reached();
6546     }
6547     if (fp_access_check(s)) {
6548         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6549                           a->esz == MO_16, (a->idx << 2) | a->rot, fn);
6550     }
6551     return true;
6552 }
6553 
6554 /*
6555  * Advanced SIMD scalar pairwise
6556  */
6557 
6558 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6559 {
6560     switch (a->esz) {
6561     case MO_64:
6562         if (fp_access_check(s)) {
6563             TCGv_i64 t0 = tcg_temp_new_i64();
6564             TCGv_i64 t1 = tcg_temp_new_i64();
6565 
6566             read_vec_element(s, t0, a->rn, 0, MO_64);
6567             read_vec_element(s, t1, a->rn, 1, MO_64);
6568             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6569             write_fp_dreg(s, a->rd, t0);
6570         }
6571         break;
6572     case MO_32:
6573         if (fp_access_check(s)) {
6574             TCGv_i32 t0 = tcg_temp_new_i32();
6575             TCGv_i32 t1 = tcg_temp_new_i32();
6576 
6577             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6578             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6579             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6580             write_fp_sreg(s, a->rd, t0);
6581         }
6582         break;
6583     case MO_16:
6584         if (!dc_isar_feature(aa64_fp16, s)) {
6585             return false;
6586         }
6587         if (fp_access_check(s)) {
6588             TCGv_i32 t0 = tcg_temp_new_i32();
6589             TCGv_i32 t1 = tcg_temp_new_i32();
6590 
6591             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6592             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6593             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6594             write_fp_sreg(s, a->rd, t0);
6595         }
6596         break;
6597     default:
6598         g_assert_not_reached();
6599     }
6600     return true;
6601 }
6602 
6603 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6604 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
6605 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
6606 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6607 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6608 
6609 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6610 {
6611     if (fp_access_check(s)) {
6612         TCGv_i64 t0 = tcg_temp_new_i64();
6613         TCGv_i64 t1 = tcg_temp_new_i64();
6614 
6615         read_vec_element(s, t0, a->rn, 0, MO_64);
6616         read_vec_element(s, t1, a->rn, 1, MO_64);
6617         tcg_gen_add_i64(t0, t0, t1);
6618         write_fp_dreg(s, a->rd, t0);
6619     }
6620     return true;
6621 }
6622 
6623 /*
6624  * Floating-point conditional select
6625  */
6626 
6627 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
6628 {
6629     TCGv_i64 t_true, t_false;
6630     DisasCompare64 c;
6631 
6632     switch (a->esz) {
6633     case MO_32:
6634     case MO_64:
6635         break;
6636     case MO_16:
6637         if (!dc_isar_feature(aa64_fp16, s)) {
6638             return false;
6639         }
6640         break;
6641     default:
6642         return false;
6643     }
6644 
6645     if (!fp_access_check(s)) {
6646         return true;
6647     }
6648 
6649     /* Zero extend sreg & hreg inputs to 64 bits now.  */
6650     t_true = tcg_temp_new_i64();
6651     t_false = tcg_temp_new_i64();
6652     read_vec_element(s, t_true, a->rn, 0, a->esz);
6653     read_vec_element(s, t_false, a->rm, 0, a->esz);
6654 
6655     a64_test_cc(&c, a->cond);
6656     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6657                         t_true, t_false);
6658 
6659     /*
6660      * Note that sregs & hregs write back zeros to the high bits,
6661      * and we've already done the zero-extension.
6662      */
6663     write_fp_dreg(s, a->rd, t_true);
6664     return true;
6665 }
6666 
6667 /*
6668  * Advanced SIMD Extract
6669  */
6670 
6671 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
6672 {
6673     if (fp_access_check(s)) {
6674         TCGv_i64 lo = read_fp_dreg(s, a->rn);
6675         if (a->imm != 0) {
6676             TCGv_i64 hi = read_fp_dreg(s, a->rm);
6677             tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
6678         }
6679         write_fp_dreg(s, a->rd, lo);
6680     }
6681     return true;
6682 }
6683 
6684 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
6685 {
6686     TCGv_i64 lo, hi;
6687     int pos = (a->imm & 7) * 8;
6688     int elt = a->imm >> 3;
6689 
6690     if (!fp_access_check(s)) {
6691         return true;
6692     }
6693 
6694     lo = tcg_temp_new_i64();
6695     hi = tcg_temp_new_i64();
6696 
6697     read_vec_element(s, lo, a->rn, elt, MO_64);
6698     elt++;
6699     read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
6700     elt++;
6701 
6702     if (pos != 0) {
6703         TCGv_i64 hh = tcg_temp_new_i64();
6704         tcg_gen_extract2_i64(lo, lo, hi, pos);
6705         read_vec_element(s, hh, a->rm, elt & 1, MO_64);
6706         tcg_gen_extract2_i64(hi, hi, hh, pos);
6707     }
6708 
6709     write_vec_element(s, lo, a->rd, 0, MO_64);
6710     write_vec_element(s, hi, a->rd, 1, MO_64);
6711     clear_vec_high(s, true, a->rd);
6712     return true;
6713 }
6714 
6715 /*
6716  * Floating-point data-processing (3 source)
6717  */
6718 
6719 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
6720 {
6721     TCGv_ptr fpst;
6722 
6723     /*
6724      * These are fused multiply-add.  Note that doing the negations here
6725      * as separate steps is correct: an input NaN should come out with
6726      * its sign bit flipped if it is a negated-input.
6727      */
6728     switch (a->esz) {
6729     case MO_64:
6730         if (fp_access_check(s)) {
6731             TCGv_i64 tn = read_fp_dreg(s, a->rn);
6732             TCGv_i64 tm = read_fp_dreg(s, a->rm);
6733             TCGv_i64 ta = read_fp_dreg(s, a->ra);
6734 
6735             if (neg_a) {
6736                 gen_vfp_negd(ta, ta);
6737             }
6738             if (neg_n) {
6739                 gen_vfp_negd(tn, tn);
6740             }
6741             fpst = fpstatus_ptr(FPST_FPCR);
6742             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
6743             write_fp_dreg(s, a->rd, ta);
6744         }
6745         break;
6746 
6747     case MO_32:
6748         if (fp_access_check(s)) {
6749             TCGv_i32 tn = read_fp_sreg(s, a->rn);
6750             TCGv_i32 tm = read_fp_sreg(s, a->rm);
6751             TCGv_i32 ta = read_fp_sreg(s, a->ra);
6752 
6753             if (neg_a) {
6754                 gen_vfp_negs(ta, ta);
6755             }
6756             if (neg_n) {
6757                 gen_vfp_negs(tn, tn);
6758             }
6759             fpst = fpstatus_ptr(FPST_FPCR);
6760             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
6761             write_fp_sreg(s, a->rd, ta);
6762         }
6763         break;
6764 
6765     case MO_16:
6766         if (!dc_isar_feature(aa64_fp16, s)) {
6767             return false;
6768         }
6769         if (fp_access_check(s)) {
6770             TCGv_i32 tn = read_fp_hreg(s, a->rn);
6771             TCGv_i32 tm = read_fp_hreg(s, a->rm);
6772             TCGv_i32 ta = read_fp_hreg(s, a->ra);
6773 
6774             if (neg_a) {
6775                 gen_vfp_negh(ta, ta);
6776             }
6777             if (neg_n) {
6778                 gen_vfp_negh(tn, tn);
6779             }
6780             fpst = fpstatus_ptr(FPST_FPCR_F16);
6781             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
6782             write_fp_sreg(s, a->rd, ta);
6783         }
6784         break;
6785 
6786     default:
6787         return false;
6788     }
6789     return true;
6790 }
6791 
6792 TRANS(FMADD, do_fmadd, a, false, false)
6793 TRANS(FNMADD, do_fmadd, a, true, true)
6794 TRANS(FMSUB, do_fmadd, a, false, true)
6795 TRANS(FNMSUB, do_fmadd, a, true, false)
6796 
6797 /*
6798  * Advanced SIMD Across Lanes
6799  */
6800 
6801 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
6802                              MemOp src_sign, NeonGenTwo64OpFn *fn)
6803 {
6804     TCGv_i64 tcg_res, tcg_elt;
6805     MemOp src_mop = a->esz | src_sign;
6806     int elements = (a->q ? 16 : 8) >> a->esz;
6807 
6808     /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
6809     if (elements < 4) {
6810         return false;
6811     }
6812     if (!fp_access_check(s)) {
6813         return true;
6814     }
6815 
6816     tcg_res = tcg_temp_new_i64();
6817     tcg_elt = tcg_temp_new_i64();
6818 
6819     read_vec_element(s, tcg_res, a->rn, 0, src_mop);
6820     for (int i = 1; i < elements; i++) {
6821         read_vec_element(s, tcg_elt, a->rn, i, src_mop);
6822         fn(tcg_res, tcg_res, tcg_elt);
6823     }
6824 
6825     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
6826     write_fp_dreg(s, a->rd, tcg_res);
6827     return true;
6828 }
6829 
6830 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
6831 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
6832 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
6833 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
6834 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
6835 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
6836 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
6837 
6838 /*
6839  * do_fp_reduction helper
6840  *
6841  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6842  * important for correct NaN propagation that we do these
6843  * operations in exactly the order specified by the pseudocode.
6844  *
6845  * This is a recursive function.
6846  */
6847 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
6848                                 int ebase, int ecount, TCGv_ptr fpst,
6849                                 NeonGenTwoSingleOpFn *fn)
6850 {
6851     if (ecount == 1) {
6852         TCGv_i32 tcg_elem = tcg_temp_new_i32();
6853         read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
6854         return tcg_elem;
6855     } else {
6856         int half = ecount >> 1;
6857         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
6858 
6859         tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
6860         tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
6861         tcg_res = tcg_temp_new_i32();
6862 
6863         fn(tcg_res, tcg_lo, tcg_hi, fpst);
6864         return tcg_res;
6865     }
6866 }
6867 
6868 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
6869                               NeonGenTwoSingleOpFn *fn)
6870 {
6871     if (fp_access_check(s)) {
6872         MemOp esz = a->esz;
6873         int elts = (a->q ? 16 : 8) >> esz;
6874         TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
6875         TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
6876         write_fp_sreg(s, a->rd, res);
6877     }
6878     return true;
6879 }
6880 
6881 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
6882 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
6883 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
6884 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
6885 
6886 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
6887 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
6888 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
6889 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
6890 
6891 /*
6892  * Floating-point Immediate
6893  */
6894 
6895 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
6896 {
6897     switch (a->esz) {
6898     case MO_32:
6899     case MO_64:
6900         break;
6901     case MO_16:
6902         if (!dc_isar_feature(aa64_fp16, s)) {
6903             return false;
6904         }
6905         break;
6906     default:
6907         return false;
6908     }
6909     if (fp_access_check(s)) {
6910         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
6911         write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
6912     }
6913     return true;
6914 }
6915 
6916 /*
6917  * Advanced SIMD Modified Immediate
6918  */
6919 
6920 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
6921 {
6922     if (!dc_isar_feature(aa64_fp16, s)) {
6923         return false;
6924     }
6925     if (fp_access_check(s)) {
6926         tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
6927                              a->q ? 16 : 8, vec_full_reg_size(s),
6928                              vfp_expand_imm(MO_16, a->abcdefgh));
6929     }
6930     return true;
6931 }
6932 
6933 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
6934                      int64_t c, uint32_t oprsz, uint32_t maxsz)
6935 {
6936     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
6937 }
6938 
6939 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
6940 {
6941     GVecGen2iFn *fn;
6942 
6943     /* Handle decode of cmode/op here between ORR/BIC/MOVI */
6944     if ((a->cmode & 1) && a->cmode < 12) {
6945         /* For op=1, the imm will be inverted, so BIC becomes AND. */
6946         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
6947     } else {
6948         /* There is one unallocated cmode/op combination in this space */
6949         if (a->cmode == 15 && a->op == 1 && a->q == 0) {
6950             return false;
6951         }
6952         fn = gen_movi;
6953     }
6954 
6955     if (fp_access_check(s)) {
6956         uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
6957         gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
6958     }
6959     return true;
6960 }
6961 
6962 /*
6963  * Advanced SIMD Shift by Immediate
6964  */
6965 
6966 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
6967 {
6968     if (fp_access_check(s)) {
6969         gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
6970     }
6971     return true;
6972 }
6973 
6974 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
6975 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
6976 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
6977 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
6978 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
6979 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
6980 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
6981 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
6982 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
6983 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
6984 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli);
6985 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
6986 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
6987 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
6988 
6989 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
6990 {
6991     TCGv_i64 tcg_rn, tcg_rd;
6992     int esz = a->esz;
6993     int esize;
6994 
6995     if (!fp_access_check(s)) {
6996         return true;
6997     }
6998 
6999     /*
7000      * For the LL variants the store is larger than the load,
7001      * so if rd == rn we would overwrite parts of our input.
7002      * So load everything right now and use shifts in the main loop.
7003      */
7004     tcg_rd = tcg_temp_new_i64();
7005     tcg_rn = tcg_temp_new_i64();
7006     read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7007 
7008     esize = 8 << esz;
7009     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7010         if (is_u) {
7011             tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7012         } else {
7013             tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7014         }
7015         tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7016         write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7017     }
7018     clear_vec_high(s, true, a->rd);
7019     return true;
7020 }
7021 
7022 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7023 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7024 
7025 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7026 {
7027     assert(shift >= 0 && shift <= 64);
7028     tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7029 }
7030 
7031 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7032 {
7033     assert(shift >= 0 && shift <= 64);
7034     if (shift == 64) {
7035         tcg_gen_movi_i64(dst, 0);
7036     } else {
7037         tcg_gen_shri_i64(dst, src, shift);
7038     }
7039 }
7040 
7041 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7042 {
7043     gen_sshr_d(src, src, shift);
7044     tcg_gen_add_i64(dst, dst, src);
7045 }
7046 
7047 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7048 {
7049     gen_ushr_d(src, src, shift);
7050     tcg_gen_add_i64(dst, dst, src);
7051 }
7052 
7053 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7054 {
7055     assert(shift >= 0 && shift <= 32);
7056     if (shift) {
7057         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7058         tcg_gen_add_i64(dst, src, rnd);
7059         tcg_gen_sari_i64(dst, dst, shift);
7060     } else {
7061         tcg_gen_mov_i64(dst, src);
7062     }
7063 }
7064 
7065 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7066 {
7067     assert(shift >= 0 && shift <= 32);
7068     if (shift) {
7069         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7070         tcg_gen_add_i64(dst, src, rnd);
7071         tcg_gen_shri_i64(dst, dst, shift);
7072     } else {
7073         tcg_gen_mov_i64(dst, src);
7074     }
7075 }
7076 
7077 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7078 {
7079     assert(shift >= 0 && shift <= 64);
7080     if (shift == 0) {
7081         tcg_gen_mov_i64(dst, src);
7082     } else if (shift == 64) {
7083         /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7084         tcg_gen_movi_i64(dst, 0);
7085     } else {
7086         TCGv_i64 rnd = tcg_temp_new_i64();
7087         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7088         tcg_gen_sari_i64(dst, src, shift);
7089         tcg_gen_add_i64(dst, dst, rnd);
7090     }
7091 }
7092 
7093 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7094 {
7095     assert(shift >= 0 && shift <= 64);
7096     if (shift == 0) {
7097         tcg_gen_mov_i64(dst, src);
7098     } else if (shift == 64) {
7099         /* Rounding will propagate bit 63 into bit 64. */
7100         tcg_gen_shri_i64(dst, src, 63);
7101     } else {
7102         TCGv_i64 rnd = tcg_temp_new_i64();
7103         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7104         tcg_gen_shri_i64(dst, src, shift);
7105         tcg_gen_add_i64(dst, dst, rnd);
7106     }
7107 }
7108 
7109 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7110 {
7111     gen_srshr_d(src, src, shift);
7112     tcg_gen_add_i64(dst, dst, src);
7113 }
7114 
7115 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7116 {
7117     gen_urshr_d(src, src, shift);
7118     tcg_gen_add_i64(dst, dst, src);
7119 }
7120 
7121 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7122 {
7123     /* If shift is 64, dst is unchanged. */
7124     if (shift != 64) {
7125         tcg_gen_shri_i64(src, src, shift);
7126         tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7127     }
7128 }
7129 
7130 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7131 {
7132     tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7133 }
7134 
7135 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7136                                     WideShiftImmFn * const fns[3], MemOp sign)
7137 {
7138     TCGv_i64 tcg_rn, tcg_rd;
7139     int esz = a->esz;
7140     int esize;
7141     WideShiftImmFn *fn;
7142 
7143     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7144 
7145     if (!fp_access_check(s)) {
7146         return true;
7147     }
7148 
7149     tcg_rn = tcg_temp_new_i64();
7150     tcg_rd = tcg_temp_new_i64();
7151     tcg_gen_movi_i64(tcg_rd, 0);
7152 
7153     fn = fns[esz];
7154     esize = 8 << esz;
7155     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7156         read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7157         fn(tcg_rn, tcg_rn, a->imm);
7158         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7159     }
7160 
7161     write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7162     clear_vec_high(s, a->q, a->rd);
7163     return true;
7164 }
7165 
7166 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7167 {
7168     tcg_gen_sari_i64(d, s, i);
7169     tcg_gen_ext16u_i64(d, d);
7170     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7171 }
7172 
7173 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7174 {
7175     tcg_gen_sari_i64(d, s, i);
7176     tcg_gen_ext32u_i64(d, d);
7177     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7178 }
7179 
7180 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7181 {
7182     gen_sshr_d(d, s, i);
7183     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7184 }
7185 
7186 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7187 {
7188     tcg_gen_shri_i64(d, s, i);
7189     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7190 }
7191 
7192 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7193 {
7194     tcg_gen_shri_i64(d, s, i);
7195     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7196 }
7197 
7198 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7199 {
7200     gen_ushr_d(d, s, i);
7201     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7202 }
7203 
7204 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7205 {
7206     tcg_gen_sari_i64(d, s, i);
7207     tcg_gen_ext16u_i64(d, d);
7208     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7209 }
7210 
7211 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7212 {
7213     tcg_gen_sari_i64(d, s, i);
7214     tcg_gen_ext32u_i64(d, d);
7215     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7216 }
7217 
7218 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7219 {
7220     gen_sshr_d(d, s, i);
7221     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7222 }
7223 
7224 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7225 {
7226     gen_srshr_bhs(d, s, i);
7227     tcg_gen_ext16u_i64(d, d);
7228     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7229 }
7230 
7231 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7232 {
7233     gen_srshr_bhs(d, s, i);
7234     tcg_gen_ext32u_i64(d, d);
7235     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7236 }
7237 
7238 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7239 {
7240     gen_srshr_d(d, s, i);
7241     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7242 }
7243 
7244 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7245 {
7246     gen_urshr_bhs(d, s, i);
7247     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7248 }
7249 
7250 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7251 {
7252     gen_urshr_bhs(d, s, i);
7253     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7254 }
7255 
7256 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7257 {
7258     gen_urshr_d(d, s, i);
7259     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7260 }
7261 
7262 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7263 {
7264     gen_srshr_bhs(d, s, i);
7265     tcg_gen_ext16u_i64(d, d);
7266     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7267 }
7268 
7269 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7270 {
7271     gen_srshr_bhs(d, s, i);
7272     tcg_gen_ext32u_i64(d, d);
7273     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7274 }
7275 
7276 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7277 {
7278     gen_srshr_d(d, s, i);
7279     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7280 }
7281 
7282 static WideShiftImmFn * const shrn_fns[] = {
7283     tcg_gen_shri_i64,
7284     tcg_gen_shri_i64,
7285     gen_ushr_d,
7286 };
7287 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7288 
7289 static WideShiftImmFn * const rshrn_fns[] = {
7290     gen_urshr_bhs,
7291     gen_urshr_bhs,
7292     gen_urshr_d,
7293 };
7294 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7295 
7296 static WideShiftImmFn * const sqshrn_fns[] = {
7297     gen_sqshrn_b,
7298     gen_sqshrn_h,
7299     gen_sqshrn_s,
7300 };
7301 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7302 
7303 static WideShiftImmFn * const uqshrn_fns[] = {
7304     gen_uqshrn_b,
7305     gen_uqshrn_h,
7306     gen_uqshrn_s,
7307 };
7308 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7309 
7310 static WideShiftImmFn * const sqshrun_fns[] = {
7311     gen_sqshrun_b,
7312     gen_sqshrun_h,
7313     gen_sqshrun_s,
7314 };
7315 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7316 
7317 static WideShiftImmFn * const sqrshrn_fns[] = {
7318     gen_sqrshrn_b,
7319     gen_sqrshrn_h,
7320     gen_sqrshrn_s,
7321 };
7322 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7323 
7324 static WideShiftImmFn * const uqrshrn_fns[] = {
7325     gen_uqrshrn_b,
7326     gen_uqrshrn_h,
7327     gen_uqrshrn_s,
7328 };
7329 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7330 
7331 static WideShiftImmFn * const sqrshrun_fns[] = {
7332     gen_sqrshrun_b,
7333     gen_sqrshrun_h,
7334     gen_sqrshrun_s,
7335 };
7336 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7337 
7338 /*
7339  * Advanced SIMD Scalar Shift by Immediate
7340  */
7341 
7342 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7343                                 WideShiftImmFn *fn, bool accumulate,
7344                                 MemOp sign)
7345 {
7346     if (fp_access_check(s)) {
7347         TCGv_i64 rd = tcg_temp_new_i64();
7348         TCGv_i64 rn = tcg_temp_new_i64();
7349 
7350         read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7351         if (accumulate) {
7352             read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7353         }
7354         fn(rd, rn, a->imm);
7355         write_fp_dreg(s, a->rd, rd);
7356     }
7357     return true;
7358 }
7359 
7360 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7361 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7362 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7363 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7364 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7365 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7366 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7367 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7368 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7369 
7370 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7371 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7372 
7373 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7374                               NeonGenTwoOpEnvFn *fn)
7375 {
7376     TCGv_i32 t = tcg_temp_new_i32();
7377     tcg_gen_extrl_i64_i32(t, s);
7378     fn(t, tcg_env, t, tcg_constant_i32(i));
7379     tcg_gen_extu_i32_i64(d, t);
7380 }
7381 
7382 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7383 {
7384     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7385 }
7386 
7387 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7388 {
7389     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7390 }
7391 
7392 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7393 {
7394     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7395 }
7396 
7397 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7398 {
7399     gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7400 }
7401 
7402 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7403 {
7404     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7405 }
7406 
7407 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7408 {
7409     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7410 }
7411 
7412 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7413 {
7414     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7415 }
7416 
7417 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7418 {
7419     gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7420 }
7421 
7422 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7423 {
7424     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7425 }
7426 
7427 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7428 {
7429     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7430 }
7431 
7432 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7433 {
7434     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7435 }
7436 
7437 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7438 {
7439     gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7440 }
7441 
7442 static WideShiftImmFn * const f_scalar_sqshli[] = {
7443     gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7444 };
7445 
7446 static WideShiftImmFn * const f_scalar_uqshli[] = {
7447     gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7448 };
7449 
7450 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7451     gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7452 };
7453 
7454 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7455 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7456 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7457 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7458 
7459 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7460                                        WideShiftImmFn * const fns[3],
7461                                        MemOp sign, bool zext)
7462 {
7463     MemOp esz = a->esz;
7464 
7465     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7466 
7467     if (fp_access_check(s)) {
7468         TCGv_i64 rd = tcg_temp_new_i64();
7469         TCGv_i64 rn = tcg_temp_new_i64();
7470 
7471         read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7472         fns[esz](rd, rn, a->imm);
7473         if (zext) {
7474             tcg_gen_ext_i64(rd, rd, esz);
7475         }
7476         write_fp_dreg(s, a->rd, rd);
7477     }
7478     return true;
7479 }
7480 
7481 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7482 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7483 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7484 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7485 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7486 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7487 
7488 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7489  * Note that it is the caller's responsibility to ensure that the
7490  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
7491  * mandated semantics for out of range shifts.
7492  */
7493 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7494                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7495 {
7496     switch (shift_type) {
7497     case A64_SHIFT_TYPE_LSL:
7498         tcg_gen_shl_i64(dst, src, shift_amount);
7499         break;
7500     case A64_SHIFT_TYPE_LSR:
7501         tcg_gen_shr_i64(dst, src, shift_amount);
7502         break;
7503     case A64_SHIFT_TYPE_ASR:
7504         if (!sf) {
7505             tcg_gen_ext32s_i64(dst, src);
7506         }
7507         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
7508         break;
7509     case A64_SHIFT_TYPE_ROR:
7510         if (sf) {
7511             tcg_gen_rotr_i64(dst, src, shift_amount);
7512         } else {
7513             TCGv_i32 t0, t1;
7514             t0 = tcg_temp_new_i32();
7515             t1 = tcg_temp_new_i32();
7516             tcg_gen_extrl_i64_i32(t0, src);
7517             tcg_gen_extrl_i64_i32(t1, shift_amount);
7518             tcg_gen_rotr_i32(t0, t0, t1);
7519             tcg_gen_extu_i32_i64(dst, t0);
7520         }
7521         break;
7522     default:
7523         assert(FALSE); /* all shift types should be handled */
7524         break;
7525     }
7526 
7527     if (!sf) { /* zero extend final result */
7528         tcg_gen_ext32u_i64(dst, dst);
7529     }
7530 }
7531 
7532 /* Shift a TCGv src by immediate, put result in dst.
7533  * The shift amount must be in range (this should always be true as the
7534  * relevant instructions will UNDEF on bad shift immediates).
7535  */
7536 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
7537                           enum a64_shift_type shift_type, unsigned int shift_i)
7538 {
7539     assert(shift_i < (sf ? 64 : 32));
7540 
7541     if (shift_i == 0) {
7542         tcg_gen_mov_i64(dst, src);
7543     } else {
7544         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
7545     }
7546 }
7547 
7548 /* Logical (shifted register)
7549  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
7550  * +----+-----+-----------+-------+---+------+--------+------+------+
7551  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
7552  * +----+-----+-----------+-------+---+------+--------+------+------+
7553  */
7554 static void disas_logic_reg(DisasContext *s, uint32_t insn)
7555 {
7556     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
7557     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
7558 
7559     sf = extract32(insn, 31, 1);
7560     opc = extract32(insn, 29, 2);
7561     shift_type = extract32(insn, 22, 2);
7562     invert = extract32(insn, 21, 1);
7563     rm = extract32(insn, 16, 5);
7564     shift_amount = extract32(insn, 10, 6);
7565     rn = extract32(insn, 5, 5);
7566     rd = extract32(insn, 0, 5);
7567 
7568     if (!sf && (shift_amount & (1 << 5))) {
7569         unallocated_encoding(s);
7570         return;
7571     }
7572 
7573     tcg_rd = cpu_reg(s, rd);
7574 
7575     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
7576         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
7577          * register-register MOV and MVN, so it is worth special casing.
7578          */
7579         tcg_rm = cpu_reg(s, rm);
7580         if (invert) {
7581             tcg_gen_not_i64(tcg_rd, tcg_rm);
7582             if (!sf) {
7583                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7584             }
7585         } else {
7586             if (sf) {
7587                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
7588             } else {
7589                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
7590             }
7591         }
7592         return;
7593     }
7594 
7595     tcg_rm = read_cpu_reg(s, rm, sf);
7596 
7597     if (shift_amount) {
7598         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
7599     }
7600 
7601     tcg_rn = cpu_reg(s, rn);
7602 
7603     switch (opc | (invert << 2)) {
7604     case 0: /* AND */
7605     case 3: /* ANDS */
7606         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7607         break;
7608     case 1: /* ORR */
7609         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
7610         break;
7611     case 2: /* EOR */
7612         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
7613         break;
7614     case 4: /* BIC */
7615     case 7: /* BICS */
7616         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
7617         break;
7618     case 5: /* ORN */
7619         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
7620         break;
7621     case 6: /* EON */
7622         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
7623         break;
7624     default:
7625         assert(FALSE);
7626         break;
7627     }
7628 
7629     if (!sf) {
7630         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7631     }
7632 
7633     if (opc == 3) {
7634         gen_logic_CC(sf, tcg_rd);
7635     }
7636 }
7637 
7638 /*
7639  * Add/subtract (extended register)
7640  *
7641  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
7642  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
7643  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
7644  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
7645  *
7646  *  sf: 0 -> 32bit, 1 -> 64bit
7647  *  op: 0 -> add  , 1 -> sub
7648  *   S: 1 -> set flags
7649  * opt: 00
7650  * option: extension type (see DecodeRegExtend)
7651  * imm3: optional shift to Rm
7652  *
7653  * Rd = Rn + LSL(extend(Rm), amount)
7654  */
7655 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
7656 {
7657     int rd = extract32(insn, 0, 5);
7658     int rn = extract32(insn, 5, 5);
7659     int imm3 = extract32(insn, 10, 3);
7660     int option = extract32(insn, 13, 3);
7661     int rm = extract32(insn, 16, 5);
7662     int opt = extract32(insn, 22, 2);
7663     bool setflags = extract32(insn, 29, 1);
7664     bool sub_op = extract32(insn, 30, 1);
7665     bool sf = extract32(insn, 31, 1);
7666 
7667     TCGv_i64 tcg_rm, tcg_rn; /* temps */
7668     TCGv_i64 tcg_rd;
7669     TCGv_i64 tcg_result;
7670 
7671     if (imm3 > 4 || opt != 0) {
7672         unallocated_encoding(s);
7673         return;
7674     }
7675 
7676     /* non-flag setting ops may use SP */
7677     if (!setflags) {
7678         tcg_rd = cpu_reg_sp(s, rd);
7679     } else {
7680         tcg_rd = cpu_reg(s, rd);
7681     }
7682     tcg_rn = read_cpu_reg_sp(s, rn, sf);
7683 
7684     tcg_rm = read_cpu_reg(s, rm, sf);
7685     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
7686 
7687     tcg_result = tcg_temp_new_i64();
7688 
7689     if (!setflags) {
7690         if (sub_op) {
7691             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
7692         } else {
7693             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
7694         }
7695     } else {
7696         if (sub_op) {
7697             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
7698         } else {
7699             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
7700         }
7701     }
7702 
7703     if (sf) {
7704         tcg_gen_mov_i64(tcg_rd, tcg_result);
7705     } else {
7706         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
7707     }
7708 }
7709 
7710 /*
7711  * Add/subtract (shifted register)
7712  *
7713  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
7714  * +--+--+--+-----------+-----+--+-------+---------+------+------+
7715  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
7716  * +--+--+--+-----------+-----+--+-------+---------+------+------+
7717  *
7718  *    sf: 0 -> 32bit, 1 -> 64bit
7719  *    op: 0 -> add  , 1 -> sub
7720  *     S: 1 -> set flags
7721  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
7722  *  imm6: Shift amount to apply to Rm before the add/sub
7723  */
7724 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
7725 {
7726     int rd = extract32(insn, 0, 5);
7727     int rn = extract32(insn, 5, 5);
7728     int imm6 = extract32(insn, 10, 6);
7729     int rm = extract32(insn, 16, 5);
7730     int shift_type = extract32(insn, 22, 2);
7731     bool setflags = extract32(insn, 29, 1);
7732     bool sub_op = extract32(insn, 30, 1);
7733     bool sf = extract32(insn, 31, 1);
7734 
7735     TCGv_i64 tcg_rd = cpu_reg(s, rd);
7736     TCGv_i64 tcg_rn, tcg_rm;
7737     TCGv_i64 tcg_result;
7738 
7739     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
7740         unallocated_encoding(s);
7741         return;
7742     }
7743 
7744     tcg_rn = read_cpu_reg(s, rn, sf);
7745     tcg_rm = read_cpu_reg(s, rm, sf);
7746 
7747     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
7748 
7749     tcg_result = tcg_temp_new_i64();
7750 
7751     if (!setflags) {
7752         if (sub_op) {
7753             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
7754         } else {
7755             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
7756         }
7757     } else {
7758         if (sub_op) {
7759             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
7760         } else {
7761             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
7762         }
7763     }
7764 
7765     if (sf) {
7766         tcg_gen_mov_i64(tcg_rd, tcg_result);
7767     } else {
7768         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
7769     }
7770 }
7771 
7772 /* Data-processing (3 source)
7773  *
7774  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
7775  *  +--+------+-----------+------+------+----+------+------+------+
7776  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
7777  *  +--+------+-----------+------+------+----+------+------+------+
7778  */
7779 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
7780 {
7781     int rd = extract32(insn, 0, 5);
7782     int rn = extract32(insn, 5, 5);
7783     int ra = extract32(insn, 10, 5);
7784     int rm = extract32(insn, 16, 5);
7785     int op_id = (extract32(insn, 29, 3) << 4) |
7786         (extract32(insn, 21, 3) << 1) |
7787         extract32(insn, 15, 1);
7788     bool sf = extract32(insn, 31, 1);
7789     bool is_sub = extract32(op_id, 0, 1);
7790     bool is_high = extract32(op_id, 2, 1);
7791     bool is_signed = false;
7792     TCGv_i64 tcg_op1;
7793     TCGv_i64 tcg_op2;
7794     TCGv_i64 tcg_tmp;
7795 
7796     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
7797     switch (op_id) {
7798     case 0x42: /* SMADDL */
7799     case 0x43: /* SMSUBL */
7800     case 0x44: /* SMULH */
7801         is_signed = true;
7802         break;
7803     case 0x0: /* MADD (32bit) */
7804     case 0x1: /* MSUB (32bit) */
7805     case 0x40: /* MADD (64bit) */
7806     case 0x41: /* MSUB (64bit) */
7807     case 0x4a: /* UMADDL */
7808     case 0x4b: /* UMSUBL */
7809     case 0x4c: /* UMULH */
7810         break;
7811     default:
7812         unallocated_encoding(s);
7813         return;
7814     }
7815 
7816     if (is_high) {
7817         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
7818         TCGv_i64 tcg_rd = cpu_reg(s, rd);
7819         TCGv_i64 tcg_rn = cpu_reg(s, rn);
7820         TCGv_i64 tcg_rm = cpu_reg(s, rm);
7821 
7822         if (is_signed) {
7823             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
7824         } else {
7825             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
7826         }
7827         return;
7828     }
7829 
7830     tcg_op1 = tcg_temp_new_i64();
7831     tcg_op2 = tcg_temp_new_i64();
7832     tcg_tmp = tcg_temp_new_i64();
7833 
7834     if (op_id < 0x42) {
7835         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
7836         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
7837     } else {
7838         if (is_signed) {
7839             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
7840             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
7841         } else {
7842             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
7843             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
7844         }
7845     }
7846 
7847     if (ra == 31 && !is_sub) {
7848         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
7849         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
7850     } else {
7851         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
7852         if (is_sub) {
7853             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
7854         } else {
7855             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
7856         }
7857     }
7858 
7859     if (!sf) {
7860         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
7861     }
7862 }
7863 
7864 /* Add/subtract (with carry)
7865  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
7866  * +--+--+--+------------------------+------+-------------+------+-----+
7867  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
7868  * +--+--+--+------------------------+------+-------------+------+-----+
7869  */
7870 
7871 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
7872 {
7873     unsigned int sf, op, setflags, rm, rn, rd;
7874     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
7875 
7876     sf = extract32(insn, 31, 1);
7877     op = extract32(insn, 30, 1);
7878     setflags = extract32(insn, 29, 1);
7879     rm = extract32(insn, 16, 5);
7880     rn = extract32(insn, 5, 5);
7881     rd = extract32(insn, 0, 5);
7882 
7883     tcg_rd = cpu_reg(s, rd);
7884     tcg_rn = cpu_reg(s, rn);
7885 
7886     if (op) {
7887         tcg_y = tcg_temp_new_i64();
7888         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
7889     } else {
7890         tcg_y = cpu_reg(s, rm);
7891     }
7892 
7893     if (setflags) {
7894         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
7895     } else {
7896         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
7897     }
7898 }
7899 
7900 /*
7901  * Rotate right into flags
7902  *  31 30 29                21       15          10      5  4      0
7903  * +--+--+--+-----------------+--------+-----------+------+--+------+
7904  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
7905  * +--+--+--+-----------------+--------+-----------+------+--+------+
7906  */
7907 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
7908 {
7909     int mask = extract32(insn, 0, 4);
7910     int o2 = extract32(insn, 4, 1);
7911     int rn = extract32(insn, 5, 5);
7912     int imm6 = extract32(insn, 15, 6);
7913     int sf_op_s = extract32(insn, 29, 3);
7914     TCGv_i64 tcg_rn;
7915     TCGv_i32 nzcv;
7916 
7917     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
7918         unallocated_encoding(s);
7919         return;
7920     }
7921 
7922     tcg_rn = read_cpu_reg(s, rn, 1);
7923     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
7924 
7925     nzcv = tcg_temp_new_i32();
7926     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
7927 
7928     if (mask & 8) { /* N */
7929         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
7930     }
7931     if (mask & 4) { /* Z */
7932         tcg_gen_not_i32(cpu_ZF, nzcv);
7933         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
7934     }
7935     if (mask & 2) { /* C */
7936         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
7937     }
7938     if (mask & 1) { /* V */
7939         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
7940     }
7941 }
7942 
7943 /*
7944  * Evaluate into flags
7945  *  31 30 29                21        15   14        10      5  4      0
7946  * +--+--+--+-----------------+---------+----+---------+------+--+------+
7947  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
7948  * +--+--+--+-----------------+---------+----+---------+------+--+------+
7949  */
7950 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
7951 {
7952     int o3_mask = extract32(insn, 0, 5);
7953     int rn = extract32(insn, 5, 5);
7954     int o2 = extract32(insn, 15, 6);
7955     int sz = extract32(insn, 14, 1);
7956     int sf_op_s = extract32(insn, 29, 3);
7957     TCGv_i32 tmp;
7958     int shift;
7959 
7960     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
7961         !dc_isar_feature(aa64_condm_4, s)) {
7962         unallocated_encoding(s);
7963         return;
7964     }
7965     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
7966 
7967     tmp = tcg_temp_new_i32();
7968     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
7969     tcg_gen_shli_i32(cpu_NF, tmp, shift);
7970     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
7971     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
7972     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
7973 }
7974 
7975 /* Conditional compare (immediate / register)
7976  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
7977  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
7978  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
7979  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
7980  *        [1]                             y                [0]       [0]
7981  */
7982 static void disas_cc(DisasContext *s, uint32_t insn)
7983 {
7984     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
7985     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
7986     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
7987     DisasCompare c;
7988 
7989     if (!extract32(insn, 29, 1)) {
7990         unallocated_encoding(s);
7991         return;
7992     }
7993     if (insn & (1 << 10 | 1 << 4)) {
7994         unallocated_encoding(s);
7995         return;
7996     }
7997     sf = extract32(insn, 31, 1);
7998     op = extract32(insn, 30, 1);
7999     is_imm = extract32(insn, 11, 1);
8000     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
8001     cond = extract32(insn, 12, 4);
8002     rn = extract32(insn, 5, 5);
8003     nzcv = extract32(insn, 0, 4);
8004 
8005     /* Set T0 = !COND.  */
8006     tcg_t0 = tcg_temp_new_i32();
8007     arm_test_cc(&c, cond);
8008     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8009 
8010     /* Load the arguments for the new comparison.  */
8011     if (is_imm) {
8012         tcg_y = tcg_temp_new_i64();
8013         tcg_gen_movi_i64(tcg_y, y);
8014     } else {
8015         tcg_y = cpu_reg(s, y);
8016     }
8017     tcg_rn = cpu_reg(s, rn);
8018 
8019     /* Set the flags for the new comparison.  */
8020     tcg_tmp = tcg_temp_new_i64();
8021     if (op) {
8022         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
8023     } else {
8024         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
8025     }
8026 
8027     /* If COND was false, force the flags to #nzcv.  Compute two masks
8028      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8029      * For tcg hosts that support ANDC, we can make do with just T1.
8030      * In either case, allow the tcg optimizer to delete any unused mask.
8031      */
8032     tcg_t1 = tcg_temp_new_i32();
8033     tcg_t2 = tcg_temp_new_i32();
8034     tcg_gen_neg_i32(tcg_t1, tcg_t0);
8035     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8036 
8037     if (nzcv & 8) { /* N */
8038         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8039     } else {
8040         if (TCG_TARGET_HAS_andc_i32) {
8041             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8042         } else {
8043             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8044         }
8045     }
8046     if (nzcv & 4) { /* Z */
8047         if (TCG_TARGET_HAS_andc_i32) {
8048             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8049         } else {
8050             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8051         }
8052     } else {
8053         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8054     }
8055     if (nzcv & 2) { /* C */
8056         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8057     } else {
8058         if (TCG_TARGET_HAS_andc_i32) {
8059             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8060         } else {
8061             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8062         }
8063     }
8064     if (nzcv & 1) { /* V */
8065         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8066     } else {
8067         if (TCG_TARGET_HAS_andc_i32) {
8068             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8069         } else {
8070             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8071         }
8072     }
8073 }
8074 
8075 /* Conditional select
8076  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
8077  * +----+----+---+-----------------+------+------+-----+------+------+
8078  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
8079  * +----+----+---+-----------------+------+------+-----+------+------+
8080  */
8081 static void disas_cond_select(DisasContext *s, uint32_t insn)
8082 {
8083     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
8084     TCGv_i64 tcg_rd, zero;
8085     DisasCompare64 c;
8086 
8087     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
8088         /* S == 1 or op2<1> == 1 */
8089         unallocated_encoding(s);
8090         return;
8091     }
8092     sf = extract32(insn, 31, 1);
8093     else_inv = extract32(insn, 30, 1);
8094     rm = extract32(insn, 16, 5);
8095     cond = extract32(insn, 12, 4);
8096     else_inc = extract32(insn, 10, 1);
8097     rn = extract32(insn, 5, 5);
8098     rd = extract32(insn, 0, 5);
8099 
8100     tcg_rd = cpu_reg(s, rd);
8101 
8102     a64_test_cc(&c, cond);
8103     zero = tcg_constant_i64(0);
8104 
8105     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
8106         /* CSET & CSETM.  */
8107         if (else_inv) {
8108             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8109                                    tcg_rd, c.value, zero);
8110         } else {
8111             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8112                                 tcg_rd, c.value, zero);
8113         }
8114     } else {
8115         TCGv_i64 t_true = cpu_reg(s, rn);
8116         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
8117         if (else_inv && else_inc) {
8118             tcg_gen_neg_i64(t_false, t_false);
8119         } else if (else_inv) {
8120             tcg_gen_not_i64(t_false, t_false);
8121         } else if (else_inc) {
8122             tcg_gen_addi_i64(t_false, t_false, 1);
8123         }
8124         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8125     }
8126 
8127     if (!sf) {
8128         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8129     }
8130 }
8131 
8132 static void handle_clz(DisasContext *s, unsigned int sf,
8133                        unsigned int rn, unsigned int rd)
8134 {
8135     TCGv_i64 tcg_rd, tcg_rn;
8136     tcg_rd = cpu_reg(s, rd);
8137     tcg_rn = cpu_reg(s, rn);
8138 
8139     if (sf) {
8140         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8141     } else {
8142         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
8143         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
8144         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
8145         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
8146     }
8147 }
8148 
8149 static void handle_cls(DisasContext *s, unsigned int sf,
8150                        unsigned int rn, unsigned int rd)
8151 {
8152     TCGv_i64 tcg_rd, tcg_rn;
8153     tcg_rd = cpu_reg(s, rd);
8154     tcg_rn = cpu_reg(s, rn);
8155 
8156     if (sf) {
8157         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
8158     } else {
8159         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
8160         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
8161         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
8162         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
8163     }
8164 }
8165 
8166 static void handle_rbit(DisasContext *s, unsigned int sf,
8167                         unsigned int rn, unsigned int rd)
8168 {
8169     TCGv_i64 tcg_rd, tcg_rn;
8170     tcg_rd = cpu_reg(s, rd);
8171     tcg_rn = cpu_reg(s, rn);
8172 
8173     if (sf) {
8174         gen_helper_rbit64(tcg_rd, tcg_rn);
8175     } else {
8176         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
8177         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
8178         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
8179         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
8180     }
8181 }
8182 
8183 /* REV with sf==1, opcode==3 ("REV64") */
8184 static void handle_rev64(DisasContext *s, unsigned int sf,
8185                          unsigned int rn, unsigned int rd)
8186 {
8187     if (!sf) {
8188         unallocated_encoding(s);
8189         return;
8190     }
8191     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
8192 }
8193 
8194 /* REV with sf==0, opcode==2
8195  * REV32 (sf==1, opcode==2)
8196  */
8197 static void handle_rev32(DisasContext *s, unsigned int sf,
8198                          unsigned int rn, unsigned int rd)
8199 {
8200     TCGv_i64 tcg_rd = cpu_reg(s, rd);
8201     TCGv_i64 tcg_rn = cpu_reg(s, rn);
8202 
8203     if (sf) {
8204         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8205         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8206     } else {
8207         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8208     }
8209 }
8210 
8211 /* REV16 (opcode==1) */
8212 static void handle_rev16(DisasContext *s, unsigned int sf,
8213                          unsigned int rn, unsigned int rd)
8214 {
8215     TCGv_i64 tcg_rd = cpu_reg(s, rd);
8216     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8217     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
8218     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
8219 
8220     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8221     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8222     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8223     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8224     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8225 }
8226 
8227 /* Data-processing (1 source)
8228  *   31  30  29  28             21 20     16 15    10 9    5 4    0
8229  * +----+---+---+-----------------+---------+--------+------+------+
8230  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
8231  * +----+---+---+-----------------+---------+--------+------+------+
8232  */
8233 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
8234 {
8235     unsigned int sf, opcode, opcode2, rn, rd;
8236     TCGv_i64 tcg_rd;
8237 
8238     if (extract32(insn, 29, 1)) {
8239         unallocated_encoding(s);
8240         return;
8241     }
8242 
8243     sf = extract32(insn, 31, 1);
8244     opcode = extract32(insn, 10, 6);
8245     opcode2 = extract32(insn, 16, 5);
8246     rn = extract32(insn, 5, 5);
8247     rd = extract32(insn, 0, 5);
8248 
8249 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
8250 
8251     switch (MAP(sf, opcode2, opcode)) {
8252     case MAP(0, 0x00, 0x00): /* RBIT */
8253     case MAP(1, 0x00, 0x00):
8254         handle_rbit(s, sf, rn, rd);
8255         break;
8256     case MAP(0, 0x00, 0x01): /* REV16 */
8257     case MAP(1, 0x00, 0x01):
8258         handle_rev16(s, sf, rn, rd);
8259         break;
8260     case MAP(0, 0x00, 0x02): /* REV/REV32 */
8261     case MAP(1, 0x00, 0x02):
8262         handle_rev32(s, sf, rn, rd);
8263         break;
8264     case MAP(1, 0x00, 0x03): /* REV64 */
8265         handle_rev64(s, sf, rn, rd);
8266         break;
8267     case MAP(0, 0x00, 0x04): /* CLZ */
8268     case MAP(1, 0x00, 0x04):
8269         handle_clz(s, sf, rn, rd);
8270         break;
8271     case MAP(0, 0x00, 0x05): /* CLS */
8272     case MAP(1, 0x00, 0x05):
8273         handle_cls(s, sf, rn, rd);
8274         break;
8275     case MAP(1, 0x01, 0x00): /* PACIA */
8276         if (s->pauth_active) {
8277             tcg_rd = cpu_reg(s, rd);
8278             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8279         } else if (!dc_isar_feature(aa64_pauth, s)) {
8280             goto do_unallocated;
8281         }
8282         break;
8283     case MAP(1, 0x01, 0x01): /* PACIB */
8284         if (s->pauth_active) {
8285             tcg_rd = cpu_reg(s, rd);
8286             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8287         } else if (!dc_isar_feature(aa64_pauth, s)) {
8288             goto do_unallocated;
8289         }
8290         break;
8291     case MAP(1, 0x01, 0x02): /* PACDA */
8292         if (s->pauth_active) {
8293             tcg_rd = cpu_reg(s, rd);
8294             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8295         } else if (!dc_isar_feature(aa64_pauth, s)) {
8296             goto do_unallocated;
8297         }
8298         break;
8299     case MAP(1, 0x01, 0x03): /* PACDB */
8300         if (s->pauth_active) {
8301             tcg_rd = cpu_reg(s, rd);
8302             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8303         } else if (!dc_isar_feature(aa64_pauth, s)) {
8304             goto do_unallocated;
8305         }
8306         break;
8307     case MAP(1, 0x01, 0x04): /* AUTIA */
8308         if (s->pauth_active) {
8309             tcg_rd = cpu_reg(s, rd);
8310             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8311         } else if (!dc_isar_feature(aa64_pauth, s)) {
8312             goto do_unallocated;
8313         }
8314         break;
8315     case MAP(1, 0x01, 0x05): /* AUTIB */
8316         if (s->pauth_active) {
8317             tcg_rd = cpu_reg(s, rd);
8318             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8319         } else if (!dc_isar_feature(aa64_pauth, s)) {
8320             goto do_unallocated;
8321         }
8322         break;
8323     case MAP(1, 0x01, 0x06): /* AUTDA */
8324         if (s->pauth_active) {
8325             tcg_rd = cpu_reg(s, rd);
8326             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8327         } else if (!dc_isar_feature(aa64_pauth, s)) {
8328             goto do_unallocated;
8329         }
8330         break;
8331     case MAP(1, 0x01, 0x07): /* AUTDB */
8332         if (s->pauth_active) {
8333             tcg_rd = cpu_reg(s, rd);
8334             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8335         } else if (!dc_isar_feature(aa64_pauth, s)) {
8336             goto do_unallocated;
8337         }
8338         break;
8339     case MAP(1, 0x01, 0x08): /* PACIZA */
8340         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8341             goto do_unallocated;
8342         } else if (s->pauth_active) {
8343             tcg_rd = cpu_reg(s, rd);
8344             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8345         }
8346         break;
8347     case MAP(1, 0x01, 0x09): /* PACIZB */
8348         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8349             goto do_unallocated;
8350         } else if (s->pauth_active) {
8351             tcg_rd = cpu_reg(s, rd);
8352             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8353         }
8354         break;
8355     case MAP(1, 0x01, 0x0a): /* PACDZA */
8356         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8357             goto do_unallocated;
8358         } else if (s->pauth_active) {
8359             tcg_rd = cpu_reg(s, rd);
8360             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8361         }
8362         break;
8363     case MAP(1, 0x01, 0x0b): /* PACDZB */
8364         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8365             goto do_unallocated;
8366         } else if (s->pauth_active) {
8367             tcg_rd = cpu_reg(s, rd);
8368             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8369         }
8370         break;
8371     case MAP(1, 0x01, 0x0c): /* AUTIZA */
8372         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8373             goto do_unallocated;
8374         } else if (s->pauth_active) {
8375             tcg_rd = cpu_reg(s, rd);
8376             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8377         }
8378         break;
8379     case MAP(1, 0x01, 0x0d): /* AUTIZB */
8380         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8381             goto do_unallocated;
8382         } else if (s->pauth_active) {
8383             tcg_rd = cpu_reg(s, rd);
8384             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8385         }
8386         break;
8387     case MAP(1, 0x01, 0x0e): /* AUTDZA */
8388         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8389             goto do_unallocated;
8390         } else if (s->pauth_active) {
8391             tcg_rd = cpu_reg(s, rd);
8392             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8393         }
8394         break;
8395     case MAP(1, 0x01, 0x0f): /* AUTDZB */
8396         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8397             goto do_unallocated;
8398         } else if (s->pauth_active) {
8399             tcg_rd = cpu_reg(s, rd);
8400             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8401         }
8402         break;
8403     case MAP(1, 0x01, 0x10): /* XPACI */
8404         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8405             goto do_unallocated;
8406         } else if (s->pauth_active) {
8407             tcg_rd = cpu_reg(s, rd);
8408             gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
8409         }
8410         break;
8411     case MAP(1, 0x01, 0x11): /* XPACD */
8412         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8413             goto do_unallocated;
8414         } else if (s->pauth_active) {
8415             tcg_rd = cpu_reg(s, rd);
8416             gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
8417         }
8418         break;
8419     default:
8420     do_unallocated:
8421         unallocated_encoding(s);
8422         break;
8423     }
8424 
8425 #undef MAP
8426 }
8427 
8428 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
8429                        unsigned int rm, unsigned int rn, unsigned int rd)
8430 {
8431     TCGv_i64 tcg_n, tcg_m, tcg_rd;
8432     tcg_rd = cpu_reg(s, rd);
8433 
8434     if (!sf && is_signed) {
8435         tcg_n = tcg_temp_new_i64();
8436         tcg_m = tcg_temp_new_i64();
8437         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
8438         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
8439     } else {
8440         tcg_n = read_cpu_reg(s, rn, sf);
8441         tcg_m = read_cpu_reg(s, rm, sf);
8442     }
8443 
8444     if (is_signed) {
8445         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
8446     } else {
8447         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
8448     }
8449 
8450     if (!sf) { /* zero extend final result */
8451         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8452     }
8453 }
8454 
8455 /* LSLV, LSRV, ASRV, RORV */
8456 static void handle_shift_reg(DisasContext *s,
8457                              enum a64_shift_type shift_type, unsigned int sf,
8458                              unsigned int rm, unsigned int rn, unsigned int rd)
8459 {
8460     TCGv_i64 tcg_shift = tcg_temp_new_i64();
8461     TCGv_i64 tcg_rd = cpu_reg(s, rd);
8462     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
8463 
8464     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
8465     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
8466 }
8467 
8468 /* CRC32[BHWX], CRC32C[BHWX] */
8469 static void handle_crc32(DisasContext *s,
8470                          unsigned int sf, unsigned int sz, bool crc32c,
8471                          unsigned int rm, unsigned int rn, unsigned int rd)
8472 {
8473     TCGv_i64 tcg_acc, tcg_val;
8474     TCGv_i32 tcg_bytes;
8475 
8476     if (!dc_isar_feature(aa64_crc32, s)
8477         || (sf == 1 && sz != 3)
8478         || (sf == 0 && sz == 3)) {
8479         unallocated_encoding(s);
8480         return;
8481     }
8482 
8483     if (sz == 3) {
8484         tcg_val = cpu_reg(s, rm);
8485     } else {
8486         uint64_t mask;
8487         switch (sz) {
8488         case 0:
8489             mask = 0xFF;
8490             break;
8491         case 1:
8492             mask = 0xFFFF;
8493             break;
8494         case 2:
8495             mask = 0xFFFFFFFF;
8496             break;
8497         default:
8498             g_assert_not_reached();
8499         }
8500         tcg_val = tcg_temp_new_i64();
8501         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
8502     }
8503 
8504     tcg_acc = cpu_reg(s, rn);
8505     tcg_bytes = tcg_constant_i32(1 << sz);
8506 
8507     if (crc32c) {
8508         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
8509     } else {
8510         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
8511     }
8512 }
8513 
8514 /* Data-processing (2 source)
8515  *   31   30  29 28             21 20  16 15    10 9    5 4    0
8516  * +----+---+---+-----------------+------+--------+------+------+
8517  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
8518  * +----+---+---+-----------------+------+--------+------+------+
8519  */
8520 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
8521 {
8522     unsigned int sf, rm, opcode, rn, rd, setflag;
8523     sf = extract32(insn, 31, 1);
8524     setflag = extract32(insn, 29, 1);
8525     rm = extract32(insn, 16, 5);
8526     opcode = extract32(insn, 10, 6);
8527     rn = extract32(insn, 5, 5);
8528     rd = extract32(insn, 0, 5);
8529 
8530     if (setflag && opcode != 0) {
8531         unallocated_encoding(s);
8532         return;
8533     }
8534 
8535     switch (opcode) {
8536     case 0: /* SUBP(S) */
8537         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
8538             goto do_unallocated;
8539         } else {
8540             TCGv_i64 tcg_n, tcg_m, tcg_d;
8541 
8542             tcg_n = read_cpu_reg_sp(s, rn, true);
8543             tcg_m = read_cpu_reg_sp(s, rm, true);
8544             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8545             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8546             tcg_d = cpu_reg(s, rd);
8547 
8548             if (setflag) {
8549                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8550             } else {
8551                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8552             }
8553         }
8554         break;
8555     case 2: /* UDIV */
8556         handle_div(s, false, sf, rm, rn, rd);
8557         break;
8558     case 3: /* SDIV */
8559         handle_div(s, true, sf, rm, rn, rd);
8560         break;
8561     case 4: /* IRG */
8562         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
8563             goto do_unallocated;
8564         }
8565         if (s->ata[0]) {
8566             gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
8567                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
8568         } else {
8569             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
8570                                              cpu_reg_sp(s, rn));
8571         }
8572         break;
8573     case 5: /* GMI */
8574         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
8575             goto do_unallocated;
8576         } else {
8577             TCGv_i64 t = tcg_temp_new_i64();
8578 
8579             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
8580             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8581             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
8582         }
8583         break;
8584     case 8: /* LSLV */
8585         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
8586         break;
8587     case 9: /* LSRV */
8588         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
8589         break;
8590     case 10: /* ASRV */
8591         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
8592         break;
8593     case 11: /* RORV */
8594         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
8595         break;
8596     case 12: /* PACGA */
8597         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
8598             goto do_unallocated;
8599         }
8600         gen_helper_pacga(cpu_reg(s, rd), tcg_env,
8601                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
8602         break;
8603     case 16:
8604     case 17:
8605     case 18:
8606     case 19:
8607     case 20:
8608     case 21:
8609     case 22:
8610     case 23: /* CRC32 */
8611     {
8612         int sz = extract32(opcode, 0, 2);
8613         bool crc32c = extract32(opcode, 2, 1);
8614         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
8615         break;
8616     }
8617     default:
8618     do_unallocated:
8619         unallocated_encoding(s);
8620         break;
8621     }
8622 }
8623 
8624 /*
8625  * Data processing - register
8626  *  31  30 29  28      25    21  20  16      10         0
8627  * +--+---+--+---+-------+-----+-------+-------+---------+
8628  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
8629  * +--+---+--+---+-------+-----+-------+-------+---------+
8630  */
8631 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
8632 {
8633     int op0 = extract32(insn, 30, 1);
8634     int op1 = extract32(insn, 28, 1);
8635     int op2 = extract32(insn, 21, 4);
8636     int op3 = extract32(insn, 10, 6);
8637 
8638     if (!op1) {
8639         if (op2 & 8) {
8640             if (op2 & 1) {
8641                 /* Add/sub (extended register) */
8642                 disas_add_sub_ext_reg(s, insn);
8643             } else {
8644                 /* Add/sub (shifted register) */
8645                 disas_add_sub_reg(s, insn);
8646             }
8647         } else {
8648             /* Logical (shifted register) */
8649             disas_logic_reg(s, insn);
8650         }
8651         return;
8652     }
8653 
8654     switch (op2) {
8655     case 0x0:
8656         switch (op3) {
8657         case 0x00: /* Add/subtract (with carry) */
8658             disas_adc_sbc(s, insn);
8659             break;
8660 
8661         case 0x01: /* Rotate right into flags */
8662         case 0x21:
8663             disas_rotate_right_into_flags(s, insn);
8664             break;
8665 
8666         case 0x02: /* Evaluate into flags */
8667         case 0x12:
8668         case 0x22:
8669         case 0x32:
8670             disas_evaluate_into_flags(s, insn);
8671             break;
8672 
8673         default:
8674             goto do_unallocated;
8675         }
8676         break;
8677 
8678     case 0x2: /* Conditional compare */
8679         disas_cc(s, insn); /* both imm and reg forms */
8680         break;
8681 
8682     case 0x4: /* Conditional select */
8683         disas_cond_select(s, insn);
8684         break;
8685 
8686     case 0x6: /* Data-processing */
8687         if (op0) {    /* (1 source) */
8688             disas_data_proc_1src(s, insn);
8689         } else {      /* (2 source) */
8690             disas_data_proc_2src(s, insn);
8691         }
8692         break;
8693     case 0x8 ... 0xf: /* (3 source) */
8694         disas_data_proc_3src(s, insn);
8695         break;
8696 
8697     default:
8698     do_unallocated:
8699         unallocated_encoding(s);
8700         break;
8701     }
8702 }
8703 
8704 static void handle_fp_compare(DisasContext *s, int size,
8705                               unsigned int rn, unsigned int rm,
8706                               bool cmp_with_zero, bool signal_all_nans)
8707 {
8708     TCGv_i64 tcg_flags = tcg_temp_new_i64();
8709     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8710 
8711     if (size == MO_64) {
8712         TCGv_i64 tcg_vn, tcg_vm;
8713 
8714         tcg_vn = read_fp_dreg(s, rn);
8715         if (cmp_with_zero) {
8716             tcg_vm = tcg_constant_i64(0);
8717         } else {
8718             tcg_vm = read_fp_dreg(s, rm);
8719         }
8720         if (signal_all_nans) {
8721             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8722         } else {
8723             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8724         }
8725     } else {
8726         TCGv_i32 tcg_vn = tcg_temp_new_i32();
8727         TCGv_i32 tcg_vm = tcg_temp_new_i32();
8728 
8729         read_vec_element_i32(s, tcg_vn, rn, 0, size);
8730         if (cmp_with_zero) {
8731             tcg_gen_movi_i32(tcg_vm, 0);
8732         } else {
8733             read_vec_element_i32(s, tcg_vm, rm, 0, size);
8734         }
8735 
8736         switch (size) {
8737         case MO_32:
8738             if (signal_all_nans) {
8739                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8740             } else {
8741                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8742             }
8743             break;
8744         case MO_16:
8745             if (signal_all_nans) {
8746                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8747             } else {
8748                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8749             }
8750             break;
8751         default:
8752             g_assert_not_reached();
8753         }
8754     }
8755 
8756     gen_set_nzcv(tcg_flags);
8757 }
8758 
8759 /* Floating point compare
8760  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
8761  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
8762  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
8763  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
8764  */
8765 static void disas_fp_compare(DisasContext *s, uint32_t insn)
8766 {
8767     unsigned int mos, type, rm, op, rn, opc, op2r;
8768     int size;
8769 
8770     mos = extract32(insn, 29, 3);
8771     type = extract32(insn, 22, 2);
8772     rm = extract32(insn, 16, 5);
8773     op = extract32(insn, 14, 2);
8774     rn = extract32(insn, 5, 5);
8775     opc = extract32(insn, 3, 2);
8776     op2r = extract32(insn, 0, 3);
8777 
8778     if (mos || op || op2r) {
8779         unallocated_encoding(s);
8780         return;
8781     }
8782 
8783     switch (type) {
8784     case 0:
8785         size = MO_32;
8786         break;
8787     case 1:
8788         size = MO_64;
8789         break;
8790     case 3:
8791         size = MO_16;
8792         if (dc_isar_feature(aa64_fp16, s)) {
8793             break;
8794         }
8795         /* fallthru */
8796     default:
8797         unallocated_encoding(s);
8798         return;
8799     }
8800 
8801     if (!fp_access_check(s)) {
8802         return;
8803     }
8804 
8805     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
8806 }
8807 
8808 /* Floating point conditional compare
8809  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
8810  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
8811  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
8812  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
8813  */
8814 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
8815 {
8816     unsigned int mos, type, rm, cond, rn, op, nzcv;
8817     TCGLabel *label_continue = NULL;
8818     int size;
8819 
8820     mos = extract32(insn, 29, 3);
8821     type = extract32(insn, 22, 2);
8822     rm = extract32(insn, 16, 5);
8823     cond = extract32(insn, 12, 4);
8824     rn = extract32(insn, 5, 5);
8825     op = extract32(insn, 4, 1);
8826     nzcv = extract32(insn, 0, 4);
8827 
8828     if (mos) {
8829         unallocated_encoding(s);
8830         return;
8831     }
8832 
8833     switch (type) {
8834     case 0:
8835         size = MO_32;
8836         break;
8837     case 1:
8838         size = MO_64;
8839         break;
8840     case 3:
8841         size = MO_16;
8842         if (dc_isar_feature(aa64_fp16, s)) {
8843             break;
8844         }
8845         /* fallthru */
8846     default:
8847         unallocated_encoding(s);
8848         return;
8849     }
8850 
8851     if (!fp_access_check(s)) {
8852         return;
8853     }
8854 
8855     if (cond < 0x0e) { /* not always */
8856         TCGLabel *label_match = gen_new_label();
8857         label_continue = gen_new_label();
8858         arm_gen_test_cc(cond, label_match);
8859         /* nomatch: */
8860         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
8861         tcg_gen_br(label_continue);
8862         gen_set_label(label_match);
8863     }
8864 
8865     handle_fp_compare(s, size, rn, rm, false, op);
8866 
8867     if (cond < 0x0e) {
8868         gen_set_label(label_continue);
8869     }
8870 }
8871 
8872 /* Floating-point data-processing (1 source) - half precision */
8873 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
8874 {
8875     TCGv_ptr fpst = NULL;
8876     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
8877     TCGv_i32 tcg_res = tcg_temp_new_i32();
8878 
8879     switch (opcode) {
8880     case 0x0: /* FMOV */
8881         tcg_gen_mov_i32(tcg_res, tcg_op);
8882         break;
8883     case 0x1: /* FABS */
8884         gen_vfp_absh(tcg_res, tcg_op);
8885         break;
8886     case 0x2: /* FNEG */
8887         gen_vfp_negh(tcg_res, tcg_op);
8888         break;
8889     case 0x3: /* FSQRT */
8890         fpst = fpstatus_ptr(FPST_FPCR_F16);
8891         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
8892         break;
8893     case 0x8: /* FRINTN */
8894     case 0x9: /* FRINTP */
8895     case 0xa: /* FRINTM */
8896     case 0xb: /* FRINTZ */
8897     case 0xc: /* FRINTA */
8898     {
8899         TCGv_i32 tcg_rmode;
8900 
8901         fpst = fpstatus_ptr(FPST_FPCR_F16);
8902         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
8903         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
8904         gen_restore_rmode(tcg_rmode, fpst);
8905         break;
8906     }
8907     case 0xe: /* FRINTX */
8908         fpst = fpstatus_ptr(FPST_FPCR_F16);
8909         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
8910         break;
8911     case 0xf: /* FRINTI */
8912         fpst = fpstatus_ptr(FPST_FPCR_F16);
8913         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
8914         break;
8915     default:
8916         g_assert_not_reached();
8917     }
8918 
8919     write_fp_sreg(s, rd, tcg_res);
8920 }
8921 
8922 /* Floating-point data-processing (1 source) - single precision */
8923 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
8924 {
8925     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
8926     TCGv_i32 tcg_op, tcg_res;
8927     TCGv_ptr fpst;
8928     int rmode = -1;
8929 
8930     tcg_op = read_fp_sreg(s, rn);
8931     tcg_res = tcg_temp_new_i32();
8932 
8933     switch (opcode) {
8934     case 0x0: /* FMOV */
8935         tcg_gen_mov_i32(tcg_res, tcg_op);
8936         goto done;
8937     case 0x1: /* FABS */
8938         gen_vfp_abss(tcg_res, tcg_op);
8939         goto done;
8940     case 0x2: /* FNEG */
8941         gen_vfp_negs(tcg_res, tcg_op);
8942         goto done;
8943     case 0x3: /* FSQRT */
8944         gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
8945         goto done;
8946     case 0x6: /* BFCVT */
8947         gen_fpst = gen_helper_bfcvt;
8948         break;
8949     case 0x8: /* FRINTN */
8950     case 0x9: /* FRINTP */
8951     case 0xa: /* FRINTM */
8952     case 0xb: /* FRINTZ */
8953     case 0xc: /* FRINTA */
8954         rmode = opcode & 7;
8955         gen_fpst = gen_helper_rints;
8956         break;
8957     case 0xe: /* FRINTX */
8958         gen_fpst = gen_helper_rints_exact;
8959         break;
8960     case 0xf: /* FRINTI */
8961         gen_fpst = gen_helper_rints;
8962         break;
8963     case 0x10: /* FRINT32Z */
8964         rmode = FPROUNDING_ZERO;
8965         gen_fpst = gen_helper_frint32_s;
8966         break;
8967     case 0x11: /* FRINT32X */
8968         gen_fpst = gen_helper_frint32_s;
8969         break;
8970     case 0x12: /* FRINT64Z */
8971         rmode = FPROUNDING_ZERO;
8972         gen_fpst = gen_helper_frint64_s;
8973         break;
8974     case 0x13: /* FRINT64X */
8975         gen_fpst = gen_helper_frint64_s;
8976         break;
8977     default:
8978         g_assert_not_reached();
8979     }
8980 
8981     fpst = fpstatus_ptr(FPST_FPCR);
8982     if (rmode >= 0) {
8983         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
8984         gen_fpst(tcg_res, tcg_op, fpst);
8985         gen_restore_rmode(tcg_rmode, fpst);
8986     } else {
8987         gen_fpst(tcg_res, tcg_op, fpst);
8988     }
8989 
8990  done:
8991     write_fp_sreg(s, rd, tcg_res);
8992 }
8993 
8994 /* Floating-point data-processing (1 source) - double precision */
8995 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
8996 {
8997     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
8998     TCGv_i64 tcg_op, tcg_res;
8999     TCGv_ptr fpst;
9000     int rmode = -1;
9001 
9002     switch (opcode) {
9003     case 0x0: /* FMOV */
9004         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
9005         return;
9006     }
9007 
9008     tcg_op = read_fp_dreg(s, rn);
9009     tcg_res = tcg_temp_new_i64();
9010 
9011     switch (opcode) {
9012     case 0x1: /* FABS */
9013         gen_vfp_absd(tcg_res, tcg_op);
9014         goto done;
9015     case 0x2: /* FNEG */
9016         gen_vfp_negd(tcg_res, tcg_op);
9017         goto done;
9018     case 0x3: /* FSQRT */
9019         gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
9020         goto done;
9021     case 0x8: /* FRINTN */
9022     case 0x9: /* FRINTP */
9023     case 0xa: /* FRINTM */
9024     case 0xb: /* FRINTZ */
9025     case 0xc: /* FRINTA */
9026         rmode = opcode & 7;
9027         gen_fpst = gen_helper_rintd;
9028         break;
9029     case 0xe: /* FRINTX */
9030         gen_fpst = gen_helper_rintd_exact;
9031         break;
9032     case 0xf: /* FRINTI */
9033         gen_fpst = gen_helper_rintd;
9034         break;
9035     case 0x10: /* FRINT32Z */
9036         rmode = FPROUNDING_ZERO;
9037         gen_fpst = gen_helper_frint32_d;
9038         break;
9039     case 0x11: /* FRINT32X */
9040         gen_fpst = gen_helper_frint32_d;
9041         break;
9042     case 0x12: /* FRINT64Z */
9043         rmode = FPROUNDING_ZERO;
9044         gen_fpst = gen_helper_frint64_d;
9045         break;
9046     case 0x13: /* FRINT64X */
9047         gen_fpst = gen_helper_frint64_d;
9048         break;
9049     default:
9050         g_assert_not_reached();
9051     }
9052 
9053     fpst = fpstatus_ptr(FPST_FPCR);
9054     if (rmode >= 0) {
9055         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
9056         gen_fpst(tcg_res, tcg_op, fpst);
9057         gen_restore_rmode(tcg_rmode, fpst);
9058     } else {
9059         gen_fpst(tcg_res, tcg_op, fpst);
9060     }
9061 
9062  done:
9063     write_fp_dreg(s, rd, tcg_res);
9064 }
9065 
9066 static void handle_fp_fcvt(DisasContext *s, int opcode,
9067                            int rd, int rn, int dtype, int ntype)
9068 {
9069     switch (ntype) {
9070     case 0x0:
9071     {
9072         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
9073         if (dtype == 1) {
9074             /* Single to double */
9075             TCGv_i64 tcg_rd = tcg_temp_new_i64();
9076             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
9077             write_fp_dreg(s, rd, tcg_rd);
9078         } else {
9079             /* Single to half */
9080             TCGv_i32 tcg_rd = tcg_temp_new_i32();
9081             TCGv_i32 ahp = get_ahp_flag();
9082             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9083 
9084             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
9085             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
9086             write_fp_sreg(s, rd, tcg_rd);
9087         }
9088         break;
9089     }
9090     case 0x1:
9091     {
9092         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9093         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9094         if (dtype == 0) {
9095             /* Double to single */
9096             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
9097         } else {
9098             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9099             TCGv_i32 ahp = get_ahp_flag();
9100             /* Double to half */
9101             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
9102             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
9103         }
9104         write_fp_sreg(s, rd, tcg_rd);
9105         break;
9106     }
9107     case 0x3:
9108     {
9109         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
9110         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
9111         TCGv_i32 tcg_ahp = get_ahp_flag();
9112         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
9113         if (dtype == 0) {
9114             /* Half to single */
9115             TCGv_i32 tcg_rd = tcg_temp_new_i32();
9116             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9117             write_fp_sreg(s, rd, tcg_rd);
9118         } else {
9119             /* Half to double */
9120             TCGv_i64 tcg_rd = tcg_temp_new_i64();
9121             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9122             write_fp_dreg(s, rd, tcg_rd);
9123         }
9124         break;
9125     }
9126     default:
9127         g_assert_not_reached();
9128     }
9129 }
9130 
9131 /* Floating point data-processing (1 source)
9132  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
9133  * +---+---+---+-----------+------+---+--------+-----------+------+------+
9134  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
9135  * +---+---+---+-----------+------+---+--------+-----------+------+------+
9136  */
9137 static void disas_fp_1src(DisasContext *s, uint32_t insn)
9138 {
9139     int mos = extract32(insn, 29, 3);
9140     int type = extract32(insn, 22, 2);
9141     int opcode = extract32(insn, 15, 6);
9142     int rn = extract32(insn, 5, 5);
9143     int rd = extract32(insn, 0, 5);
9144 
9145     if (mos) {
9146         goto do_unallocated;
9147     }
9148 
9149     switch (opcode) {
9150     case 0x4: case 0x5: case 0x7:
9151     {
9152         /* FCVT between half, single and double precision */
9153         int dtype = extract32(opcode, 0, 2);
9154         if (type == 2 || dtype == type) {
9155             goto do_unallocated;
9156         }
9157         if (!fp_access_check(s)) {
9158             return;
9159         }
9160 
9161         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
9162         break;
9163     }
9164 
9165     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
9166         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
9167             goto do_unallocated;
9168         }
9169         /* fall through */
9170     case 0x0 ... 0x3:
9171     case 0x8 ... 0xc:
9172     case 0xe ... 0xf:
9173         /* 32-to-32 and 64-to-64 ops */
9174         switch (type) {
9175         case 0:
9176             if (!fp_access_check(s)) {
9177                 return;
9178             }
9179             handle_fp_1src_single(s, opcode, rd, rn);
9180             break;
9181         case 1:
9182             if (!fp_access_check(s)) {
9183                 return;
9184             }
9185             handle_fp_1src_double(s, opcode, rd, rn);
9186             break;
9187         case 3:
9188             if (!dc_isar_feature(aa64_fp16, s)) {
9189                 goto do_unallocated;
9190             }
9191 
9192             if (!fp_access_check(s)) {
9193                 return;
9194             }
9195             handle_fp_1src_half(s, opcode, rd, rn);
9196             break;
9197         default:
9198             goto do_unallocated;
9199         }
9200         break;
9201 
9202     case 0x6:
9203         switch (type) {
9204         case 1: /* BFCVT */
9205             if (!dc_isar_feature(aa64_bf16, s)) {
9206                 goto do_unallocated;
9207             }
9208             if (!fp_access_check(s)) {
9209                 return;
9210             }
9211             handle_fp_1src_single(s, opcode, rd, rn);
9212             break;
9213         default:
9214             goto do_unallocated;
9215         }
9216         break;
9217 
9218     default:
9219     do_unallocated:
9220         unallocated_encoding(s);
9221         break;
9222     }
9223 }
9224 
9225 /* Handle floating point <=> fixed point conversions. Note that we can
9226  * also deal with fp <=> integer conversions as a special case (scale == 64)
9227  * OPTME: consider handling that special case specially or at least skipping
9228  * the call to scalbn in the helpers for zero shifts.
9229  */
9230 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
9231                            bool itof, int rmode, int scale, int sf, int type)
9232 {
9233     bool is_signed = !(opcode & 1);
9234     TCGv_ptr tcg_fpstatus;
9235     TCGv_i32 tcg_shift, tcg_single;
9236     TCGv_i64 tcg_double;
9237 
9238     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
9239 
9240     tcg_shift = tcg_constant_i32(64 - scale);
9241 
9242     if (itof) {
9243         TCGv_i64 tcg_int = cpu_reg(s, rn);
9244         if (!sf) {
9245             TCGv_i64 tcg_extend = tcg_temp_new_i64();
9246 
9247             if (is_signed) {
9248                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
9249             } else {
9250                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
9251             }
9252 
9253             tcg_int = tcg_extend;
9254         }
9255 
9256         switch (type) {
9257         case 1: /* float64 */
9258             tcg_double = tcg_temp_new_i64();
9259             if (is_signed) {
9260                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
9261                                      tcg_shift, tcg_fpstatus);
9262             } else {
9263                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
9264                                      tcg_shift, tcg_fpstatus);
9265             }
9266             write_fp_dreg(s, rd, tcg_double);
9267             break;
9268 
9269         case 0: /* float32 */
9270             tcg_single = tcg_temp_new_i32();
9271             if (is_signed) {
9272                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
9273                                      tcg_shift, tcg_fpstatus);
9274             } else {
9275                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
9276                                      tcg_shift, tcg_fpstatus);
9277             }
9278             write_fp_sreg(s, rd, tcg_single);
9279             break;
9280 
9281         case 3: /* float16 */
9282             tcg_single = tcg_temp_new_i32();
9283             if (is_signed) {
9284                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
9285                                      tcg_shift, tcg_fpstatus);
9286             } else {
9287                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
9288                                      tcg_shift, tcg_fpstatus);
9289             }
9290             write_fp_sreg(s, rd, tcg_single);
9291             break;
9292 
9293         default:
9294             g_assert_not_reached();
9295         }
9296     } else {
9297         TCGv_i64 tcg_int = cpu_reg(s, rd);
9298         TCGv_i32 tcg_rmode;
9299 
9300         if (extract32(opcode, 2, 1)) {
9301             /* There are too many rounding modes to all fit into rmode,
9302              * so FCVTA[US] is a special case.
9303              */
9304             rmode = FPROUNDING_TIEAWAY;
9305         }
9306 
9307         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9308 
9309         switch (type) {
9310         case 1: /* float64 */
9311             tcg_double = read_fp_dreg(s, rn);
9312             if (is_signed) {
9313                 if (!sf) {
9314                     gen_helper_vfp_tosld(tcg_int, tcg_double,
9315                                          tcg_shift, tcg_fpstatus);
9316                 } else {
9317                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
9318                                          tcg_shift, tcg_fpstatus);
9319                 }
9320             } else {
9321                 if (!sf) {
9322                     gen_helper_vfp_tould(tcg_int, tcg_double,
9323                                          tcg_shift, tcg_fpstatus);
9324                 } else {
9325                     gen_helper_vfp_touqd(tcg_int, tcg_double,
9326                                          tcg_shift, tcg_fpstatus);
9327                 }
9328             }
9329             if (!sf) {
9330                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
9331             }
9332             break;
9333 
9334         case 0: /* float32 */
9335             tcg_single = read_fp_sreg(s, rn);
9336             if (sf) {
9337                 if (is_signed) {
9338                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
9339                                          tcg_shift, tcg_fpstatus);
9340                 } else {
9341                     gen_helper_vfp_touqs(tcg_int, tcg_single,
9342                                          tcg_shift, tcg_fpstatus);
9343                 }
9344             } else {
9345                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
9346                 if (is_signed) {
9347                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
9348                                          tcg_shift, tcg_fpstatus);
9349                 } else {
9350                     gen_helper_vfp_touls(tcg_dest, tcg_single,
9351                                          tcg_shift, tcg_fpstatus);
9352                 }
9353                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
9354             }
9355             break;
9356 
9357         case 3: /* float16 */
9358             tcg_single = read_fp_sreg(s, rn);
9359             if (sf) {
9360                 if (is_signed) {
9361                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
9362                                          tcg_shift, tcg_fpstatus);
9363                 } else {
9364                     gen_helper_vfp_touqh(tcg_int, tcg_single,
9365                                          tcg_shift, tcg_fpstatus);
9366                 }
9367             } else {
9368                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
9369                 if (is_signed) {
9370                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
9371                                          tcg_shift, tcg_fpstatus);
9372                 } else {
9373                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
9374                                          tcg_shift, tcg_fpstatus);
9375                 }
9376                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
9377             }
9378             break;
9379 
9380         default:
9381             g_assert_not_reached();
9382         }
9383 
9384         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9385     }
9386 }
9387 
9388 /* Floating point <-> fixed point conversions
9389  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
9390  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
9391  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
9392  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
9393  */
9394 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
9395 {
9396     int rd = extract32(insn, 0, 5);
9397     int rn = extract32(insn, 5, 5);
9398     int scale = extract32(insn, 10, 6);
9399     int opcode = extract32(insn, 16, 3);
9400     int rmode = extract32(insn, 19, 2);
9401     int type = extract32(insn, 22, 2);
9402     bool sbit = extract32(insn, 29, 1);
9403     bool sf = extract32(insn, 31, 1);
9404     bool itof;
9405 
9406     if (sbit || (!sf && scale < 32)) {
9407         unallocated_encoding(s);
9408         return;
9409     }
9410 
9411     switch (type) {
9412     case 0: /* float32 */
9413     case 1: /* float64 */
9414         break;
9415     case 3: /* float16 */
9416         if (dc_isar_feature(aa64_fp16, s)) {
9417             break;
9418         }
9419         /* fallthru */
9420     default:
9421         unallocated_encoding(s);
9422         return;
9423     }
9424 
9425     switch ((rmode << 3) | opcode) {
9426     case 0x2: /* SCVTF */
9427     case 0x3: /* UCVTF */
9428         itof = true;
9429         break;
9430     case 0x18: /* FCVTZS */
9431     case 0x19: /* FCVTZU */
9432         itof = false;
9433         break;
9434     default:
9435         unallocated_encoding(s);
9436         return;
9437     }
9438 
9439     if (!fp_access_check(s)) {
9440         return;
9441     }
9442 
9443     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
9444 }
9445 
9446 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
9447 {
9448     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
9449      * without conversion.
9450      */
9451 
9452     if (itof) {
9453         TCGv_i64 tcg_rn = cpu_reg(s, rn);
9454         TCGv_i64 tmp;
9455 
9456         switch (type) {
9457         case 0:
9458             /* 32 bit */
9459             tmp = tcg_temp_new_i64();
9460             tcg_gen_ext32u_i64(tmp, tcg_rn);
9461             write_fp_dreg(s, rd, tmp);
9462             break;
9463         case 1:
9464             /* 64 bit */
9465             write_fp_dreg(s, rd, tcg_rn);
9466             break;
9467         case 2:
9468             /* 64 bit to top half. */
9469             tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
9470             clear_vec_high(s, true, rd);
9471             break;
9472         case 3:
9473             /* 16 bit */
9474             tmp = tcg_temp_new_i64();
9475             tcg_gen_ext16u_i64(tmp, tcg_rn);
9476             write_fp_dreg(s, rd, tmp);
9477             break;
9478         default:
9479             g_assert_not_reached();
9480         }
9481     } else {
9482         TCGv_i64 tcg_rd = cpu_reg(s, rd);
9483 
9484         switch (type) {
9485         case 0:
9486             /* 32 bit */
9487             tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
9488             break;
9489         case 1:
9490             /* 64 bit */
9491             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
9492             break;
9493         case 2:
9494             /* 64 bits from top half */
9495             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
9496             break;
9497         case 3:
9498             /* 16 bit */
9499             tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
9500             break;
9501         default:
9502             g_assert_not_reached();
9503         }
9504     }
9505 }
9506 
9507 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
9508 {
9509     TCGv_i64 t = read_fp_dreg(s, rn);
9510     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
9511 
9512     gen_helper_fjcvtzs(t, t, fpstatus);
9513 
9514     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
9515     tcg_gen_extrh_i64_i32(cpu_ZF, t);
9516     tcg_gen_movi_i32(cpu_CF, 0);
9517     tcg_gen_movi_i32(cpu_NF, 0);
9518     tcg_gen_movi_i32(cpu_VF, 0);
9519 }
9520 
9521 /* Floating point <-> integer conversions
9522  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
9523  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
9524  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
9525  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
9526  */
9527 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
9528 {
9529     int rd = extract32(insn, 0, 5);
9530     int rn = extract32(insn, 5, 5);
9531     int opcode = extract32(insn, 16, 3);
9532     int rmode = extract32(insn, 19, 2);
9533     int type = extract32(insn, 22, 2);
9534     bool sbit = extract32(insn, 29, 1);
9535     bool sf = extract32(insn, 31, 1);
9536     bool itof = false;
9537 
9538     if (sbit) {
9539         goto do_unallocated;
9540     }
9541 
9542     switch (opcode) {
9543     case 2: /* SCVTF */
9544     case 3: /* UCVTF */
9545         itof = true;
9546         /* fallthru */
9547     case 4: /* FCVTAS */
9548     case 5: /* FCVTAU */
9549         if (rmode != 0) {
9550             goto do_unallocated;
9551         }
9552         /* fallthru */
9553     case 0: /* FCVT[NPMZ]S */
9554     case 1: /* FCVT[NPMZ]U */
9555         switch (type) {
9556         case 0: /* float32 */
9557         case 1: /* float64 */
9558             break;
9559         case 3: /* float16 */
9560             if (!dc_isar_feature(aa64_fp16, s)) {
9561                 goto do_unallocated;
9562             }
9563             break;
9564         default:
9565             goto do_unallocated;
9566         }
9567         if (!fp_access_check(s)) {
9568             return;
9569         }
9570         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
9571         break;
9572 
9573     default:
9574         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
9575         case 0b01100110: /* FMOV half <-> 32-bit int */
9576         case 0b01100111:
9577         case 0b11100110: /* FMOV half <-> 64-bit int */
9578         case 0b11100111:
9579             if (!dc_isar_feature(aa64_fp16, s)) {
9580                 goto do_unallocated;
9581             }
9582             /* fallthru */
9583         case 0b00000110: /* FMOV 32-bit */
9584         case 0b00000111:
9585         case 0b10100110: /* FMOV 64-bit */
9586         case 0b10100111:
9587         case 0b11001110: /* FMOV top half of 128-bit */
9588         case 0b11001111:
9589             if (!fp_access_check(s)) {
9590                 return;
9591             }
9592             itof = opcode & 1;
9593             handle_fmov(s, rd, rn, type, itof);
9594             break;
9595 
9596         case 0b00111110: /* FJCVTZS */
9597             if (!dc_isar_feature(aa64_jscvt, s)) {
9598                 goto do_unallocated;
9599             } else if (fp_access_check(s)) {
9600                 handle_fjcvtzs(s, rd, rn);
9601             }
9602             break;
9603 
9604         default:
9605         do_unallocated:
9606             unallocated_encoding(s);
9607             return;
9608         }
9609         break;
9610     }
9611 }
9612 
9613 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
9614  *   31  30  29 28     25 24                          0
9615  * +---+---+---+---------+-----------------------------+
9616  * |   | 0 |   | 1 1 1 1 |                             |
9617  * +---+---+---+---------+-----------------------------+
9618  */
9619 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
9620 {
9621     if (extract32(insn, 24, 1)) {
9622         unallocated_encoding(s); /* in decodetree */
9623     } else if (extract32(insn, 21, 1) == 0) {
9624         /* Floating point to fixed point conversions */
9625         disas_fp_fixed_conv(s, insn);
9626     } else {
9627         switch (extract32(insn, 10, 2)) {
9628         case 1:
9629             /* Floating point conditional compare */
9630             disas_fp_ccomp(s, insn);
9631             break;
9632         case 2:
9633             /* Floating point data-processing (2 source) */
9634             unallocated_encoding(s); /* in decodetree */
9635             break;
9636         case 3:
9637             /* Floating point conditional select */
9638             unallocated_encoding(s); /* in decodetree */
9639             break;
9640         case 0:
9641             switch (ctz32(extract32(insn, 12, 4))) {
9642             case 0: /* [15:12] == xxx1 */
9643                 /* Floating point immediate */
9644                 unallocated_encoding(s); /* in decodetree */
9645                 break;
9646             case 1: /* [15:12] == xx10 */
9647                 /* Floating point compare */
9648                 disas_fp_compare(s, insn);
9649                 break;
9650             case 2: /* [15:12] == x100 */
9651                 /* Floating point data-processing (1 source) */
9652                 disas_fp_1src(s, insn);
9653                 break;
9654             case 3: /* [15:12] == 1000 */
9655                 unallocated_encoding(s);
9656                 break;
9657             default: /* [15:12] == 0000 */
9658                 /* Floating point <-> integer conversions */
9659                 disas_fp_int_conv(s, insn);
9660                 break;
9661             }
9662             break;
9663         }
9664     }
9665 }
9666 
9667 /* Common vector code for handling integer to FP conversion */
9668 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
9669                                    int elements, int is_signed,
9670                                    int fracbits, int size)
9671 {
9672     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9673     TCGv_i32 tcg_shift = NULL;
9674 
9675     MemOp mop = size | (is_signed ? MO_SIGN : 0);
9676     int pass;
9677 
9678     if (fracbits || size == MO_64) {
9679         tcg_shift = tcg_constant_i32(fracbits);
9680     }
9681 
9682     if (size == MO_64) {
9683         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
9684         TCGv_i64 tcg_double = tcg_temp_new_i64();
9685 
9686         for (pass = 0; pass < elements; pass++) {
9687             read_vec_element(s, tcg_int64, rn, pass, mop);
9688 
9689             if (is_signed) {
9690                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
9691                                      tcg_shift, tcg_fpst);
9692             } else {
9693                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
9694                                      tcg_shift, tcg_fpst);
9695             }
9696             if (elements == 1) {
9697                 write_fp_dreg(s, rd, tcg_double);
9698             } else {
9699                 write_vec_element(s, tcg_double, rd, pass, MO_64);
9700             }
9701         }
9702     } else {
9703         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
9704         TCGv_i32 tcg_float = tcg_temp_new_i32();
9705 
9706         for (pass = 0; pass < elements; pass++) {
9707             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
9708 
9709             switch (size) {
9710             case MO_32:
9711                 if (fracbits) {
9712                     if (is_signed) {
9713                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
9714                                              tcg_shift, tcg_fpst);
9715                     } else {
9716                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
9717                                              tcg_shift, tcg_fpst);
9718                     }
9719                 } else {
9720                     if (is_signed) {
9721                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
9722                     } else {
9723                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
9724                     }
9725                 }
9726                 break;
9727             case MO_16:
9728                 if (fracbits) {
9729                     if (is_signed) {
9730                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
9731                                              tcg_shift, tcg_fpst);
9732                     } else {
9733                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
9734                                              tcg_shift, tcg_fpst);
9735                     }
9736                 } else {
9737                     if (is_signed) {
9738                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
9739                     } else {
9740                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
9741                     }
9742                 }
9743                 break;
9744             default:
9745                 g_assert_not_reached();
9746             }
9747 
9748             if (elements == 1) {
9749                 write_fp_sreg(s, rd, tcg_float);
9750             } else {
9751                 write_vec_element_i32(s, tcg_float, rd, pass, size);
9752             }
9753         }
9754     }
9755 
9756     clear_vec_high(s, elements << size == 16, rd);
9757 }
9758 
9759 /* UCVTF/SCVTF - Integer to FP conversion */
9760 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
9761                                          bool is_q, bool is_u,
9762                                          int immh, int immb, int opcode,
9763                                          int rn, int rd)
9764 {
9765     int size, elements, fracbits;
9766     int immhb = immh << 3 | immb;
9767 
9768     if (immh & 8) {
9769         size = MO_64;
9770         if (!is_scalar && !is_q) {
9771             unallocated_encoding(s);
9772             return;
9773         }
9774     } else if (immh & 4) {
9775         size = MO_32;
9776     } else if (immh & 2) {
9777         size = MO_16;
9778         if (!dc_isar_feature(aa64_fp16, s)) {
9779             unallocated_encoding(s);
9780             return;
9781         }
9782     } else {
9783         /* immh == 0 would be a failure of the decode logic */
9784         g_assert(immh == 1);
9785         unallocated_encoding(s);
9786         return;
9787     }
9788 
9789     if (is_scalar) {
9790         elements = 1;
9791     } else {
9792         elements = (8 << is_q) >> size;
9793     }
9794     fracbits = (16 << size) - immhb;
9795 
9796     if (!fp_access_check(s)) {
9797         return;
9798     }
9799 
9800     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
9801 }
9802 
9803 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
9804 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
9805                                          bool is_q, bool is_u,
9806                                          int immh, int immb, int rn, int rd)
9807 {
9808     int immhb = immh << 3 | immb;
9809     int pass, size, fracbits;
9810     TCGv_ptr tcg_fpstatus;
9811     TCGv_i32 tcg_rmode, tcg_shift;
9812 
9813     if (immh & 0x8) {
9814         size = MO_64;
9815         if (!is_scalar && !is_q) {
9816             unallocated_encoding(s);
9817             return;
9818         }
9819     } else if (immh & 0x4) {
9820         size = MO_32;
9821     } else if (immh & 0x2) {
9822         size = MO_16;
9823         if (!dc_isar_feature(aa64_fp16, s)) {
9824             unallocated_encoding(s);
9825             return;
9826         }
9827     } else {
9828         /* Should have split out AdvSIMD modified immediate earlier.  */
9829         assert(immh == 1);
9830         unallocated_encoding(s);
9831         return;
9832     }
9833 
9834     if (!fp_access_check(s)) {
9835         return;
9836     }
9837 
9838     assert(!(is_scalar && is_q));
9839 
9840     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9841     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
9842     fracbits = (16 << size) - immhb;
9843     tcg_shift = tcg_constant_i32(fracbits);
9844 
9845     if (size == MO_64) {
9846         int maxpass = is_scalar ? 1 : 2;
9847 
9848         for (pass = 0; pass < maxpass; pass++) {
9849             TCGv_i64 tcg_op = tcg_temp_new_i64();
9850 
9851             read_vec_element(s, tcg_op, rn, pass, MO_64);
9852             if (is_u) {
9853                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9854             } else {
9855                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9856             }
9857             write_vec_element(s, tcg_op, rd, pass, MO_64);
9858         }
9859         clear_vec_high(s, is_q, rd);
9860     } else {
9861         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
9862         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
9863 
9864         switch (size) {
9865         case MO_16:
9866             if (is_u) {
9867                 fn = gen_helper_vfp_touhh;
9868             } else {
9869                 fn = gen_helper_vfp_toshh;
9870             }
9871             break;
9872         case MO_32:
9873             if (is_u) {
9874                 fn = gen_helper_vfp_touls;
9875             } else {
9876                 fn = gen_helper_vfp_tosls;
9877             }
9878             break;
9879         default:
9880             g_assert_not_reached();
9881         }
9882 
9883         for (pass = 0; pass < maxpass; pass++) {
9884             TCGv_i32 tcg_op = tcg_temp_new_i32();
9885 
9886             read_vec_element_i32(s, tcg_op, rn, pass, size);
9887             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9888             if (is_scalar) {
9889                 if (size == MO_16 && !is_u) {
9890                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
9891                 }
9892                 write_fp_sreg(s, rd, tcg_op);
9893             } else {
9894                 write_vec_element_i32(s, tcg_op, rd, pass, size);
9895             }
9896         }
9897         if (!is_scalar) {
9898             clear_vec_high(s, is_q, rd);
9899         }
9900     }
9901 
9902     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9903 }
9904 
9905 /* AdvSIMD scalar shift by immediate
9906  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
9907  * +-----+---+-------------+------+------+--------+---+------+------+
9908  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9909  * +-----+---+-------------+------+------+--------+---+------+------+
9910  *
 * This is the scalar version, so it works on fixed-size registers.
9912  */
9913 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
9914 {
9915     int rd = extract32(insn, 0, 5);
9916     int rn = extract32(insn, 5, 5);
9917     int opcode = extract32(insn, 11, 5);
9918     int immb = extract32(insn, 16, 3);
9919     int immh = extract32(insn, 19, 4);
9920     bool is_u = extract32(insn, 29, 1);
9921 
9922     if (immh == 0) {
9923         unallocated_encoding(s);
9924         return;
9925     }
9926 
9927     switch (opcode) {
9928     case 0x1c: /* SCVTF, UCVTF */
9929         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
9930                                      opcode, rn, rd);
9931         break;
9932     case 0x1f: /* FCVTZS, FCVTZU */
9933         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
9934         break;
9935     default:
9936     case 0x00: /* SSHR / USHR */
9937     case 0x02: /* SSRA / USRA */
9938     case 0x04: /* SRSHR / URSHR */
9939     case 0x06: /* SRSRA / URSRA */
9940     case 0x08: /* SRI */
9941     case 0x0a: /* SHL / SLI */
9942     case 0x0c: /* SQSHLU */
9943     case 0x0e: /* SQSHL, UQSHL */
9944     case 0x10: /* SQSHRUN */
9945     case 0x11: /* SQRSHRUN */
9946     case 0x12: /* SQSHRN, UQSHRN */
9947     case 0x13: /* SQRSHRN, UQRSHRN */
9948         unallocated_encoding(s);
9949         break;
9950     }
9951 }
9952 
9953 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9954                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9955                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9956 {
9957     /* Handle 64->64 opcodes which are shared between the scalar and
9958      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9959      * is valid in either group and also the double-precision fp ops.
9960      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9961      * requires them.
9962      */
9963     TCGCond cond;
9964 
9965     switch (opcode) {
9966     case 0x4: /* CLS, CLZ */
9967         if (u) {
9968             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9969         } else {
9970             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9971         }
9972         break;
9973     case 0x5: /* NOT */
9974         /* This opcode is shared with CNT and RBIT but we have earlier
9975          * enforced that size == 3 if and only if this is the NOT insn.
9976          */
9977         tcg_gen_not_i64(tcg_rd, tcg_rn);
9978         break;
9979     case 0x7: /* SQABS, SQNEG */
9980         if (u) {
9981             gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
9982         } else {
9983             gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
9984         }
9985         break;
9986     case 0xa: /* CMLT */
9987         cond = TCG_COND_LT;
9988     do_cmop:
9989         /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
9990         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
9991         break;
9992     case 0x8: /* CMGT, CMGE */
9993         cond = u ? TCG_COND_GE : TCG_COND_GT;
9994         goto do_cmop;
9995     case 0x9: /* CMEQ, CMLE */
9996         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9997         goto do_cmop;
9998     case 0xb: /* ABS, NEG */
9999         if (u) {
10000             tcg_gen_neg_i64(tcg_rd, tcg_rn);
10001         } else {
10002             tcg_gen_abs_i64(tcg_rd, tcg_rn);
10003         }
10004         break;
10005     case 0x2f: /* FABS */
10006         gen_vfp_absd(tcg_rd, tcg_rn);
10007         break;
10008     case 0x6f: /* FNEG */
10009         gen_vfp_negd(tcg_rd, tcg_rn);
10010         break;
10011     case 0x7f: /* FSQRT */
10012         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
10013         break;
10014     case 0x1a: /* FCVTNS */
10015     case 0x1b: /* FCVTMS */
10016     case 0x1c: /* FCVTAS */
10017     case 0x3a: /* FCVTPS */
10018     case 0x3b: /* FCVTZS */
10019         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
10020         break;
10021     case 0x5a: /* FCVTNU */
10022     case 0x5b: /* FCVTMU */
10023     case 0x5c: /* FCVTAU */
10024     case 0x7a: /* FCVTPU */
10025     case 0x7b: /* FCVTZU */
10026         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
10027         break;
10028     case 0x18: /* FRINTN */
10029     case 0x19: /* FRINTM */
10030     case 0x38: /* FRINTP */
10031     case 0x39: /* FRINTZ */
10032     case 0x58: /* FRINTA */
10033     case 0x79: /* FRINTI */
10034         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
10035         break;
10036     case 0x59: /* FRINTX */
10037         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
10038         break;
10039     case 0x1e: /* FRINT32Z */
10040     case 0x5e: /* FRINT32X */
10041         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
10042         break;
10043     case 0x1f: /* FRINT64Z */
10044     case 0x5f: /* FRINT64X */
10045         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
10046         break;
10047     default:
10048         g_assert_not_reached();
10049     }
10050 }
10051 
10052 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
10053                                    bool is_scalar, bool is_u, bool is_q,
10054                                    int size, int rn, int rd)
10055 {
10056     bool is_double = (size == MO_64);
10057     TCGv_ptr fpst;
10058 
10059     if (!fp_access_check(s)) {
10060         return;
10061     }
10062 
10063     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
10064 
10065     if (is_double) {
10066         TCGv_i64 tcg_op = tcg_temp_new_i64();
10067         TCGv_i64 tcg_zero = tcg_constant_i64(0);
10068         TCGv_i64 tcg_res = tcg_temp_new_i64();
10069         NeonGenTwoDoubleOpFn *genfn;
10070         bool swap = false;
10071         int pass;
10072 
10073         switch (opcode) {
10074         case 0x2e: /* FCMLT (zero) */
10075             swap = true;
10076             /* fallthrough */
10077         case 0x2c: /* FCMGT (zero) */
10078             genfn = gen_helper_neon_cgt_f64;
10079             break;
10080         case 0x2d: /* FCMEQ (zero) */
10081             genfn = gen_helper_neon_ceq_f64;
10082             break;
10083         case 0x6d: /* FCMLE (zero) */
10084             swap = true;
10085             /* fall through */
10086         case 0x6c: /* FCMGE (zero) */
10087             genfn = gen_helper_neon_cge_f64;
10088             break;
10089         default:
10090             g_assert_not_reached();
10091         }
10092 
10093         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10094             read_vec_element(s, tcg_op, rn, pass, MO_64);
10095             if (swap) {
10096                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
10097             } else {
10098                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
10099             }
10100             write_vec_element(s, tcg_res, rd, pass, MO_64);
10101         }
10102 
10103         clear_vec_high(s, !is_scalar, rd);
10104     } else {
10105         TCGv_i32 tcg_op = tcg_temp_new_i32();
10106         TCGv_i32 tcg_zero = tcg_constant_i32(0);
10107         TCGv_i32 tcg_res = tcg_temp_new_i32();
10108         NeonGenTwoSingleOpFn *genfn;
10109         bool swap = false;
10110         int pass, maxpasses;
10111 
10112         if (size == MO_16) {
10113             switch (opcode) {
10114             case 0x2e: /* FCMLT (zero) */
10115                 swap = true;
10116                 /* fall through */
10117             case 0x2c: /* FCMGT (zero) */
10118                 genfn = gen_helper_advsimd_cgt_f16;
10119                 break;
10120             case 0x2d: /* FCMEQ (zero) */
10121                 genfn = gen_helper_advsimd_ceq_f16;
10122                 break;
10123             case 0x6d: /* FCMLE (zero) */
10124                 swap = true;
10125                 /* fall through */
10126             case 0x6c: /* FCMGE (zero) */
10127                 genfn = gen_helper_advsimd_cge_f16;
10128                 break;
10129             default:
10130                 g_assert_not_reached();
10131             }
10132         } else {
10133             switch (opcode) {
10134             case 0x2e: /* FCMLT (zero) */
10135                 swap = true;
10136                 /* fall through */
10137             case 0x2c: /* FCMGT (zero) */
10138                 genfn = gen_helper_neon_cgt_f32;
10139                 break;
10140             case 0x2d: /* FCMEQ (zero) */
10141                 genfn = gen_helper_neon_ceq_f32;
10142                 break;
10143             case 0x6d: /* FCMLE (zero) */
10144                 swap = true;
10145                 /* fall through */
10146             case 0x6c: /* FCMGE (zero) */
10147                 genfn = gen_helper_neon_cge_f32;
10148                 break;
10149             default:
10150                 g_assert_not_reached();
10151             }
10152         }
10153 
10154         if (is_scalar) {
10155             maxpasses = 1;
10156         } else {
10157             int vector_size = 8 << is_q;
10158             maxpasses = vector_size >> size;
10159         }
10160 
10161         for (pass = 0; pass < maxpasses; pass++) {
10162             read_vec_element_i32(s, tcg_op, rn, pass, size);
10163             if (swap) {
10164                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
10165             } else {
10166                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
10167             }
10168             if (is_scalar) {
10169                 write_fp_sreg(s, rd, tcg_res);
10170             } else {
10171                 write_vec_element_i32(s, tcg_res, rd, pass, size);
10172             }
10173         }
10174 
10175         if (!is_scalar) {
10176             clear_vec_high(s, is_q, rd);
10177         }
10178     }
10179 }
10180 
10181 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
10182                                     bool is_scalar, bool is_u, bool is_q,
10183                                     int size, int rn, int rd)
10184 {
10185     bool is_double = (size == 3);
10186     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10187 
10188     if (is_double) {
10189         TCGv_i64 tcg_op = tcg_temp_new_i64();
10190         TCGv_i64 tcg_res = tcg_temp_new_i64();
10191         int pass;
10192 
10193         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10194             read_vec_element(s, tcg_op, rn, pass, MO_64);
10195             switch (opcode) {
10196             case 0x3d: /* FRECPE */
10197                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
10198                 break;
10199             case 0x3f: /* FRECPX */
10200                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
10201                 break;
10202             case 0x7d: /* FRSQRTE */
10203                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
10204                 break;
10205             default:
10206                 g_assert_not_reached();
10207             }
10208             write_vec_element(s, tcg_res, rd, pass, MO_64);
10209         }
10210         clear_vec_high(s, !is_scalar, rd);
10211     } else {
10212         TCGv_i32 tcg_op = tcg_temp_new_i32();
10213         TCGv_i32 tcg_res = tcg_temp_new_i32();
10214         int pass, maxpasses;
10215 
10216         if (is_scalar) {
10217             maxpasses = 1;
10218         } else {
10219             maxpasses = is_q ? 4 : 2;
10220         }
10221 
10222         for (pass = 0; pass < maxpasses; pass++) {
10223             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10224 
10225             switch (opcode) {
10226             case 0x3c: /* URECPE */
10227                 gen_helper_recpe_u32(tcg_res, tcg_op);
10228                 break;
10229             case 0x3d: /* FRECPE */
10230                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
10231                 break;
10232             case 0x3f: /* FRECPX */
10233                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
10234                 break;
10235             case 0x7d: /* FRSQRTE */
10236                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
10237                 break;
10238             default:
10239                 g_assert_not_reached();
10240             }
10241 
10242             if (is_scalar) {
10243                 write_fp_sreg(s, rd, tcg_res);
10244             } else {
10245                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10246             }
10247         }
10248         if (!is_scalar) {
10249             clear_vec_high(s, is_q, rd);
10250         }
10251     }
10252 }
10253 
10254 static void handle_2misc_narrow(DisasContext *s, bool scalar,
10255                                 int opcode, bool u, bool is_q,
10256                                 int size, int rn, int rd)
10257 {
10258     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
10259      * in the source becomes a size element in the destination).
10260      */
10261     int pass;
10262     TCGv_i64 tcg_res[2];
10263     int destelt = is_q ? 2 : 0;
10264     int passes = scalar ? 1 : 2;
10265 
10266     if (scalar) {
10267         tcg_res[1] = tcg_constant_i64(0);
10268     }
10269 
10270     for (pass = 0; pass < passes; pass++) {
10271         TCGv_i64 tcg_op = tcg_temp_new_i64();
10272         NeonGenOne64OpFn *genfn = NULL;
10273         NeonGenOne64OpEnvFn *genenvfn = NULL;
10274 
10275         if (scalar) {
10276             read_vec_element(s, tcg_op, rn, pass, size + 1);
10277         } else {
10278             read_vec_element(s, tcg_op, rn, pass, MO_64);
10279         }
10280         tcg_res[pass] = tcg_temp_new_i64();
10281 
10282         switch (opcode) {
10283         case 0x12: /* XTN, SQXTUN */
10284         {
10285             static NeonGenOne64OpFn * const xtnfns[3] = {
10286                 gen_helper_neon_narrow_u8,
10287                 gen_helper_neon_narrow_u16,
10288                 tcg_gen_ext32u_i64,
10289             };
10290             static NeonGenOne64OpEnvFn * const sqxtunfns[3] = {
10291                 gen_helper_neon_unarrow_sat8,
10292                 gen_helper_neon_unarrow_sat16,
10293                 gen_helper_neon_unarrow_sat32,
10294             };
10295             if (u) {
10296                 genenvfn = sqxtunfns[size];
10297             } else {
10298                 genfn = xtnfns[size];
10299             }
10300             break;
10301         }
10302         case 0x14: /* SQXTN, UQXTN */
10303         {
10304             static NeonGenOne64OpEnvFn * const fns[3][2] = {
10305                 { gen_helper_neon_narrow_sat_s8,
10306                   gen_helper_neon_narrow_sat_u8 },
10307                 { gen_helper_neon_narrow_sat_s16,
10308                   gen_helper_neon_narrow_sat_u16 },
10309                 { gen_helper_neon_narrow_sat_s32,
10310                   gen_helper_neon_narrow_sat_u32 },
10311             };
10312             genenvfn = fns[size][u];
10313             break;
10314         }
10315         case 0x16: /* FCVTN, FCVTN2 */
10316             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
10317             if (size == 2) {
10318                 TCGv_i32 tmp = tcg_temp_new_i32();
10319                 gen_helper_vfp_fcvtsd(tmp, tcg_op, tcg_env);
10320                 tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
10321             } else {
10322                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
10323                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
10324                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10325                 TCGv_i32 ahp = get_ahp_flag();
10326 
10327                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
10328                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
10329                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
10330                 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
10331                 tcg_gen_extu_i32_i64(tcg_res[pass], tcg_lo);
10332             }
10333             break;
10334         case 0x36: /* BFCVTN, BFCVTN2 */
10335             {
10336                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10337                 TCGv_i32 tmp = tcg_temp_new_i32();
10338                 gen_helper_bfcvt_pair(tmp, tcg_op, fpst);
10339                 tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
10340             }
10341             break;
10342         case 0x56:  /* FCVTXN, FCVTXN2 */
10343             {
10344                 /*
10345                  * 64 bit to 32 bit float conversion
10346                  * with von Neumann rounding (round to odd)
10347                  */
10348                 TCGv_i32 tmp = tcg_temp_new_i32();
10349                 assert(size == 2);
10350                 gen_helper_fcvtx_f64_to_f32(tmp, tcg_op, tcg_env);
10351                 tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
10352             }
10353             break;
10354         default:
10355             g_assert_not_reached();
10356         }
10357 
10358         if (genfn) {
10359             genfn(tcg_res[pass], tcg_op);
10360         } else if (genenvfn) {
10361             genenvfn(tcg_res[pass], tcg_env, tcg_op);
10362         }
10363     }
10364 
10365     for (pass = 0; pass < 2; pass++) {
10366         write_vec_element(s, tcg_res[pass], rd, destelt + pass, MO_32);
10367     }
10368     clear_vec_high(s, is_q, rd);
10369 }
10370 
10371 /* AdvSIMD scalar two reg misc
10372  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
10373  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10374  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10375  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10376  */
10377 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
10378 {
10379     int rd = extract32(insn, 0, 5);
10380     int rn = extract32(insn, 5, 5);
10381     int opcode = extract32(insn, 12, 5);
10382     int size = extract32(insn, 22, 2);
10383     bool u = extract32(insn, 29, 1);
10384     bool is_fcvt = false;
10385     int rmode;
10386     TCGv_i32 tcg_rmode;
10387     TCGv_ptr tcg_fpstatus;
10388 
10389     switch (opcode) {
10390     case 0x7: /* SQABS / SQNEG */
10391         break;
10392     case 0xa: /* CMLT */
10393         if (u) {
10394             unallocated_encoding(s);
10395             return;
10396         }
10397         /* fall through */
10398     case 0x8: /* CMGT, CMGE */
10399     case 0x9: /* CMEQ, CMLE */
10400     case 0xb: /* ABS, NEG */
10401         if (size != 3) {
10402             unallocated_encoding(s);
10403             return;
10404         }
10405         break;
10406     case 0x12: /* SQXTUN */
10407         if (!u) {
10408             unallocated_encoding(s);
10409             return;
10410         }
10411         /* fall through */
10412     case 0x14: /* SQXTN, UQXTN */
10413         if (size == 3) {
10414             unallocated_encoding(s);
10415             return;
10416         }
10417         if (!fp_access_check(s)) {
10418             return;
10419         }
10420         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10421         return;
10422     case 0xc ... 0xf:
10423     case 0x16 ... 0x1d:
10424     case 0x1f:
10425         /* Floating point: U, size[1] and opcode indicate operation;
10426          * size[0] indicates single or double precision.
10427          */
10428         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10429         size = extract32(size, 0, 1) ? 3 : 2;
10430         switch (opcode) {
10431         case 0x2c: /* FCMGT (zero) */
10432         case 0x2d: /* FCMEQ (zero) */
10433         case 0x2e: /* FCMLT (zero) */
10434         case 0x6c: /* FCMGE (zero) */
10435         case 0x6d: /* FCMLE (zero) */
10436             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10437             return;
10438         case 0x1d: /* SCVTF */
10439         case 0x5d: /* UCVTF */
10440         {
10441             bool is_signed = (opcode == 0x1d);
10442             if (!fp_access_check(s)) {
10443                 return;
10444             }
10445             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10446             return;
10447         }
10448         case 0x3d: /* FRECPE */
10449         case 0x3f: /* FRECPX */
10450         case 0x7d: /* FRSQRTE */
10451             if (!fp_access_check(s)) {
10452                 return;
10453             }
10454             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10455             return;
10456         case 0x1a: /* FCVTNS */
10457         case 0x1b: /* FCVTMS */
10458         case 0x3a: /* FCVTPS */
10459         case 0x3b: /* FCVTZS */
10460         case 0x5a: /* FCVTNU */
10461         case 0x5b: /* FCVTMU */
10462         case 0x7a: /* FCVTPU */
10463         case 0x7b: /* FCVTZU */
10464             is_fcvt = true;
10465             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10466             break;
10467         case 0x1c: /* FCVTAS */
10468         case 0x5c: /* FCVTAU */
10469             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10470             is_fcvt = true;
10471             rmode = FPROUNDING_TIEAWAY;
10472             break;
10473         case 0x56: /* FCVTXN, FCVTXN2 */
10474             if (size == 2) {
10475                 unallocated_encoding(s);
10476                 return;
10477             }
10478             if (!fp_access_check(s)) {
10479                 return;
10480             }
10481             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10482             return;
10483         default:
10484             unallocated_encoding(s);
10485             return;
10486         }
10487         break;
10488     default:
10489     case 0x3: /* USQADD / SUQADD */
10490         unallocated_encoding(s);
10491         return;
10492     }
10493 
10494     if (!fp_access_check(s)) {
10495         return;
10496     }
10497 
10498     if (is_fcvt) {
10499         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10500         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10501     } else {
10502         tcg_fpstatus = NULL;
10503         tcg_rmode = NULL;
10504     }
10505 
10506     if (size == 3) {
10507         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10508         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10509 
10510         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10511         write_fp_dreg(s, rd, tcg_rd);
10512     } else {
10513         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10514         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10515 
10516         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10517 
10518         switch (opcode) {
10519         case 0x7: /* SQABS, SQNEG */
10520         {
10521             NeonGenOneOpEnvFn *genfn;
10522             static NeonGenOneOpEnvFn * const fns[3][2] = {
10523                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10524                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10525                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10526             };
10527             genfn = fns[size][u];
10528             genfn(tcg_rd, tcg_env, tcg_rn);
10529             break;
10530         }
10531         case 0x1a: /* FCVTNS */
10532         case 0x1b: /* FCVTMS */
10533         case 0x1c: /* FCVTAS */
10534         case 0x3a: /* FCVTPS */
10535         case 0x3b: /* FCVTZS */
10536             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10537                                  tcg_fpstatus);
10538             break;
10539         case 0x5a: /* FCVTNU */
10540         case 0x5b: /* FCVTMU */
10541         case 0x5c: /* FCVTAU */
10542         case 0x7a: /* FCVTPU */
10543         case 0x7b: /* FCVTZU */
10544             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10545                                  tcg_fpstatus);
10546             break;
10547         default:
10548             g_assert_not_reached();
10549         }
10550 
10551         write_fp_sreg(s, rd, tcg_rd);
10552     }
10553 
10554     if (is_fcvt) {
10555         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10556     }
10557 }
10558 
10559 /* AdvSIMD shift by immediate
10560  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10561  * +---+---+---+-------------+------+------+--------+---+------+------+
10562  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10563  * +---+---+---+-------------+------+------+--------+---+------+------+
10564  */
10565 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10566 {
10567     int rd = extract32(insn, 0, 5);
10568     int rn = extract32(insn, 5, 5);
10569     int opcode = extract32(insn, 11, 5);
10570     int immb = extract32(insn, 16, 3);
10571     int immh = extract32(insn, 19, 4);
10572     bool is_u = extract32(insn, 29, 1);
10573     bool is_q = extract32(insn, 30, 1);
10574 
10575     if (immh == 0) {
10576         unallocated_encoding(s);
10577         return;
10578     }
10579 
10580     switch (opcode) {
10581     case 0x1c: /* SCVTF / UCVTF */
10582         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10583                                      opcode, rn, rd);
10584         break;
10585     case 0x1f: /* FCVTZS/ FCVTZU */
10586         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10587         return;
10588     default:
10589     case 0x00: /* SSHR / USHR */
10590     case 0x02: /* SSRA / USRA (accumulate) */
10591     case 0x04: /* SRSHR / URSHR (rounding) */
10592     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10593     case 0x08: /* SRI */
10594     case 0x0a: /* SHL / SLI */
10595     case 0x0c: /* SQSHLU */
10596     case 0x0e: /* SQSHL, UQSHL */
10597     case 0x10: /* SHRN / SQSHRUN */
10598     case 0x11: /* RSHRN / SQRSHRUN */
10599     case 0x12: /* SQSHRN / UQSHRN */
10600     case 0x13: /* SQRSHRN / UQRSHRN */
10601     case 0x14: /* SSHLL / USHLL */
10602         unallocated_encoding(s);
10603         return;
10604     }
10605 }
10606 
10607 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
10608                                   int size, int rn, int rd)
10609 {
10610     /* Handle 2-reg-misc ops which are widening (so each size element
10611      * in the source becomes a 2*size element in the destination.
10612      * The only instruction like this is FCVTL.
10613      */
10614     int pass;
10615 
10616     if (size == 3) {
10617         /* 32 -> 64 bit fp conversion */
10618         TCGv_i64 tcg_res[2];
10619         int srcelt = is_q ? 2 : 0;
10620 
10621         for (pass = 0; pass < 2; pass++) {
10622             TCGv_i32 tcg_op = tcg_temp_new_i32();
10623             tcg_res[pass] = tcg_temp_new_i64();
10624 
10625             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
10626             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
10627         }
10628         for (pass = 0; pass < 2; pass++) {
10629             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10630         }
10631     } else {
10632         /* 16 -> 32 bit fp conversion */
10633         int srcelt = is_q ? 4 : 0;
10634         TCGv_i32 tcg_res[4];
10635         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10636         TCGv_i32 ahp = get_ahp_flag();
10637 
10638         for (pass = 0; pass < 4; pass++) {
10639             tcg_res[pass] = tcg_temp_new_i32();
10640 
10641             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
10642             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10643                                            fpst, ahp);
10644         }
10645         for (pass = 0; pass < 4; pass++) {
10646             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10647         }
10648     }
10649 }
10650 
10651 static void handle_rev(DisasContext *s, int opcode, bool u,
10652                        bool is_q, int size, int rn, int rd)
10653 {
10654     int op = (opcode << 1) | u;
10655     int opsz = op + size;
10656     int grp_size = 3 - opsz;
10657     int dsize = is_q ? 128 : 64;
10658     int i;
10659 
10660     if (opsz >= 3) {
10661         unallocated_encoding(s);
10662         return;
10663     }
10664 
10665     if (!fp_access_check(s)) {
10666         return;
10667     }
10668 
10669     if (size == 0) {
10670         /* Special case bytes, use bswap op on each group of elements */
10671         int groups = dsize / (8 << grp_size);
10672 
10673         for (i = 0; i < groups; i++) {
10674             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
10675 
10676             read_vec_element(s, tcg_tmp, rn, i, grp_size);
10677             switch (grp_size) {
10678             case MO_16:
10679                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
10680                 break;
10681             case MO_32:
10682                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
10683                 break;
10684             case MO_64:
10685                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
10686                 break;
10687             default:
10688                 g_assert_not_reached();
10689             }
10690             write_vec_element(s, tcg_tmp, rd, i, grp_size);
10691         }
10692         clear_vec_high(s, is_q, rd);
10693     } else {
10694         int revmask = (1 << grp_size) - 1;
10695         int esize = 8 << size;
10696         int elements = dsize / esize;
10697         TCGv_i64 tcg_rn = tcg_temp_new_i64();
10698         TCGv_i64 tcg_rd[2];
10699 
10700         for (i = 0; i < 2; i++) {
10701             tcg_rd[i] = tcg_temp_new_i64();
10702             tcg_gen_movi_i64(tcg_rd[i], 0);
10703         }
10704 
10705         for (i = 0; i < elements; i++) {
10706             int e_rev = (i & 0xf) ^ revmask;
10707             int w = (e_rev * esize) / 64;
10708             int o = (e_rev * esize) % 64;
10709 
10710             read_vec_element(s, tcg_rn, rn, i, size);
10711             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
10712         }
10713 
10714         for (i = 0; i < 2; i++) {
10715             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
10716         }
10717         clear_vec_high(s, true, rd);
10718     }
10719 }
10720 
10721 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
10722                                   bool is_q, int size, int rn, int rd)
10723 {
10724     /* Implement the pairwise operations from 2-misc:
10725      * SADDLP, UADDLP, SADALP, UADALP.
10726      * These all add pairs of elements in the input to produce a
10727      * double-width result element in the output (possibly accumulating).
10728      */
10729     bool accum = (opcode == 0x6);
10730     int maxpass = is_q ? 2 : 1;
10731     int pass;
10732     TCGv_i64 tcg_res[2];
10733 
10734     if (size == 2) {
10735         /* 32 + 32 -> 64 op */
10736         MemOp memop = size + (u ? 0 : MO_SIGN);
10737 
10738         for (pass = 0; pass < maxpass; pass++) {
10739             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10740             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10741 
10742             tcg_res[pass] = tcg_temp_new_i64();
10743 
10744             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
10745             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
10746             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10747             if (accum) {
10748                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
10749                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
10750             }
10751         }
10752     } else {
10753         for (pass = 0; pass < maxpass; pass++) {
10754             TCGv_i64 tcg_op = tcg_temp_new_i64();
10755             NeonGenOne64OpFn *genfn;
10756             static NeonGenOne64OpFn * const fns[2][2] = {
10757                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
10758                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
10759             };
10760 
10761             genfn = fns[size][u];
10762 
10763             tcg_res[pass] = tcg_temp_new_i64();
10764 
10765             read_vec_element(s, tcg_op, rn, pass, MO_64);
10766             genfn(tcg_res[pass], tcg_op);
10767 
10768             if (accum) {
10769                 read_vec_element(s, tcg_op, rd, pass, MO_64);
10770                 if (size == 0) {
10771                     gen_helper_neon_addl_u16(tcg_res[pass],
10772                                              tcg_res[pass], tcg_op);
10773                 } else {
10774                     gen_helper_neon_addl_u32(tcg_res[pass],
10775                                              tcg_res[pass], tcg_op);
10776                 }
10777             }
10778         }
10779     }
10780     if (!is_q) {
10781         tcg_res[1] = tcg_constant_i64(0);
10782     }
10783     for (pass = 0; pass < 2; pass++) {
10784         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10785     }
10786 }
10787 
10788 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
10789 {
10790     /* Implement SHLL and SHLL2 */
10791     int pass;
10792     int part = is_q ? 2 : 0;
10793     TCGv_i64 tcg_res[2];
10794 
10795     for (pass = 0; pass < 2; pass++) {
10796         static NeonGenWidenFn * const widenfns[3] = {
10797             gen_helper_neon_widen_u8,
10798             gen_helper_neon_widen_u16,
10799             tcg_gen_extu_i32_i64,
10800         };
10801         NeonGenWidenFn *widenfn = widenfns[size];
10802         TCGv_i32 tcg_op = tcg_temp_new_i32();
10803 
10804         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
10805         tcg_res[pass] = tcg_temp_new_i64();
10806         widenfn(tcg_res[pass], tcg_op);
10807         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
10808     }
10809 
10810     for (pass = 0; pass < 2; pass++) {
10811         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10812     }
10813 }
10814 
10815 /* AdvSIMD two reg misc
10816  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
10817  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
10818  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10819  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
10820  */
/*
 * AdvSIMD two-register miscellaneous, vector form.
 *
 * Fields read from @insn: size = bits [23:22], opcode = bits [16:12],
 * U = bit 29, Q = bit 30, Rn = bits [9:5], Rd = bits [4:0].
 *
 * The function proceeds in stages:
 *  1. The first switch validates the encoding.  Operations that have a
 *     dedicated handler (REV*, narrowing, pairwise add, SHLL, int<->fp
 *     conversion, FP compare against zero, reciprocal estimates, FCVTL)
 *     are dispatched from there and the function returns.  Invalid
 *     combinations call unallocated_encoding() and return.  Everything
 *     else breaks out to the common path below.
 *  2. After fp_access_check(), an FP status pointer and a temporary
 *     rounding mode are set up if the operation asked for them.
 *  3. Integer operations with a gvec expander are emitted and return.
 *  4. The remaining operations are emitted one element at a time:
 *     64-bit elements through handle_2misc_64(), everything else in
 *     32-bit chunks, then the high part of Rd is cleared and the
 *     rounding mode restored.
 */
static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool need_fpstatus = false;
    /* rmode < 0 means "do not override the current rounding mode" */
    int rmode = -1;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x0: /* REV64, REV32 */
    case 0x1: /* REV16 */
        handle_rev(s, opcode, u, is_q, size, rn, rd);
        return;
    case 0x5: /* CNT, NOT, RBIT */
        /* For this opcode U and size together select the instruction */
        if (u && size == 0) {
            /* NOT */
            break;
        } else if (u && size == 1) {
            /* RBIT */
            break;
        } else if (!u && size == 0) {
            /* CNT */
            break;
        }
        unallocated_encoding(s);
        return;
    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
        return;
    case 0x4: /* CLS, CLZ */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x2: /* SADDLP, UADDLP */
    case 0x6: /* SADALP, UADALP */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
        return;
    case 0x13: /* SHLL, SHLL2 */
        if (u == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_shll(s, is_q, size, rn, rd);
        return;
    case 0xa: /* CMLT */
        if (u == 1) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* 64-bit elements need the full 128-bit vector (Q == 1) */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x7: /* SQABS, SQNEG */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0xc ... 0xf:
    case 0x16 ... 0x1f:
    {
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        int is_double = extract32(size, 0, 1);
        /*
         * From here on opcode is a 7-bit value: bit 5 is size[1] and
         * bit 6 is U.  size is rewritten to the element size (2 or 3).
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = is_double ? 3 : 2;
        switch (opcode) {
        case 0x2f: /* FABS */
        case 0x6f: /* FNEG */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d) ? true : false;
            int elements = is_double ? 2 : is_q ? 4 : 2;
            if (is_double && !is_q) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
            return;
        }
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            /*
             * NOTE(review): no fp_access_check() on this path; presumably
             * handle_2misc_fcmp_zero() performs it itself - confirm.
             */
            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
            return;
        case 0x7f: /* FSQRT */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            need_fpstatus = true;
            /*
             * Rounding mode is built from opcode bit 5 (low bit) and
             * opcode bit 0 (high bit).
             * NOTE(review): presumably this matches the FPROUNDING_*
             * numbering; the enum is not visible here - confirm.
             */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x5c: /* FCVTAU */
        case 0x1c: /* FCVTAS */
            need_fpstatus = true;
            rmode = FPROUNDING_TIEAWAY;
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x3c: /* URECPE */
            if (size == 3) {
                unallocated_encoding(s);
                return;
            }
            /* fall through */
        case 0x3d: /* FRECPE */
        case 0x7d: /* FRSQRTE */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
            return;
        case 0x56: /* FCVTXN, FCVTXN2 */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            /* fall through */
        case 0x16: /* FCVTN, FCVTN2 */
            /* handle_2misc_narrow does a 2*size -> size operation, but these
             * instructions encode the source size rather than dest size.
             */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
            return;
        case 0x36: /* BFCVTN, BFCVTN2 */
            /* Only valid with the BF16 feature and a 32-bit source */
            if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
            return;
        case 0x17: /* FCVTL, FCVTL2 */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
            return;
        case 0x18: /* FRINTN */
        case 0x19: /* FRINTM */
        case 0x38: /* FRINTP */
        case 0x39: /* FRINTZ */
            /* Same bit-5 / bit-0 rounding mode derivation as FCVT?? above */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            /* fall through */
        case 0x59: /* FRINTX */
        case 0x79: /* FRINTI */
            /* FRINTX and FRINTI leave rmode at -1: no override is set */
            need_fpstatus = true;
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x58: /* FRINTA */
            rmode = FPROUNDING_TIEAWAY;
            need_fpstatus = true;
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x7c: /* URSQRTE */
            if (size == 3) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x1e: /* FRINT32Z */
        case 0x1f: /* FRINT64Z */
            rmode = FPROUNDING_ZERO;
            /* fall through */
        case 0x5e: /* FRINT32X */
        case 0x5f: /* FRINT64X */
            need_fpstatus = true;
            if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
                unallocated_encoding(s);
                return;
            }
            break;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    }
    default:
    case 0x3: /* SUQADD, USQADD */
        unallocated_encoding(s);
        return;
    }

    /* Common path: every opcode that reaches here passed validation */
    if (!fp_access_check(s)) {
        return;
    }

    if (need_fpstatus || rmode >= 0) {
        tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
    } else {
        tcg_fpstatus = NULL;
    }
    if (rmode >= 0) {
        /* Result is kept so the previous mode can be restored at the end */
        tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
    } else {
        tcg_rmode = NULL;
    }

    /*
     * Integer operations with a whole-vector (gvec) expansion.  None of
     * the opcodes matched here sets rmode in the first switch, so the
     * early returns do not skip a pending gen_restore_rmode().
     */
    switch (opcode) {
    case 0x5:
        if (u && size == 0) { /* NOT */
            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
            return;
        }
        /* CNT and RBIT are handled per element below */
        break;
    case 0x8: /* CMGT, CMGE */
        if (u) {
            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
        } else {
            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
        }
        return;
    case 0x9: /* CMEQ, CMLE */
        if (u) {
            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
        } else {
            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
        }
        return;
    case 0xa: /* CMLT */
        gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
        return;
    case 0xb:
        if (u) { /* ABS, NEG */
            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
        } else {
            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
        }
        return;
    }

    if (size == 3) {
        /* All 64-bit element operations can be shared with scalar 2misc */
        int pass;

        /* Coverity claims (size == 3 && !is_q) has been eliminated
         * from all paths leading to here.
         */
        tcg_debug_assert(is_q);
        /* Two 64-bit elements in the 128-bit vector */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);

            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
                            tcg_rmode, tcg_fpstatus);

            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }
    } else {
        int pass;

        /*
         * The vector is processed 32 bits at a time regardless of the
         * element size: four chunks when Q is set, otherwise two.
         */
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);

            if (size == 2) {
                /* Special cases for 32 bit elements */
                switch (opcode) {
                case 0x4: /* CLS, CLZ */
                    if (u) {
                        tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
                    } else {
                        tcg_gen_clrsb_i32(tcg_res, tcg_op);
                    }
                    break;
                case 0x7: /* SQABS, SQNEG */
                    if (u) {
                        gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
                    } else {
                        gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
                    }
                    break;
                case 0x2f: /* FABS */
                    gen_vfp_abss(tcg_res, tcg_op);
                    break;
                case 0x6f: /* FNEG */
                    gen_vfp_negs(tcg_res, tcg_op);
                    break;
                case 0x7f: /* FSQRT */
                    gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
                    break;
                case 0x1a: /* FCVTNS */
                case 0x1b: /* FCVTMS */
                case 0x1c: /* FCVTAS */
                case 0x3a: /* FCVTPS */
                case 0x3b: /* FCVTZS */
                    /*
                     * One helper serves all variants; the rounding mode
                     * was already selected via gen_set_rmode() above.
                     */
                    gen_helper_vfp_tosls(tcg_res, tcg_op,
                                         tcg_constant_i32(0), tcg_fpstatus);
                    break;
                case 0x5a: /* FCVTNU */
                case 0x5b: /* FCVTMU */
                case 0x5c: /* FCVTAU */
                case 0x7a: /* FCVTPU */
                case 0x7b: /* FCVTZU */
                    gen_helper_vfp_touls(tcg_res, tcg_op,
                                         tcg_constant_i32(0), tcg_fpstatus);
                    break;
                case 0x18: /* FRINTN */
                case 0x19: /* FRINTM */
                case 0x38: /* FRINTP */
                case 0x39: /* FRINTZ */
                case 0x58: /* FRINTA */
                case 0x79: /* FRINTI */
                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
                    break;
                case 0x59: /* FRINTX */
                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
                    break;
                case 0x7c: /* URSQRTE */
                    gen_helper_rsqrte_u32(tcg_res, tcg_op);
                    break;
                case 0x1e: /* FRINT32Z */
                case 0x5e: /* FRINT32X */
                    gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
                    break;
                case 0x1f: /* FRINT64Z */
                case 0x5f: /* FRINT64X */
                    gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
                    break;
                default:
                    g_assert_not_reached();
                }
            } else {
                /* Use helpers for 8 and 16 bit elements */
                switch (opcode) {
                case 0x5: /* CNT, RBIT */
                    /* For these two insns size is part of the opcode specifier
                     * (handled earlier); they always operate on byte elements.
                     */
                    if (u) {
                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
                    } else {
                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
                    }
                    break;
                case 0x7: /* SQABS, SQNEG */
                {
                    NeonGenOneOpEnvFn *genfn;
                    /* Indexed by [size][u]; size is 0 or 1 in this branch */
                    static NeonGenOneOpEnvFn * const fns[2][2] = {
                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                    };
                    genfn = fns[size][u];
                    genfn(tcg_res, tcg_env, tcg_op);
                    break;
                }
                case 0x4: /* CLS, CLZ */
                    if (u) {
                        if (size == 0) {
                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
                        }
                    } else {
                        if (size == 0) {
                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
                        }
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
        }
    }
    clear_vec_high(s, is_q, rd);

    /* Undo the temporary rounding mode, if one was installed */
    if (tcg_rmode) {
        gen_restore_rmode(tcg_rmode, tcg_fpstatus);
    }
}
11280 
11281 /* AdvSIMD [scalar] two register miscellaneous (FP16)
11282  *
11283  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
11284  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11285  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11286  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11287  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
11288  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
11289  *
11290  * This actually covers two groups where scalar access is governed by
11291  * bit 28. A bunch of the instructions (float to integral) only exist
11292  * in the vector form and are un-allocated for the scalar decode. Also
11293  * in the scalar decode Q is always 1.
11294  */
11295 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
11296 {
11297     int fpop, opcode, a, u;
11298     int rn, rd;
11299     bool is_q;
11300     bool is_scalar;
11301     bool only_in_vector = false;
11302 
11303     int pass;
11304     TCGv_i32 tcg_rmode = NULL;
11305     TCGv_ptr tcg_fpstatus = NULL;
11306     bool need_fpst = true;
11307     int rmode = -1;
11308 
11309     if (!dc_isar_feature(aa64_fp16, s)) {
11310         unallocated_encoding(s);
11311         return;
11312     }
11313 
11314     rd = extract32(insn, 0, 5);
11315     rn = extract32(insn, 5, 5);
11316 
11317     a = extract32(insn, 23, 1);
11318     u = extract32(insn, 29, 1);
11319     is_scalar = extract32(insn, 28, 1);
11320     is_q = extract32(insn, 30, 1);
11321 
11322     opcode = extract32(insn, 12, 5);
11323     fpop = deposit32(opcode, 5, 1, a);
11324     fpop = deposit32(fpop, 6, 1, u);
11325 
11326     switch (fpop) {
11327     case 0x1d: /* SCVTF */
11328     case 0x5d: /* UCVTF */
11329     {
11330         int elements;
11331 
11332         if (is_scalar) {
11333             elements = 1;
11334         } else {
11335             elements = (is_q ? 8 : 4);
11336         }
11337 
11338         if (!fp_access_check(s)) {
11339             return;
11340         }
11341         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
11342         return;
11343     }
11344     break;
11345     case 0x2c: /* FCMGT (zero) */
11346     case 0x2d: /* FCMEQ (zero) */
11347     case 0x2e: /* FCMLT (zero) */
11348     case 0x6c: /* FCMGE (zero) */
11349     case 0x6d: /* FCMLE (zero) */
11350         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
11351         return;
11352     case 0x3d: /* FRECPE */
11353     case 0x3f: /* FRECPX */
11354         break;
11355     case 0x18: /* FRINTN */
11356         only_in_vector = true;
11357         rmode = FPROUNDING_TIEEVEN;
11358         break;
11359     case 0x19: /* FRINTM */
11360         only_in_vector = true;
11361         rmode = FPROUNDING_NEGINF;
11362         break;
11363     case 0x38: /* FRINTP */
11364         only_in_vector = true;
11365         rmode = FPROUNDING_POSINF;
11366         break;
11367     case 0x39: /* FRINTZ */
11368         only_in_vector = true;
11369         rmode = FPROUNDING_ZERO;
11370         break;
11371     case 0x58: /* FRINTA */
11372         only_in_vector = true;
11373         rmode = FPROUNDING_TIEAWAY;
11374         break;
11375     case 0x59: /* FRINTX */
11376     case 0x79: /* FRINTI */
11377         only_in_vector = true;
11378         /* current rounding mode */
11379         break;
11380     case 0x1a: /* FCVTNS */
11381         rmode = FPROUNDING_TIEEVEN;
11382         break;
11383     case 0x1b: /* FCVTMS */
11384         rmode = FPROUNDING_NEGINF;
11385         break;
11386     case 0x1c: /* FCVTAS */
11387         rmode = FPROUNDING_TIEAWAY;
11388         break;
11389     case 0x3a: /* FCVTPS */
11390         rmode = FPROUNDING_POSINF;
11391         break;
11392     case 0x3b: /* FCVTZS */
11393         rmode = FPROUNDING_ZERO;
11394         break;
11395     case 0x5a: /* FCVTNU */
11396         rmode = FPROUNDING_TIEEVEN;
11397         break;
11398     case 0x5b: /* FCVTMU */
11399         rmode = FPROUNDING_NEGINF;
11400         break;
11401     case 0x5c: /* FCVTAU */
11402         rmode = FPROUNDING_TIEAWAY;
11403         break;
11404     case 0x7a: /* FCVTPU */
11405         rmode = FPROUNDING_POSINF;
11406         break;
11407     case 0x7b: /* FCVTZU */
11408         rmode = FPROUNDING_ZERO;
11409         break;
11410     case 0x2f: /* FABS */
11411     case 0x6f: /* FNEG */
11412         need_fpst = false;
11413         break;
11414     case 0x7d: /* FRSQRTE */
11415     case 0x7f: /* FSQRT (vector) */
11416         break;
11417     default:
11418         unallocated_encoding(s);
11419         return;
11420     }
11421 
11422 
11423     /* Check additional constraints for the scalar encoding */
11424     if (is_scalar) {
11425         if (!is_q) {
11426             unallocated_encoding(s);
11427             return;
11428         }
11429         /* FRINTxx is only in the vector form */
11430         if (only_in_vector) {
11431             unallocated_encoding(s);
11432             return;
11433         }
11434     }
11435 
11436     if (!fp_access_check(s)) {
11437         return;
11438     }
11439 
11440     if (rmode >= 0 || need_fpst) {
11441         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
11442     }
11443 
11444     if (rmode >= 0) {
11445         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
11446     }
11447 
11448     if (is_scalar) {
11449         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
11450         TCGv_i32 tcg_res = tcg_temp_new_i32();
11451 
11452         switch (fpop) {
11453         case 0x1a: /* FCVTNS */
11454         case 0x1b: /* FCVTMS */
11455         case 0x1c: /* FCVTAS */
11456         case 0x3a: /* FCVTPS */
11457         case 0x3b: /* FCVTZS */
11458             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11459             break;
11460         case 0x3d: /* FRECPE */
11461             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11462             break;
11463         case 0x3f: /* FRECPX */
11464             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
11465             break;
11466         case 0x5a: /* FCVTNU */
11467         case 0x5b: /* FCVTMU */
11468         case 0x5c: /* FCVTAU */
11469         case 0x7a: /* FCVTPU */
11470         case 0x7b: /* FCVTZU */
11471             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11472             break;
11473         case 0x6f: /* FNEG */
11474             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11475             break;
11476         case 0x7d: /* FRSQRTE */
11477             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11478             break;
11479         default:
11480             g_assert_not_reached();
11481         }
11482 
11483         /* limit any sign extension going on */
11484         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
11485         write_fp_sreg(s, rd, tcg_res);
11486     } else {
11487         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
11488             TCGv_i32 tcg_op = tcg_temp_new_i32();
11489             TCGv_i32 tcg_res = tcg_temp_new_i32();
11490 
11491             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
11492 
11493             switch (fpop) {
11494             case 0x1a: /* FCVTNS */
11495             case 0x1b: /* FCVTMS */
11496             case 0x1c: /* FCVTAS */
11497             case 0x3a: /* FCVTPS */
11498             case 0x3b: /* FCVTZS */
11499                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11500                 break;
11501             case 0x3d: /* FRECPE */
11502                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11503                 break;
11504             case 0x5a: /* FCVTNU */
11505             case 0x5b: /* FCVTMU */
11506             case 0x5c: /* FCVTAU */
11507             case 0x7a: /* FCVTPU */
11508             case 0x7b: /* FCVTZU */
11509                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11510                 break;
11511             case 0x18: /* FRINTN */
11512             case 0x19: /* FRINTM */
11513             case 0x38: /* FRINTP */
11514             case 0x39: /* FRINTZ */
11515             case 0x58: /* FRINTA */
11516             case 0x79: /* FRINTI */
11517                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
11518                 break;
11519             case 0x59: /* FRINTX */
11520                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
11521                 break;
11522             case 0x2f: /* FABS */
11523                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
11524                 break;
11525             case 0x6f: /* FNEG */
11526                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11527                 break;
11528             case 0x7d: /* FRSQRTE */
11529                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11530                 break;
11531             case 0x7f: /* FSQRT */
11532                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
11533                 break;
11534             default:
11535                 g_assert_not_reached();
11536             }
11537 
11538             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11539         }
11540 
11541         clear_vec_high(s, is_q, rd);
11542     }
11543 
11544     if (tcg_rmode) {
11545         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
11546     }
11547 }
11548 
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each row is { pattern, mask, fn }.  disas_data_proc_simd() hands this
 * table to lookup_disas_fn() to find the handler for an instruction.
 * NOTE(review): presumably a row matches when (insn & mask) == pattern,
 * rows are tried in order, and the all-zero row with a NULL fn ends the
 * search; lookup_disas_fn() is not shown here - confirm before
 * reordering or adding rows.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    /* FP16 two-reg misc: mask leaves Q, U, S and 'a' free (vector + scalar) */
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    /* end-of-table row */
    { 0x00000000, 0x00000000, NULL }
};
11563 
11564 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11565 {
11566     /* Note that this is called with all non-FP cases from
11567      * table C3-6 so it must UNDEF for entries not specifically
11568      * allocated to instructions in that table.
11569      */
11570     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11571     if (fn) {
11572         fn(s, insn);
11573     } else {
11574         unallocated_encoding(s);
11575     }
11576 }
11577 
11578 /* C3.6 Data processing - SIMD and floating point */
11579 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11580 {
11581     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11582         disas_data_proc_fp(s, insn);
11583     } else {
11584         /* SIMD, including crypto */
11585         disas_data_proc_simd(s, insn);
11586     }
11587 }
11588 
/*
 * Decode callback that accepts the instruction and does nothing else:
 * neither @s nor @a is touched.  Always returns true.
 */
static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}
11593 
/*
 * Decode callback that flags the current instruction as non-streaming
 * by setting s->is_nonstreaming.  @a is unused.  Returns true even
 * though the name says FAIL: the flag, not the return value, carries
 * the result.
 * NOTE(review): where is_nonstreaming is consumed is not visible in
 * this part of the file - confirm against the caller.
 */
static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}
11599 
/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * Decide whether the first instruction at a branch target on a guarded
 * page is an acceptable landing pad.  Only these are ever acceptable:
 *   - branch target identifiers (BTI c / j / jc),
 *   - PACIASP, PACIBSP,
 *   - BRK,
 *   - HLT.
 * Any other instruction causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    const bool in_hint_space = (insn & 0xfffff01fu) == 0xd503201fu;

    if (!in_hint_space) {
        /*
         * Outside HINT space only BRK and HLT are accepted, so that the
         * breakpoint exception takes priority.
         */
        const uint32_t excp_bits = insn & 0xffe0001fu;

        return excp_bits == 0xd4200000u || excp_bits == 0xd4400000u;
    }

    switch (extract32(insn, 5, 7)) {
    case 0b011001: /* PACIASP */
    case 0b011011: /* PACIBSP */
        /*
         * With SCTLR_ELx.BT clear every btype is fine; with it set,
         * PACI*SP is not compatible with btype == 3.
         */
        if (!bt) {
            return true;
        }
        return btype != 3;
    case 0b100000: /* BTI */
        /* Plain BTI is not compatible with any btype. */
        return false;
    case 0b100010: /* BTI c */
        /* Everything except btype == 3 */
        return btype != 3;
    case 0b100100: /* BTI j */
        /* Everything except btype == 2 */
        return btype != 2;
    case 0b100110: /* BTI jc */
        /* Compatible with any btype. */
        return true;
    default:
        /* Any other hint is not a valid landing pad. */
        return false;
    }
}
11651 
11652 /* C3.1 A64 instruction index by encoding */
11653 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
11654 {
11655     switch (extract32(insn, 25, 4)) {
11656     case 0x5:
11657     case 0xd:      /* Data processing - register */
11658         disas_data_proc_reg(s, insn);
11659         break;
11660     case 0x7:
11661     case 0xf:      /* Data processing - SIMD and floating point */
11662         disas_data_proc_simd_fp(s, insn);
11663         break;
11664     default:
11665         unallocated_encoding(s);
11666         break;
11667     }
11668 }
11669 
11670 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
11671                                           CPUState *cpu)
11672 {
11673     DisasContext *dc = container_of(dcbase, DisasContext, base);
11674     CPUARMState *env = cpu_env(cpu);
11675     ARMCPU *arm_cpu = env_archcpu(env);
11676     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
11677     int bound, core_mmu_idx;
11678 
11679     dc->isar = &arm_cpu->isar;
11680     dc->condjmp = 0;
11681     dc->pc_save = dc->base.pc_first;
11682     dc->aarch64 = true;
11683     dc->thumb = false;
11684     dc->sctlr_b = 0;
11685     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
11686     dc->condexec_mask = 0;
11687     dc->condexec_cond = 0;
11688     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
11689     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
11690     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
11691     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
11692     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
11693     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11694 #if !defined(CONFIG_USER_ONLY)
11695     dc->user = (dc->current_el == 0);
11696 #endif
11697     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
11698     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
11699     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
11700     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
11701     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
11702     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
11703     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
11704     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
11705     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
11706     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
11707     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
11708     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
11709     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
11710     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
11711     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
11712     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
11713     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
11714     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
11715     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
11716     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
11717     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
11718     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
11719     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
11720     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
11721     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
11722     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
11723     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
11724     dc->vec_len = 0;
11725     dc->vec_stride = 0;
11726     dc->cp_regs = arm_cpu->cp_regs;
11727     dc->features = env->features;
11728     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
11729     dc->gm_blocksize = arm_cpu->gm_blocksize;
11730 
11731 #ifdef CONFIG_USER_ONLY
11732     /* In sve_probe_page, we assume TBI is enabled. */
11733     tcg_debug_assert(dc->tbid & 1);
11734 #endif
11735 
11736     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
11737 
11738     /* Single step state. The code-generation logic here is:
11739      *  SS_ACTIVE == 0:
11740      *   generate code with no special handling for single-stepping (except
11741      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11742      *   this happens anyway because those changes are all system register or
11743      *   PSTATE writes).
11744      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11745      *   emit code for one insn
11746      *   emit code to clear PSTATE.SS
11747      *   emit code to generate software step exception for completed step
11748      *   end TB (as usual for having generated an exception)
11749      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11750      *   emit code to generate a software step exception
11751      *   end the TB
11752      */
11753     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
11754     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
11755     dc->is_ldex = false;
11756 
11757     /* Bound the number of insns to execute to those left on the page.  */
11758     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11759 
11760     /* If architectural single step active, limit to 1.  */
11761     if (dc->ss_active) {
11762         bound = 1;
11763     }
11764     dc->base.max_insns = MIN(dc->base.max_insns, bound);
11765 }
11766 
/*
 * Installed as the .tb_start hook of aarch64_translator_ops.
 * The body is empty: this hook generates no code and touches no state.
 */
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
11770 
11771 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11772 {
11773     DisasContext *dc = container_of(dcbase, DisasContext, base);
11774     target_ulong pc_arg = dc->base.pc_next;
11775 
11776     if (tb_cflags(dcbase->tb) & CF_PCREL) {
11777         pc_arg &= ~TARGET_PAGE_MASK;
11778     }
11779     tcg_gen_insn_start(pc_arg, 0, 0);
11780     dc->insn_start_updated = false;
11781 }
11782 
11783 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11784 {
11785     DisasContext *s = container_of(dcbase, DisasContext, base);
11786     CPUARMState *env = cpu_env(cpu);
11787     uint64_t pc = s->base.pc_next;
11788     uint32_t insn;
11789 
11790     /* Singlestep exceptions have the highest priority. */
11791     if (s->ss_active && !s->pstate_ss) {
11792         /* Singlestep state is Active-pending.
11793          * If we're in this state at the start of a TB then either
11794          *  a) we just took an exception to an EL which is being debugged
11795          *     and this is the first insn in the exception handler
11796          *  b) debug exceptions were masked and we just unmasked them
11797          *     without changing EL (eg by clearing PSTATE.D)
11798          * In either case we're going to take a swstep exception in the
11799          * "did not step an insn" case, and so the syndrome ISV and EX
11800          * bits should be zero.
11801          */
11802         assert(s->base.num_insns == 1);
11803         gen_swstep_exception(s, 0, 0);
11804         s->base.is_jmp = DISAS_NORETURN;
11805         s->base.pc_next = pc + 4;
11806         return;
11807     }
11808 
11809     if (pc & 3) {
11810         /*
11811          * PC alignment fault.  This has priority over the instruction abort
11812          * that we would receive from a translation fault via arm_ldl_code.
11813          * This should only be possible after an indirect branch, at the
11814          * start of the TB.
11815          */
11816         assert(s->base.num_insns == 1);
11817         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
11818         s->base.is_jmp = DISAS_NORETURN;
11819         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
11820         return;
11821     }
11822 
11823     s->pc_curr = pc;
11824     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
11825     s->insn = insn;
11826     s->base.pc_next = pc + 4;
11827 
11828     s->fp_access_checked = false;
11829     s->sve_access_checked = false;
11830 
11831     if (s->pstate_il) {
11832         /*
11833          * Illegal execution state. This has priority over BTI
11834          * exceptions, but comes after instruction abort exceptions.
11835          */
11836         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
11837         return;
11838     }
11839 
11840     if (dc_isar_feature(aa64_bti, s)) {
11841         if (s->base.num_insns == 1) {
11842             /* First insn can have btype set to non-zero.  */
11843             tcg_debug_assert(s->btype >= 0);
11844 
11845             /*
11846              * Note that the Branch Target Exception has fairly high
11847              * priority -- below debugging exceptions but above most
11848              * everything else.  This allows us to handle this now
11849              * instead of waiting until the insn is otherwise decoded.
11850              *
11851              * We can check all but the guarded page check here;
11852              * defer the latter to a helper.
11853              */
11854             if (s->btype != 0
11855                 && !btype_destination_ok(insn, s->bt, s->btype)) {
11856                 gen_helper_guarded_page_check(tcg_env);
11857             }
11858         } else {
11859             /* Not the first insn: btype must be 0.  */
11860             tcg_debug_assert(s->btype == 0);
11861         }
11862     }
11863 
11864     s->is_nonstreaming = false;
11865     if (s->sme_trap_nonstreaming) {
11866         disas_sme_fa64(s, insn);
11867     }
11868 
11869     if (!disas_a64(s, insn) &&
11870         !disas_sme(s, insn) &&
11871         !disas_sve(s, insn)) {
11872         disas_a64_legacy(s, insn);
11873     }
11874 
11875     /*
11876      * After execution of most insns, btype is reset to 0.
11877      * Note that we set btype == -1 when the insn sets btype.
11878      */
11879     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
11880         reset_btype(s);
11881     }
11882 }
11883 
11884 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11885 {
11886     DisasContext *dc = container_of(dcbase, DisasContext, base);
11887 
11888     if (unlikely(dc->ss_active)) {
11889         /* Note that this means single stepping WFI doesn't halt the CPU.
11890          * For conditional branch insns this is harmless unreachable code as
11891          * gen_goto_tb() has already handled emitting the debug exception
11892          * (and thus a tb-jump is not possible when singlestepping).
11893          */
11894         switch (dc->base.is_jmp) {
11895         default:
11896             gen_a64_update_pc(dc, 4);
11897             /* fall through */
11898         case DISAS_EXIT:
11899         case DISAS_JUMP:
11900             gen_step_complete_exception(dc);
11901             break;
11902         case DISAS_NORETURN:
11903             break;
11904         }
11905     } else {
11906         switch (dc->base.is_jmp) {
11907         case DISAS_NEXT:
11908         case DISAS_TOO_MANY:
11909             gen_goto_tb(dc, 1, 4);
11910             break;
11911         default:
11912         case DISAS_UPDATE_EXIT:
11913             gen_a64_update_pc(dc, 4);
11914             /* fall through */
11915         case DISAS_EXIT:
11916             tcg_gen_exit_tb(NULL, 0);
11917             break;
11918         case DISAS_UPDATE_NOCHAIN:
11919             gen_a64_update_pc(dc, 4);
11920             /* fall through */
11921         case DISAS_JUMP:
11922             tcg_gen_lookup_and_goto_ptr();
11923             break;
11924         case DISAS_NORETURN:
11925         case DISAS_SWI:
11926             break;
11927         case DISAS_WFE:
11928             gen_a64_update_pc(dc, 4);
11929             gen_helper_wfe(tcg_env);
11930             break;
11931         case DISAS_YIELD:
11932             gen_a64_update_pc(dc, 4);
11933             gen_helper_yield(tcg_env);
11934             break;
11935         case DISAS_WFI:
11936             /*
11937              * This is a special case because we don't want to just halt
11938              * the CPU if trying to debug across a WFI.
11939              */
11940             gen_a64_update_pc(dc, 4);
11941             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
11942             /*
11943              * The helper doesn't necessarily throw an exception, but we
11944              * must go back to the main loop to check for interrupts anyway.
11945              */
11946             tcg_gen_exit_tb(NULL, 0);
11947             break;
11948         }
11949     }
11950 }
11951 
/*
 * TranslatorOps table for AArch64: binds each hook slot to the
 * aarch64_tr_* function of the matching name defined in this file.
 * Not static, so it is visible to other translation units.
 */
const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
};
11958 };
11959