xref: /openbmc/qemu/target/arm/tcg/translate-a64.c (revision 99367627)
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "disas/disas.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
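 * For example, imm12 = 2 with size = 3 (a 64-bit access) yields a
 * byte offset of 2 << 3 = 16.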
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

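/*
 * Set DEST to the address PC_CURR + DIFF.  With CF_PCREL, cpu_pc holds
 * the PC value as of s->pc_save, so only a relative adjustment is
 * needed; otherwise the absolute target is known at translation time.
 */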
static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
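/* The tag field occupies address bits [59:56]; clearing it gives tag 0. */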
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

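/*
 * Probe for a memory access of the given type and size at PTR,
 * raising any fault now without performing the access.
 */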
static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

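    /*
     * Compute ((addr + imm) & 15) + access_size; the access stays
     * within a 16-byte granule iff the sum is <= 16.  Only the low
     * bits matter, so 32-bit arithmetic suffices.
     */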
    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In the instruction encoding, register 31 can refer to ZR (zero
 * register) or SP (stack pointer) depending on context. In QEMU's case
 * we map SP to cpu_X[31] and ZR accesses to a temporary which can be
 * discarded. This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that like the GP register accessors, the values returned
 * by the read functions are auto-freed temporaries, and may be modified.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + qc operation using an out-of-line helper.  */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
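    /*
     * Split the result: low half into ZF, high half into NF (whose
     * bit 31 is the sign bit); then OR the halves so that ZF is zero
     * iff the full 64-bit result is zero.
     */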
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

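    /*
     * V: overflow iff the operands have the same sign and the result's
     * sign differs, i.e. bit 63 of (result ^ t0) & ~(t0 ^ t1).
     */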
    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

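    /*
     * ARM's C flag for subtraction is NOT borrow, hence the GEU setcond
     * above.  V: overflow iff the operands differ in sign and the
     * result's sign differs from t0, i.e. bit 63 of
     * (result ^ t0) & (t0 ^ t1).
     */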
    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
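 * The iss_* arguments supply the instruction-specific syndrome (ISS)
 * fields for a data abort: iss_srt is the transfer register number,
 * iss_sf the sixty-four-bit flag, and iss_ar the acquire/release flag.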
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

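    /*
     * A signed load into a W register sign-extends only within the
     * low 32 bits; the high half of the X register is zeroed.
     */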
    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
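 * For example, option = 0b010 (UXTW) with shift = 2 zero-extends the
 * low 32 bits of the input and multiplies it by 4.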
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0).
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
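        /*
         * The link register is about to be overwritten with the return
         * address, so if it is also the branch target, copy the target
         * out first.
         */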
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
    } else {
        gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1575     gen_a64_set_pc(s, dst);
1576     set_btype_for_br(s, a->rn);
1577     s->base.is_jmp = DISAS_JUMP;
1578     return true;
1579 }
1580 
1581 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1582 {
1583     TCGv_i64 dst, lr;
1584 
1585     if (!dc_isar_feature(aa64_pauth, s)) {
1586         return false;
1587     }
1588     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1589     lr = cpu_reg(s, 30);
1590     if (dst == lr) {
1591         TCGv_i64 tmp = tcg_temp_new_i64();
1592         tcg_gen_mov_i64(tmp, dst);
1593         dst = tmp;
1594     }
1595     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1596     gen_a64_set_pc(s, dst);
1597     set_btype_for_blr(s);
1598     s->base.is_jmp = DISAS_JUMP;
1599     return true;
1600 }
1601 
1602 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1603 {
1604     TCGv_i64 dst;
1605 
1606     if (s->current_el == 0) {
1607         return false;
1608     }
1609     if (s->fgt_eret) {
1610         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1611         return true;
1612     }
1613     dst = tcg_temp_new_i64();
1614     tcg_gen_ld_i64(dst, tcg_env,
1615                    offsetof(CPUARMState, elr_el[s->current_el]));
1616 
1617     translator_io_start(&s->base);
1618 
1619     gen_helper_exception_return(tcg_env, dst);
1620     /* Must exit loop to check un-masked IRQs */
1621     s->base.is_jmp = DISAS_EXIT;
1622     return true;
1623 }
1624 
1625 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1626 {
1627     TCGv_i64 dst;
1628 
1629     if (!dc_isar_feature(aa64_pauth, s)) {
1630         return false;
1631     }
1632     if (s->current_el == 0) {
1633         return false;
1634     }
1635     /* The FGT trap takes precedence over an auth trap. */
1636     if (s->fgt_eret) {
1637         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1638         return true;
1639     }
1640     dst = tcg_temp_new_i64();
1641     tcg_gen_ld_i64(dst, tcg_env,
1642                    offsetof(CPUARMState, elr_el[s->current_el]));
1643 
1644     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1645 
1646     translator_io_start(&s->base);
1647 
1648     gen_helper_exception_return(tcg_env, dst);
1649     /* Must exit loop to check un-masked IRQs */
1650     s->base.is_jmp = DISAS_EXIT;
1651     return true;
1652 }
1653 
1654 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1655 {
1656     return true;
1657 }
1658 
1659 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1660 {
1661     /*
1662      * When running in MTTCG we don't generate jumps to the yield and
1663      * WFE helpers as it won't affect the scheduling of other vCPUs.
1664      * If we wanted to more completely model WFE/SEV so we don't busy
1665      * spin unnecessarily, we would need to do something more involved.
1666      */
1667     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1668         s->base.is_jmp = DISAS_YIELD;
1669     }
1670     return true;
1671 }
1672 
1673 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1674 {
1675     s->base.is_jmp = DISAS_WFI;
1676     return true;
1677 }
1678 
1679 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1680 {
1681     /*
1682      * When running in MTTCG we don't generate jumps to the yield and
1683      * WFE helpers as it won't affect the scheduling of other vCPUs.
1684      * If we wanted to more completely model WFE/SEV so we don't busy
1685      * spin unnecessarily, we would need to do something more involved.
1686      */
1687     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1688         s->base.is_jmp = DISAS_WFE;
1689     }
1690     return true;
1691 }
1692 
1693 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1694 {
1695     if (s->pauth_active) {
1696         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1697     }
1698     return true;
1699 }
1700 
1701 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1702 {
1703     if (s->pauth_active) {
1704         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1705     }
1706     return true;
1707 }
1708 
1709 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1710 {
1711     if (s->pauth_active) {
1712         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1713     }
1714     return true;
1715 }
1716 
1717 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1718 {
1719     if (s->pauth_active) {
1720         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1721     }
1722     return true;
1723 }
1724 
1725 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1726 {
1727     if (s->pauth_active) {
1728         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1729     }
1730     return true;
1731 }
1732 
1733 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1734 {
1735     /* Without RAS, we must implement this as a NOP. */
1736     if (dc_isar_feature(aa64_ras, s)) {
1737         /*
1738          * QEMU does not have a source of physical SErrors,
1739          * so we are only concerned with virtual SErrors.
1740      * The pseudocode in the Arm ARM for this case is
1741          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1742          *      AArch64.vESBOperation();
1743          * Most of the condition can be evaluated at translation time.
1744          * Test for EL2 present, and defer test for SEL2 to runtime.
1745          */
1746         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1747             gen_helper_vesb(tcg_env);
1748         }
1749     }
1750     return true;
1751 }
1752 
1753 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1754 {
1755     if (s->pauth_active) {
1756         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1757     }
1758     return true;
1759 }
1760 
1761 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1762 {
1763     if (s->pauth_active) {
1764         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1765     }
1766     return true;
1767 }
1768 
1769 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1770 {
1771     if (s->pauth_active) {
1772         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1773     }
1774     return true;
1775 }
1776 
1777 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1778 {
1779     if (s->pauth_active) {
1780         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1781     }
1782     return true;
1783 }
1784 
1785 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1786 {
1787     if (s->pauth_active) {
1788         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1789     }
1790     return true;
1791 }
1792 
1793 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1794 {
1795     if (s->pauth_active) {
1796         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1797     }
1798     return true;
1799 }
1800 
1801 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1802 {
1803     if (s->pauth_active) {
1804         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1805     }
1806     return true;
1807 }
1808 
1809 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1810 {
1811     if (s->pauth_active) {
1812         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1813     }
1814     return true;
1815 }
1816 
1817 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1818 {
1819     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1820     return true;
1821 }
1822 
1823 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1824 {
1825     /* We handle DSB and DMB the same way */
1826     TCGBar bar;
1827 
1828     switch (a->types) {
1829     case 1: /* MBReqTypes_Reads */
1830         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1831         break;
1832     case 2: /* MBReqTypes_Writes */
1833         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1834         break;
1835     default: /* MBReqTypes_All */
1836         bar = TCG_BAR_SC | TCG_MO_ALL;
1837         break;
1838     }
1839     tcg_gen_mb(bar);
1840     return true;
1841 }
1842 
1843 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1844 {
1845     /*
1846      * We need to break the TB after this insn to execute
1847      * self-modifying code correctly and also to take
1848      * any pending interrupts immediately.
1849      */
1850     reset_btype(s);
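    /* End the TB here and continue at the next insn (PC + 4). */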
1851     gen_goto_tb(s, 0, 4);
1852     return true;
1853 }
1854 
1855 static bool trans_SB(DisasContext *s, arg_SB *a)
1856 {
1857     if (!dc_isar_feature(aa64_sb, s)) {
1858         return false;
1859     }
1860     /*
1861      * TODO: There is no speculation barrier opcode for TCG;
1862      * MB and end the TB instead.
1863      */
1864     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1865     gen_goto_tb(s, 0, 4);
1866     return true;
1867 }
1868 
1869 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
1870 {
1871     if (!dc_isar_feature(aa64_condm_4, s)) {
1872         return false;
1873     }
1874     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1875     return true;
1876 }
1877 
1878 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
1879 {
1880     TCGv_i32 z;
1881 
1882     if (!dc_isar_feature(aa64_condm_5, s)) {
1883         return false;
1884     }
1885 
1886     z = tcg_temp_new_i32();
1887 
1888     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1889 
1890     /*
1891      * (!C & !Z) << 31
1892      * (!(C | Z)) << 31
1893      * ~((C | Z) << 31)
1894      * ~-(C | Z)
1895      * (C | Z) - 1   (the forms agree in bit 31, the only bit of NF used)
1896      */
1897     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1898     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1899 
1900     /* !(Z & C) */
1901     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1902     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1903 
1904     /* (!C & Z) << 31 -> -(Z & ~C) */
1905     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1906     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1907 
1908     /* C | Z */
1909     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1910 
1911     return true;
1912 }
1913 
1914 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
1915 {
1916     if (!dc_isar_feature(aa64_condm_5, s)) {
1917         return false;
1918     }
1919 
1920     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1921     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1922 
1923     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1924     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1925 
1926     tcg_gen_movi_i32(cpu_NF, 0);
1927     tcg_gen_movi_i32(cpu_VF, 0);
1928 
1929     return true;
1930 }
1931 
1932 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
1933 {
1934     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1935         return false;
1936     }
1937     if (a->imm & 1) {
1938         set_pstate_bits(PSTATE_UAO);
1939     } else {
1940         clear_pstate_bits(PSTATE_UAO);
1941     }
1942     gen_rebuild_hflags(s);
1943     s->base.is_jmp = DISAS_TOO_MANY;
1944     return true;
1945 }
1946 
1947 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
1948 {
1949     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1950         return false;
1951     }
1952     if (a->imm & 1) {
1953         set_pstate_bits(PSTATE_PAN);
1954     } else {
1955         clear_pstate_bits(PSTATE_PAN);
1956     }
1957     gen_rebuild_hflags(s);
1958     s->base.is_jmp = DISAS_TOO_MANY;
1959     return true;
1960 }
1961 
1962 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
1963 {
1964     if (s->current_el == 0) {
1965         return false;
1966     }
1967     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
1968     s->base.is_jmp = DISAS_TOO_MANY;
1969     return true;
1970 }
1971 
1972 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
1973 {
1974     if (!dc_isar_feature(aa64_ssbs, s)) {
1975         return false;
1976     }
1977     if (a->imm & 1) {
1978         set_pstate_bits(PSTATE_SSBS);
1979     } else {
1980         clear_pstate_bits(PSTATE_SSBS);
1981     }
1982     /* Don't need to rebuild hflags since SSBS is a nop */
1983     s->base.is_jmp = DISAS_TOO_MANY;
1984     return true;
1985 }
1986 
1987 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
1988 {
1989     if (!dc_isar_feature(aa64_dit, s)) {
1990         return false;
1991     }
1992     if (a->imm & 1) {
1993         set_pstate_bits(PSTATE_DIT);
1994     } else {
1995         clear_pstate_bits(PSTATE_DIT);
1996     }
1997     /* There's no need to rebuild hflags because DIT is a nop */
1998     s->base.is_jmp = DISAS_TOO_MANY;
1999     return true;
2000 }
2001 
2002 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2003 {
2004     if (dc_isar_feature(aa64_mte, s)) {
2005         /* Full MTE is enabled -- set the TCO bit as directed. */
2006         if (a->imm & 1) {
2007             set_pstate_bits(PSTATE_TCO);
2008         } else {
2009             clear_pstate_bits(PSTATE_TCO);
2010         }
2011         gen_rebuild_hflags(s);
2012         /* Many factors, including TCO, go into MTE_ACTIVE. */
2013         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2014         return true;
2015     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2016         /* Only "instructions accessible at EL0" -- PSTATE.TCO is write-ignored.  */
2017         return true;
2018     } else {
2019         /* Insn not present */
2020         return false;
2021     }
2022 }
2023 
2024 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2025 {
2026     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2027     s->base.is_jmp = DISAS_TOO_MANY;
2028     return true;
2029 }
2030 
2031 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2032 {
2033     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2034     /* Exit the cpu loop to re-evaluate pending IRQs. */
2035     s->base.is_jmp = DISAS_UPDATE_EXIT;
2036     return true;
2037 }
2038 
2039 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2040 {
2041     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2042         return false;
2043     }
2044     if (sme_access_check(s)) {
2045         int old = s->pstate_sm | (s->pstate_za << 1);
2046         int new = a->imm * 3;
2047 
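        /*
         * a->imm is a single bit; "* 3" replicates it into both the
         * SM (bit 0) and ZA (bit 1) positions, and a->mask selects
         * which of the two are actually being written.
         */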
2048         if ((old ^ new) & a->mask) {
2049             /* At least one bit changes. */
2050             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2051                                 tcg_constant_i32(a->mask));
2052             s->base.is_jmp = DISAS_TOO_MANY;
2053         }
2054     }
2055     return true;
2056 }
2057 
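/*
 * NZCV occupies bits [31:28] of the transferred value, matching the
 * MRS/MSR view of the NZCV register: for example, Z and C set with
 * N and V clear reads back as 0x60000000.
 */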
2058 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2059 {
2060     TCGv_i32 tmp = tcg_temp_new_i32();
2061     TCGv_i32 nzcv = tcg_temp_new_i32();
2062 
2063     /* build bit 31, N */
2064     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2065     /* build bit 30, Z */
2066     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2067     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2068     /* build bit 29, C */
2069     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2070     /* build bit 28, V */
2071     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2072     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2073     /* generate result */
2074     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2075 }
2076 
2077 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2078 {
2079     TCGv_i32 nzcv = tcg_temp_new_i32();
2080 
2081     /* take NZCV from R[t] */
2082     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2083 
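    /*
     * Note the different internal representations: cpu_NF and cpu_VF
     * keep their flag in bit 31, cpu_CF holds C as 0 or 1, and cpu_ZF
     * is zero iff Z is set.
     */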
2084     /* bit 31, N */
2085     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2086     /* bit 30, Z */
2087     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2088     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2089     /* bit 29, C */
2090     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2091     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2092     /* bit 28, V */
2093     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2094     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2095 }
2096 
2097 static void gen_sysreg_undef(DisasContext *s, bool isread,
2098                              uint8_t op0, uint8_t op1, uint8_t op2,
2099                              uint8_t crn, uint8_t crm, uint8_t rt)
2100 {
2101     /*
2102      * Generate code to emit an UNDEF with correct syndrome
2103      * information for a failed system register access.
2104      * This is EC_UNCATEGORIZED (i.e. a standard UNDEF) in most cases,
2105      * but if FEAT_IDST is implemented then read accesses to registers
2106      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2107      * syndrome.
2108      */
2109     uint32_t syndrome;
2110 
2111     if (isread && dc_isar_feature(aa64_ids, s) &&
2112         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2113         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2114     } else {
2115         syndrome = syn_uncategorized();
2116     }
2117     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2118 }
2119 
2120 /* MRS - move from system register
2121  * MSR (register) - move to system register
2122  * SYS
2123  * SYSL
2124  * These are all essentially the same insn in 'read' and 'write'
2125  * versions, with varying op0 fields.
2126  */
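/*
 * For example, "MRS x0, CNTVCT_EL0" arrives here as op0=3 op1=3
 * crn=14 crm=0 op2=2 rt=0 with isread=true, and resolves through the
 * same cpreg lookup that an MSR write to the same encoding would use.
 */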
2127 static void handle_sys(DisasContext *s, bool isread,
2128                        unsigned int op0, unsigned int op1, unsigned int op2,
2129                        unsigned int crn, unsigned int crm, unsigned int rt)
2130 {
2131     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2132                                       crn, crm, op0, op1, op2);
2133     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2134     bool need_exit_tb = false;
2135     TCGv_ptr tcg_ri = NULL;
2136     TCGv_i64 tcg_rt;
2137     uint32_t syndrome;
2138 
2139     if (crn == 11 || crn == 15) {
2140         /*
2141          * Check for TIDCP trap, which must take precedence over
2142          * the UNDEF for "no such register" etc.
2143          */
2144         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2145         switch (s->current_el) {
2146         case 0:
2147             if (dc_isar_feature(aa64_tidcp1, s)) {
2148                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2149             }
2150             break;
2151         case 1:
2152             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2153             break;
2154         }
2155     }
2156 
2157     if (!ri) {
2158         /* Unknown register; this might be a guest error or a QEMU
2159          * unimplemented feature.
2160          */
2161         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2162                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2163                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2164         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2165         return;
2166     }
2167 
2168     /* Check access permissions */
2169     if (!cp_access_ok(s->current_el, ri, isread)) {
2170         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2171         return;
2172     }
2173 
2174     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2175         /* Emit code to perform further access permissions checks at
2176          * runtime; this may result in an exception.
2177          */
2178         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2179         gen_a64_update_pc(s, 0);
2180         tcg_ri = tcg_temp_new_ptr();
2181         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2182                                        tcg_constant_i32(key),
2183                                        tcg_constant_i32(syndrome),
2184                                        tcg_constant_i32(isread));
2185     } else if (ri->type & ARM_CP_RAISES_EXC) {
2186         /*
2187          * The readfn or writefn might raise an exception;
2188          * synchronize the CPU state in case it does.
2189          */
2190         gen_a64_update_pc(s, 0);
2191     }
2192 
2193     /* Handle special cases first */
2194     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2195     case 0:
2196         break;
2197     case ARM_CP_NOP:
2198         return;
2199     case ARM_CP_NZCV:
2200         tcg_rt = cpu_reg(s, rt);
2201         if (isread) {
2202             gen_get_nzcv(tcg_rt);
2203         } else {
2204             gen_set_nzcv(tcg_rt);
2205         }
2206         return;
2207     case ARM_CP_CURRENTEL:
2208         /* Reads as current EL value from pstate, which is
2209          * guaranteed to be constant by the tb flags.
2210          */
2211         tcg_rt = cpu_reg(s, rt);
2212         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
2213         return;
2214     case ARM_CP_DC_ZVA:
2215         /* Writes clear the aligned block of memory which rt points into. */
2216         if (s->mte_active[0]) {
2217             int desc = 0;
2218 
2219             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2220             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2221             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2222 
2223             tcg_rt = tcg_temp_new_i64();
2224             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2225                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2226         } else {
2227             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2228         }
2229         gen_helper_dc_zva(tcg_env, tcg_rt);
2230         return;
2231     case ARM_CP_DC_GVA:
2232         {
2233             TCGv_i64 clean_addr, tag;
2234 
2235             /*
2236              * DC_GVA, like DC_ZVA, requires that we supply the original
2237              * pointer for an invalid page.  Probe that address first.
2238              */
2239             tcg_rt = cpu_reg(s, rt);
2240             clean_addr = clean_data_tbi(s, tcg_rt);
2241             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2242 
2243             if (s->ata[0]) {
2244                 /* Extract the tag from the register to match STZGM.  */
2245                 tag = tcg_temp_new_i64();
2246                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2247                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2248             }
2249         }
2250         return;
2251     case ARM_CP_DC_GZVA:
2252         {
2253             TCGv_i64 clean_addr, tag;
2254 
2255             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2256             tcg_rt = cpu_reg(s, rt);
2257             clean_addr = clean_data_tbi(s, tcg_rt);
2258             gen_helper_dc_zva(tcg_env, clean_addr);
2259 
2260             if (s->ata[0]) {
2261                 /* Extract the tag from the register to match STZGM.  */
2262                 tag = tcg_temp_new_i64();
2263                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2264                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2265             }
2266         }
2267         return;
2268     default:
2269         g_assert_not_reached();
2270     }
2271     if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2272         return;
2273     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2274         return;
2275     } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2276         return;
2277     }
2278 
2279     if (ri->type & ARM_CP_IO) {
2280         /* I/O operations must end the TB here (whether read or write) */
2281         need_exit_tb = translator_io_start(&s->base);
2282     }
2283 
2284     tcg_rt = cpu_reg(s, rt);
2285 
2286     if (isread) {
2287         if (ri->type & ARM_CP_CONST) {
2288             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2289         } else if (ri->readfn) {
2290             if (!tcg_ri) {
2291                 tcg_ri = gen_lookup_cp_reg(key);
2292             }
2293             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2294         } else {
2295             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2296         }
2297     } else {
2298         if (ri->type & ARM_CP_CONST) {
2299             /* If not forbidden by access permissions, treat as WI */
2300             return;
2301         } else if (ri->writefn) {
2302             if (!tcg_ri) {
2303                 tcg_ri = gen_lookup_cp_reg(key);
2304             }
2305             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2306         } else {
2307             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2308         }
2309     }
2310 
2311     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2312         /*
2313          * A write to any coprocessor register that ends a TB
2314          * must rebuild the hflags for the next TB.
2315          */
2316         gen_rebuild_hflags(s);
2317         /*
2318          * We default to ending the TB on a coprocessor register write,
2319          * but allow this to be suppressed by the register definition
2320          * (usually only necessary to work around guest bugs).
2321          */
2322         need_exit_tb = true;
2323     }
2324     if (need_exit_tb) {
2325         s->base.is_jmp = DISAS_UPDATE_EXIT;
2326     }
2327 }
2328 
2329 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2330 {
2331     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2332     return true;
2333 }
2334 
2335 static bool trans_SVC(DisasContext *s, arg_i *a)
2336 {
2337     /*
2338      * For SVC, HVC and SMC we advance the single-step state
2339      * machine before taking the exception. This is architecturally
2340      * mandated, to ensure that single-stepping a system call
2341      * instruction works properly.
2342      */
2343     uint32_t syndrome = syn_aa64_svc(a->imm);
2344     if (s->fgt_svc) {
2345         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2346         return true;
2347     }
2348     gen_ss_advance(s);
2349     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2350     return true;
2351 }
2352 
2353 static bool trans_HVC(DisasContext *s, arg_i *a)
2354 {
2355     int target_el = s->current_el == 3 ? 3 : 2;
2356 
2357     if (s->current_el == 0) {
2358         unallocated_encoding(s);
2359         return true;
2360     }
2361     /*
2362      * The pre-HVC helper handles cases where HVC gets trapped
2363      * as an undefined insn by runtime configuration.
2364      */
2365     gen_a64_update_pc(s, 0);
2366     gen_helper_pre_hvc(tcg_env);
2367     /* Architecture requires ss advance before we do the actual work */
2368     gen_ss_advance(s);
2369     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2370     return true;
2371 }
2372 
2373 static bool trans_SMC(DisasContext *s, arg_i *a)
2374 {
2375     if (s->current_el == 0) {
2376         unallocated_encoding(s);
2377         return true;
2378     }
2379     gen_a64_update_pc(s, 0);
2380     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2381     /* Architecture requires ss advance before we do the actual work */
2382     gen_ss_advance(s);
2383     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2384     return true;
2385 }
2386 
2387 static bool trans_BRK(DisasContext *s, arg_i *a)
2388 {
2389     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2390     return true;
2391 }
2392 
2393 static bool trans_HLT(DisasContext *s, arg_i *a)
2394 {
2395     /*
2396      * HLT. This has two purposes.
2397      * First, architecturally it is an external halting debug instruction.
2398      * Since QEMU doesn't implement external debug, we treat this as
2399      * the architecture requires when halting debug is disabled: it will UNDEF.
2400      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2401      */
2402     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2403         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2404     } else {
2405         unallocated_encoding(s);
2406     }
2407     return true;
2408 }
2409 
2410 /*
2411  * Load/Store exclusive instructions are implemented by remembering
2412  * the value/address loaded, and seeing if these are the same
2413  * when the store is performed. This is not actually the architecturally
2414  * mandated semantics, but it works for typical guest code sequences
2415  * and avoids having to monitor regular stores.
2416  *
2417  * The store exclusive uses the atomic cmpxchg primitives to avoid
2418  * races in multi-threaded linux-user and when MTTCG softmmu is
2419  * enabled.
2420  */
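/*
 * For illustration, the canonical guest retry loop that this scheme
 * supports:
 *
 *   retry:
 *     ldxr  x0, [x1]      // records the address and loaded value
 *     add   x0, x0, #1
 *     stxr  w2, x0, [x1]  // cmpxchg against the recorded value
 *     cbnz  w2, retry     // w2 == 1 means the store exclusive failed
 */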
2421 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2422                                int size, bool is_pair)
2423 {
2424     int idx = get_mem_index(s);
2425     TCGv_i64 dirty_addr, clean_addr;
2426     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2427 
2428     s->is_ldex = true;
2429     dirty_addr = cpu_reg_sp(s, rn);
2430     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2431 
2432     g_assert(size <= 3);
2433     if (is_pair) {
2434         g_assert(size >= 2);
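        /*
         * The pair is loaded as one combined access: two 32-bit
         * registers via a single 64-bit load, two 64-bit registers
         * via a single 128-bit load, so the monitor records one value.
         */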
2435         if (size == 2) {
2436             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2437             if (s->be_data == MO_LE) {
2438                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2439                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2440             } else {
2441                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2442                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2443             }
2444         } else {
2445             TCGv_i128 t16 = tcg_temp_new_i128();
2446 
2447             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2448 
2449             if (s->be_data == MO_LE) {
2450                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2451                                       cpu_exclusive_high, t16);
2452             } else {
2453                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2454                                       cpu_exclusive_val, t16);
2455             }
2456             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2457             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2458         }
2459     } else {
2460         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2461         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2462     }
2463     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2464 }
2465 
2466 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2467                                 int rn, int size, int is_pair)
2468 {
2469     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2470      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2471      *     [addr] = {Rt};
2472      *     if (is_pair) {
2473      *         [addr + datasize] = {Rt2};
2474      *     }
2475      *     {Rd} = 0;
2476      * } else {
2477      *     {Rd} = 1;
2478      * }
2479      * env->exclusive_addr = -1;
2480      */
2481     TCGLabel *fail_label = gen_new_label();
2482     TCGLabel *done_label = gen_new_label();
2483     TCGv_i64 tmp, clean_addr;
2484     MemOp memop;
2485 
2486     /*
2487      * FIXME: We are out of spec here.  We have recorded only the address
2488      * from load_exclusive, not the entire range, and we assume that the
2489      * size of the access on both sides match.  The architecture allows the
2490      * store to be smaller than the load, so long as the stored bytes are
2491      * within the range recorded by the load.
2492      */
2493 
2494     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2495     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2496     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2497 
2498     /*
2499      * The write, and any associated faults, only happen if the virtual
2500      * and physical addresses pass the exclusive monitor check.  These
2501      * faults are exceedingly unlikely, because normally the guest uses
2502      * the exact same address register for the load_exclusive, and we
2503      * would have recognized these faults there.
2504      *
2505      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2506      * unaligned 4-byte write within the range of an aligned 8-byte load.
2507      * With LSE2, the store would need to cross a 16-byte boundary when the
2508      * load did not, which would mean the store is outside the range
2509      * recorded for the monitor, which a range-corrected monitor check
2510      * above would have caught.  For now, we assume no size change and
2511      * retain MO_ALIGN to let tcg know what we checked in load_exclusive.
2512      *
2513      * It is possible to trigger an MTE fault, by performing the load with
2514      * a virtual address with a valid tag and performing the store with the
2515      * same virtual address and a different invalid tag.
2516      */
2517     memop = size + is_pair;
2518     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2519         memop |= MO_ALIGN;
2520     }
2521     memop = finalize_memop(s, memop);
2522     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2523 
2524     tmp = tcg_temp_new_i64();
2525     if (is_pair) {
2526         if (size == 2) {
2527             if (s->be_data == MO_LE) {
2528                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2529             } else {
2530                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2531             }
2532             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2533                                        cpu_exclusive_val, tmp,
2534                                        get_mem_index(s), memop);
2535             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2536         } else {
2537             TCGv_i128 t16 = tcg_temp_new_i128();
2538             TCGv_i128 c16 = tcg_temp_new_i128();
2539             TCGv_i64 a, b;
2540 
2541             if (s->be_data == MO_LE) {
2542                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2543                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2544                                         cpu_exclusive_high);
2545             } else {
2546                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2547                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2548                                         cpu_exclusive_val);
2549             }
2550 
2551             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2552                                         get_mem_index(s), memop);
2553 
2554             a = tcg_temp_new_i64();
2555             b = tcg_temp_new_i64();
2556             if (s->be_data == MO_LE) {
2557                 tcg_gen_extr_i128_i64(a, b, t16);
2558             } else {
2559                 tcg_gen_extr_i128_i64(b, a, t16);
2560             }
2561 
2562             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2563             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2564             tcg_gen_or_i64(tmp, a, b);
2565 
2566             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2567         }
2568     } else {
2569         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2570                                    cpu_reg(s, rt), get_mem_index(s), memop);
2571         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2572     }
2573     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2574     tcg_gen_br(done_label);
2575 
2576     gen_set_label(fail_label);
2577     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2578     gen_set_label(done_label);
2579     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2580 }
2581 
2582 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2583                                  int rn, int size)
2584 {
2585     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2586     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2587     int memidx = get_mem_index(s);
2588     TCGv_i64 clean_addr;
2589     MemOp memop;
2590 
2591     if (rn == 31) {
2592         gen_check_sp_alignment(s);
2593     }
2594     memop = check_atomic_align(s, rn, size);
2595     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
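    /*
     * CAS Rs, Rt, [Rn]: compare memory against Rs, store Rt on a
     * match, and return the prior memory value in Rs either way --
     * exactly the semantics of tcg_gen_atomic_cmpxchg_i64.
     */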
2596     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2597                                memidx, memop);
2598 }
2599 
2600 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2601                                       int rn, int size)
2602 {
2603     TCGv_i64 s1 = cpu_reg(s, rs);
2604     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2605     TCGv_i64 t1 = cpu_reg(s, rt);
2606     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2607     TCGv_i64 clean_addr;
2608     int memidx = get_mem_index(s);
2609     MemOp memop;
2610 
2611     if (rn == 31) {
2612         gen_check_sp_alignment(s);
2613     }
2614 
2615     /* This is a single atomic access, despite the "pair". */
2616     memop = check_atomic_align(s, rn, size + 1);
2617     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2618 
2619     if (size == 2) {
2620         TCGv_i64 cmp = tcg_temp_new_i64();
2621         TCGv_i64 val = tcg_temp_new_i64();
2622 
2623         if (s->be_data == MO_LE) {
2624             tcg_gen_concat32_i64(val, t1, t2);
2625             tcg_gen_concat32_i64(cmp, s1, s2);
2626         } else {
2627             tcg_gen_concat32_i64(val, t2, t1);
2628             tcg_gen_concat32_i64(cmp, s2, s1);
2629         }
2630 
2631         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2632 
2633         if (s->be_data == MO_LE) {
2634             tcg_gen_extr32_i64(s1, s2, cmp);
2635         } else {
2636             tcg_gen_extr32_i64(s2, s1, cmp);
2637         }
2638     } else {
2639         TCGv_i128 cmp = tcg_temp_new_i128();
2640         TCGv_i128 val = tcg_temp_new_i128();
2641 
2642         if (s->be_data == MO_LE) {
2643             tcg_gen_concat_i64_i128(val, t1, t2);
2644             tcg_gen_concat_i64_i128(cmp, s1, s2);
2645         } else {
2646             tcg_gen_concat_i64_i128(val, t2, t1);
2647             tcg_gen_concat_i64_i128(cmp, s2, s1);
2648         }
2649 
2650         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2651 
2652         if (s->be_data == MO_LE) {
2653             tcg_gen_extr_i128_i64(s1, s2, cmp);
2654         } else {
2655             tcg_gen_extr_i128_i64(s2, s1, cmp);
2656         }
2657     }
2658 }
2659 
2660 /*
2661  * Compute the ISS.SF bit for syndrome information if an exception
2662  * is taken on a load or store. This indicates whether the instruction
2663  * is accessing a 32-bit or 64-bit register. This logic is derived
2664  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2665  */
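/*
 * For example: LDRSW Xt is a signed load with no 32-bit zero-extension
 * (sign=true, ext=false), so SF=1; LDRSH Wt zero-extends its 32-bit
 * result (ext=true), so SF=0; stores and unsigned loads report SF
 * purely from the access size.
 */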
2666 static bool ldst_iss_sf(int size, bool sign, bool ext)
2667 {
2669     if (sign) {
2670         /*
2671          * Signed loads are 64 bit results if we are not going to
2672          * do a zero-extend from 32 to 64 after the load.
2673          * (For a store, sign and ext are always false.)
2674          */
2675         return !ext;
2676     } else {
2677         /* Unsigned loads/stores work at the specified size */
2678         return size == MO_64;
2679     }
2680 }
2681 
2682 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2683 {
2684     if (a->rn == 31) {
2685         gen_check_sp_alignment(s);
2686     }
2687     if (a->lasr) {
2688         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2689     }
2690     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2691     return true;
2692 }
2693 
2694 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2695 {
2696     if (a->rn == 31) {
2697         gen_check_sp_alignment(s);
2698     }
2699     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2700     if (a->lasr) {
2701         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2702     }
2703     return true;
2704 }
2705 
2706 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2707 {
2708     TCGv_i64 clean_addr;
2709     MemOp memop;
2710     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2711 
2712     /*
2713      * StoreLORelease is the same as Store-Release for QEMU, but
2714      * needs the feature test.
2715      */
2716     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2717         return false;
2718     }
2719     /* Generate ISS for non-exclusive accesses including LASR.  */
2720     if (a->rn == 31) {
2721         gen_check_sp_alignment(s);
2722     }
2723     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2724     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
2725     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2726                                 true, a->rn != 31, memop);
2727     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
2728               iss_sf, a->lasr);
2729     return true;
2730 }
2731 
2732 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
2733 {
2734     TCGv_i64 clean_addr;
2735     MemOp memop;
2736     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2737 
2738     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2739     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2740         return false;
2741     }
2742     /* Generate ISS for non-exclusive accesses including LASR.  */
2743     if (a->rn == 31) {
2744         gen_check_sp_alignment(s);
2745     }
2746     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
2747     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2748                                 false, a->rn != 31, memop);
2749     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
2750               a->rt, iss_sf, a->lasr);
2751     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2752     return true;
2753 }
2754 
2755 static bool trans_STXP(DisasContext *s, arg_stxr *a)
2756 {
2757     if (a->rn == 31) {
2758         gen_check_sp_alignment(s);
2759     }
2760     if (a->lasr) {
2761         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2762     }
2763     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
2764     return true;
2765 }
2766 
2767 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
2768 {
2769     if (a->rn == 31) {
2770         gen_check_sp_alignment(s);
2771     }
2772     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
2773     if (a->lasr) {
2774         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2775     }
2776     return true;
2777 }
2778 
2779 static bool trans_CASP(DisasContext *s, arg_CASP *a)
2780 {
2781     if (!dc_isar_feature(aa64_atomics, s)) {
2782         return false;
2783     }
2784     if (((a->rt | a->rs) & 1) != 0) {
2785         return false;
2786     }
2787 
2788     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
2789     return true;
2790 }
2791 
2792 static bool trans_CAS(DisasContext *s, arg_CAS *a)
2793 {
2794     if (!dc_isar_feature(aa64_atomics, s)) {
2795         return false;
2796     }
2797     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
2798     return true;
2799 }
2800 
2801 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
2802 {
2803     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
2804     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
2805     TCGv_i64 clean_addr = tcg_temp_new_i64();
2806     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
2807 
2808     gen_pc_plus_diff(s, clean_addr, a->imm);
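    /*
     * Literal loads are PC-relative; the decoder has already scaled
     * the 19-bit word offset into a byte offset in a->imm.
     */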
2809     do_gpr_ld(s, tcg_rt, clean_addr, memop,
2810               false, true, a->rt, iss_sf, false);
2811     return true;
2812 }
2813 
2814 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
2815 {
2816     /* Load register (literal), vector version */
2817     TCGv_i64 clean_addr;
2818     MemOp memop;
2819 
2820     if (!fp_access_check(s)) {
2821         return true;
2822     }
2823     memop = finalize_memop_asimd(s, a->sz);
2824     clean_addr = tcg_temp_new_i64();
2825     gen_pc_plus_diff(s, clean_addr, a->imm);
2826     do_fp_ld(s, a->rt, clean_addr, memop);
2827     return true;
2828 }
2829 
2830 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
2831                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
2832                                  uint64_t offset, bool is_store, MemOp mop)
2833 {
2834     if (a->rn == 31) {
2835         gen_check_sp_alignment(s);
2836     }
2837 
2838     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
2839     if (!a->p) {
2840         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
2841     }
2842 
2843     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
2844                                  (a->w || a->rn != 31), 2 << a->sz, mop);
2845 }
2846 
2847 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
2848                                   TCGv_i64 dirty_addr, uint64_t offset)
2849 {
2850     if (a->w) {
2851         if (a->p) {
2852             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2853         }
2854         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
2855     }
2856 }
2857 
2858 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
2859 {
2860     uint64_t offset = a->imm << a->sz;
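    /* The signed 7-bit immediate is scaled by the size of one register. */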
2861     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
2862     MemOp mop = finalize_memop(s, a->sz);
2863 
2864     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
2865     tcg_rt = cpu_reg(s, a->rt);
2866     tcg_rt2 = cpu_reg(s, a->rt2);
2867     /*
2868      * We built mop above for the single logical access -- rebuild it
2869      * now for the paired operation.
2870      *
2871      * With LSE2, non-sign-extending pairs are treated atomically if
2872      * aligned, and if unaligned one of the pair will be completely
2873      * within a 16-byte block and that element will be atomic.
2874      * Otherwise each element is separately atomic.
2875      * In all cases, issue one operation with the correct atomicity.
2876      */
2877     mop = a->sz + 1;
2878     if (s->align_mem) {
2879         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
2880     }
2881     mop = finalize_memop_pair(s, mop);
2882     if (a->sz == 2) {
2883         TCGv_i64 tmp = tcg_temp_new_i64();
2884 
2885         if (s->be_data == MO_LE) {
2886             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
2887         } else {
2888             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
2889         }
2890         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
2891     } else {
2892         TCGv_i128 tmp = tcg_temp_new_i128();
2893 
2894         if (s->be_data == MO_LE) {
2895             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
2896         } else {
2897             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
2898         }
2899         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
2900     }
2901     op_addr_ldstpair_post(s, a, dirty_addr, offset);
2902     return true;
2903 }
2904 
2905 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
2906 {
2907     uint64_t offset = a->imm << a->sz;
2908     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
2909     MemOp mop = finalize_memop(s, a->sz);
2910 
2911     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
2912     tcg_rt = cpu_reg(s, a->rt);
2913     tcg_rt2 = cpu_reg(s, a->rt2);
2914 
2915     /*
2916      * We built mop above for the single logical access -- rebuild it
2917      * now for the paired operation.
2918      *
2919      * With LSE2, non-sign-extending pairs are treated atomically if
2920      * aligned, and if unaligned one of the pair will be completely
2921      * within a 16-byte block and that element will be atomic.
2922      * Otherwise each element is separately atomic.
2923      * In all cases, issue one operation with the correct atomicity.
2924      *
2925      * This treats sign-extending loads like zero-extending loads,
2926      * since that reuses most of the code below.
2927      */
2928     mop = a->sz + 1;
2929     if (s->align_mem) {
2930         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
2931     }
2932     mop = finalize_memop_pair(s, mop);
2933     if (a->sz == 2) {
2934         int o2 = s->be_data == MO_LE ? 32 : 0;
2935         int o1 = o2 ^ 32;
2936 
2937         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
2938         if (a->sign) {
2939             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
2940             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
2941         } else {
2942             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
2943             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
2944         }
2945     } else {
2946         TCGv_i128 tmp = tcg_temp_new_i128();
2947 
2948         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
2949         if (s->be_data == MO_LE) {
2950             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
2951         } else {
2952             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
2953         }
2954     }
2955     op_addr_ldstpair_post(s, a, dirty_addr, offset);
2956     return true;
2957 }
2958 
2959 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
2960 {
2961     uint64_t offset = a->imm << a->sz;
2962     TCGv_i64 clean_addr, dirty_addr;
2963     MemOp mop;
2964 
2965     if (!fp_access_check(s)) {
2966         return true;
2967     }
2968 
2969     /* LSE2 does not merge FP pairs; leave these as separate operations. */
2970     mop = finalize_memop_asimd(s, a->sz);
2971     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
2972     do_fp_st(s, a->rt, clean_addr, mop);
2973     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
2974     do_fp_st(s, a->rt2, clean_addr, mop);
2975     op_addr_ldstpair_post(s, a, dirty_addr, offset);
2976     return true;
2977 }
2978 
2979 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
2980 {
2981     uint64_t offset = a->imm << a->sz;
2982     TCGv_i64 clean_addr, dirty_addr;
2983     MemOp mop;
2984 
2985     if (!fp_access_check(s)) {
2986         return true;
2987     }
2988 
2989     /* LSE2 does not merge FP pairs; leave these as separate operations. */
2990     mop = finalize_memop_asimd(s, a->sz);
2991     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
2992     do_fp_ld(s, a->rt, clean_addr, mop);
2993     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
2994     do_fp_ld(s, a->rt2, clean_addr, mop);
2995     op_addr_ldstpair_post(s, a, dirty_addr, offset);
2996     return true;
2997 }
2998 
2999 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3000 {
3001     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3002     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3003     MemOp mop;
3004     TCGv_i128 tmp;
3005 
3006     /* STGP only comes in one size. */
3007     tcg_debug_assert(a->sz == MO_64);
3008 
3009     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3010         return false;
3011     }
3012 
3013     if (a->rn == 31) {
3014         gen_check_sp_alignment(s);
3015     }
3016 
3017     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3018     if (!a->p) {
3019         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3020     }
3021 
3022     clean_addr = clean_data_tbi(s, dirty_addr);
3023     tcg_rt = cpu_reg(s, a->rt);
3024     tcg_rt2 = cpu_reg(s, a->rt2);
3025 
3026     /*
3027      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3028      * and one tag operation.  We implement it as one single aligned 16-byte
3029      * memory operation for convenience.  Note that the alignment ensures
3030      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3031      */
3032     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3033 
3034     tmp = tcg_temp_new_i128();
3035     if (s->be_data == MO_LE) {
3036         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3037     } else {
3038         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3039     }
3040     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3041 
3042     /* Perform the tag store, if tag access enabled. */
3043     if (s->ata[0]) {
3044         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3045             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3046         } else {
3047             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3048         }
3049     }
3050 
3051     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3052     return true;
3053 }
3054 
3055 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3056                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3057                                  uint64_t offset, bool is_store, MemOp mop)
3058 {
3059     int memidx;
3060 
3061     if (a->rn == 31) {
3062         gen_check_sp_alignment(s);
3063     }
3064 
3065     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3066     if (!a->p) {
3067         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3068     }
3069     memidx = get_a64_user_mem_index(s, a->unpriv);
3070     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3071                                         a->w || a->rn != 31,
3072                                         mop, a->unpriv, memidx);
3073 }
3074 
3075 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3076                                   TCGv_i64 dirty_addr, uint64_t offset)
3077 {
3078     if (a->w) {
3079         if (a->p) {
3080             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3081         }
3082         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3083     }
3084 }
3085 
3086 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3087 {
3088     bool iss_sf, iss_valid = !a->w;
3089     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3090     int memidx = get_a64_user_mem_index(s, a->unpriv);
3091     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3092 
3093     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3094 
3095     tcg_rt = cpu_reg(s, a->rt);
3096     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3097 
3098     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3099                      iss_valid, a->rt, iss_sf, false);
3100     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3101     return true;
3102 }
3103 
3104 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3105 {
3106     bool iss_sf, iss_valid = !a->w;
3107     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3108     int memidx = get_a64_user_mem_index(s, a->unpriv);
3109     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3110 
3111     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3112 
3113     tcg_rt = cpu_reg(s, a->rt);
3114     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3115 
3116     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3117                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3118     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3119     return true;
3120 }
3121 
3122 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3123 {
3124     TCGv_i64 clean_addr, dirty_addr;
3125     MemOp mop;
3126 
3127     if (!fp_access_check(s)) {
3128         return true;
3129     }
3130     mop = finalize_memop_asimd(s, a->sz);
3131     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3132     do_fp_st(s, a->rt, clean_addr, mop);
3133     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3134     return true;
3135 }
3136 
3137 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3138 {
3139     TCGv_i64 clean_addr, dirty_addr;
3140     MemOp mop;
3141 
3142     if (!fp_access_check(s)) {
3143         return true;
3144     }
3145     mop = finalize_memop_asimd(s, a->sz);
3146     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3147     do_fp_ld(s, a->rt, clean_addr, mop);
3148     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3149     return true;
3150 }
3151 
3152 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3153                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3154                              bool is_store, MemOp memop)
3155 {
3156     TCGv_i64 tcg_rm;
3157 
3158     if (a->rn == 31) {
3159         gen_check_sp_alignment(s);
3160     }
3161     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3162 
3163     tcg_rm = read_cpu_reg(s, a->rm, 1);
3164     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3165 
3166     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3167     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3168 }
3169 
3170 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3171 {
3172     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3173     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3174     MemOp memop;
3175 
3176     if (extract32(a->opt, 1, 1) == 0) {
3177         return false;
3178     }
3179 
3180     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3181     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3182     tcg_rt = cpu_reg(s, a->rt);
3183     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3184               a->ext, true, a->rt, iss_sf, false);
3185     return true;
3186 }
3187 
3188 static bool trans_STR(DisasContext *s, arg_ldst *a)
3189 {
3190     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3191     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3192     MemOp memop;
3193 
3194     if (extract32(a->opt, 1, 1) == 0) {
3195         return false;
3196     }
3197 
3198     memop = finalize_memop(s, a->sz);
3199     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3200     tcg_rt = cpu_reg(s, a->rt);
3201     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3202     return true;
3203 }
3204 
3205 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3206 {
3207     TCGv_i64 clean_addr, dirty_addr;
3208     MemOp memop;
3209 
3210     if (extract32(a->opt, 1, 1) == 0) {
3211         return false;
3212     }
3213 
3214     if (!fp_access_check(s)) {
3215         return true;
3216     }
3217 
3218     memop = finalize_memop_asimd(s, a->sz);
3219     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3220     do_fp_ld(s, a->rt, clean_addr, memop);
3221     return true;
3222 }
3223 
3224 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3225 {
3226     TCGv_i64 clean_addr, dirty_addr;
3227     MemOp memop;
3228 
3229     if (extract32(a->opt, 1, 1) == 0) {
3230         return false;
3231     }
3232 
3233     if (!fp_access_check(s)) {
3234         return true;
3235     }
3236 
3237     memop = finalize_memop_asimd(s, a->sz);
3238     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3239     do_fp_st(s, a->rt, clean_addr, memop);
3240     return true;
3241 }
3242 
3243 
3244 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3245                          int sign, bool invert)
3246 {
3247     MemOp mop = a->sz | sign;
3248     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3249 
3250     if (a->rn == 31) {
3251         gen_check_sp_alignment(s);
3252     }
3253     mop = check_atomic_align(s, a->rn, mop);
3254     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3255                                 a->rn != 31, mop);
3256     tcg_rs = read_cpu_reg(s, a->rs, true);
3257     tcg_rt = cpu_reg(s, a->rt);
3258     if (invert) {
3259         tcg_gen_not_i64(tcg_rs, tcg_rs);
3260     }
3261     /*
3262      * The tcg atomic primitives are all full barriers.  Therefore we
3263      * can ignore the Acquire and Release bits of this instruction.
3264      */
3265     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3266 
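         /*
          * MO_SIGN is set only for LDSMAX/LDSMIN, where the helper must
          * compare signed values; architecturally Rt receives the loaded
          * value zero-extended, so undo the sign-extension here.
          */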
3267     if (mop & MO_SIGN) {
3268         switch (a->sz) {
3269         case MO_8:
3270             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3271             break;
3272         case MO_16:
3273             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3274             break;
3275         case MO_32:
3276             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3277             break;
3278         case MO_64:
3279             break;
3280         default:
3281             g_assert_not_reached();
3282         }
3283     }
3284     return true;
3285 }
3286 
3287 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3288 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3289 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3290 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3291 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3292 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3293 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3294 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3295 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3296 
3297 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3298 {
3299     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3300     TCGv_i64 clean_addr;
3301     MemOp mop;
3302 
3303     if (!dc_isar_feature(aa64_atomics, s) ||
3304         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3305         return false;
3306     }
3307     if (a->rn == 31) {
3308         gen_check_sp_alignment(s);
3309     }
3310     mop = check_atomic_align(s, a->rn, a->sz);
3311     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3312                                 a->rn != 31, mop);
3313     /*
3314      * LDAPR* are a special case because they are a simple load, not a
3315      * fetch-and-do-something op.
3316      * The architectural consistency requirements here are weaker than
3317      * full load-acquire (we only need "load-acquire processor consistent"),
3318      * but we choose to implement them as full LDAQ.
3319      */
3320     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3321               true, a->rt, iss_sf, true);
3322     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3323     return true;
3324 }
3325 
3326 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3327 {
3328     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3329     MemOp memop;
3330 
3331     /* Load with pointer authentication */
3332     if (!dc_isar_feature(aa64_pauth, s)) {
3333         return false;
3334     }
3335 
3336     if (a->rn == 31) {
3337         gen_check_sp_alignment(s);
3338     }
3339     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3340 
3341     if (s->pauth_active) {
3342         if (!a->m) {
3343             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3344                                       tcg_constant_i64(0));
3345         } else {
3346             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3347                                       tcg_constant_i64(0));
3348         }
3349     }
3350 
3351     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3352 
3353     memop = finalize_memop(s, MO_64);
3354 
3355     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3356     clean_addr = gen_mte_check1(s, dirty_addr, false,
3357                                 a->w || a->rn != 31, memop);
3358 
3359     tcg_rt = cpu_reg(s, a->rt);
3360     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3361               /* extend */ false, /* iss_valid */ !a->w,
3362               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3363 
3364     if (a->w) {
3365         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3366     }
3367     return true;
3368 }
3369 
3370 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3371 {
3372     TCGv_i64 clean_addr, dirty_addr;
3373     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3374     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3375 
3376     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3377         return false;
3378     }
3379 
3380     if (a->rn == 31) {
3381         gen_check_sp_alignment(s);
3382     }
3383 
3384     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3385     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3386     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3387     clean_addr = clean_data_tbi(s, dirty_addr);
3388 
3389     /*
3390      * Load-AcquirePC semantics; we implement as the slightly more
3391      * restrictive Load-Acquire.
3392      */
3393     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3394               a->rt, iss_sf, true);
3395     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3396     return true;
3397 }
3398 
3399 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3400 {
3401     TCGv_i64 clean_addr, dirty_addr;
3402     MemOp mop = a->sz;
3403     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3404 
3405     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3406         return false;
3407     }
3408 
3409     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3410 
3411     if (a->rn == 31) {
3412         gen_check_sp_alignment(s);
3413     }
3414 
3415     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3416     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3417     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3418     clean_addr = clean_data_tbi(s, dirty_addr);
3419 
3420     /* Store-Release semantics */
3421     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3422     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3423     return true;
3424 }
3425 
3426 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3427 {
3428     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3429     MemOp endian, align, mop;
3430 
3431     int total;    /* total bytes */
3432     int elements; /* elements per vector */
3433     int r;
3434     int size = a->sz;
3435 
3436     if (!a->p && a->rm != 0) {
3437         /* For non-postindexed accesses the Rm field must be 0 */
3438         return false;
3439     }
3440     if (size == 3 && !a->q && a->selem != 1) {
3441         return false;
3442     }
3443     if (!fp_access_check(s)) {
3444         return true;
3445     }
3446 
3447     if (a->rn == 31) {
3448         gen_check_sp_alignment(s);
3449     }
3450 
3451     /* For our purposes, bytes are always little-endian.  */
3452     endian = s->be_data;
3453     if (size == 0) {
3454         endian = MO_LE;
3455     }
3456 
3457     total = a->rpt * a->selem * (a->q ? 16 : 8);
3458     tcg_rn = cpu_reg_sp(s, a->rn);
3459 
3460     /*
3461      * Issue the MTE check vs the logical repeat count, before we
3462      * promote consecutive little-endian elements below.
3463      */
3464     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3465                                 finalize_memop_asimd(s, size));
3466 
3467     /*
3468      * Consecutive little-endian elements from a single register
3469      * can be promoted to a larger little-endian operation.
3470      */
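         /*
          * For example (illustrative): LD1 {v0.16b}, [x0] has selem == 1
          * and size == 0, so instead of sixteen byte accesses it is
          * performed as two little-endian MO_64 loads.
          */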
3471     align = MO_ALIGN;
3472     if (a->selem == 1 && endian == MO_LE) {
3473         align = pow2_align(size);
3474         size = 3;
3475     }
3476     if (!s->align_mem) {
3477         align = 0;
3478     }
3479     mop = endian | size | align;
3480 
3481     elements = (a->q ? 16 : 8) >> size;
3482     tcg_ebytes = tcg_constant_i64(1 << size);
3483     for (r = 0; r < a->rpt; r++) {
3484         int e;
3485         for (e = 0; e < elements; e++) {
3486             int xs;
3487             for (xs = 0; xs < a->selem; xs++) {
3488                 int tt = (a->rt + r + xs) % 32;
3489                 do_vec_ld(s, tt, e, clean_addr, mop);
3490                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3491             }
3492         }
3493     }
3494 
3495     /*
3496      * For non-quad operations, setting a slice of the low 64 bits of
3497      * the register clears the high 64 bits (in the ARM ARM pseudocode
3498      * this is implicit in the fact that 'rval' is a 64 bit wide
3499      * variable).  For quad operations, we might still need to zero
3500      * the high bits of SVE.
3501      */
3502     for (r = 0; r < a->rpt * a->selem; r++) {
3503         int tt = (a->rt + r) % 32;
3504         clear_vec_high(s, a->q, tt);
3505     }
3506 
3507     if (a->p) {
3508         if (a->rm == 31) {
3509             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3510         } else {
3511             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3512         }
3513     }
3514     return true;
3515 }
3516 
3517 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3518 {
3519     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3520     MemOp endian, align, mop;
3521 
3522     int total;    /* total bytes */
3523     int elements; /* elements per vector */
3524     int r;
3525     int size = a->sz;
3526 
3527     if (!a->p && a->rm != 0) {
3528         /* For non-postindexed accesses the Rm field must be 0 */
3529         return false;
3530     }
3531     if (size == 3 && !a->q && a->selem != 1) {
3532         return false;
3533     }
3534     if (!fp_access_check(s)) {
3535         return true;
3536     }
3537 
3538     if (a->rn == 31) {
3539         gen_check_sp_alignment(s);
3540     }
3541 
3542     /* For our purposes, bytes are always little-endian.  */
3543     endian = s->be_data;
3544     if (size == 0) {
3545         endian = MO_LE;
3546     }
3547 
3548     total = a->rpt * a->selem * (a->q ? 16 : 8);
3549     tcg_rn = cpu_reg_sp(s, a->rn);
3550 
3551     /*
3552      * Issue the MTE check vs the logical repeat count, before we
3553      * promote consecutive little-endian elements below.
3554      */
3555     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3556                                 finalize_memop_asimd(s, size));
3557 
3558     /*
3559      * Consecutive little-endian elements from a single register
3560      * can be promoted to a larger little-endian operation.
3561      */
3562     align = MO_ALIGN;
3563     if (a->selem == 1 && endian == MO_LE) {
3564         align = pow2_align(size);
3565         size = 3;
3566     }
3567     if (!s->align_mem) {
3568         align = 0;
3569     }
3570     mop = endian | size | align;
3571 
3572     elements = (a->q ? 16 : 8) >> size;
3573     tcg_ebytes = tcg_constant_i64(1 << size);
3574     for (r = 0; r < a->rpt; r++) {
3575         int e;
3576         for (e = 0; e < elements; e++) {
3577             int xs;
3578             for (xs = 0; xs < a->selem; xs++) {
3579                 int tt = (a->rt + r + xs) % 32;
3580                 do_vec_st(s, tt, e, clean_addr, mop);
3581                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3582             }
3583         }
3584     }
3585 
3586     if (a->p) {
3587         if (a->rm == 31) {
3588             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3589         } else {
3590             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3591         }
3592     }
3593     return true;
3594 }
3595 
3596 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3597 {
3598     int xs, total, rt;
3599     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3600     MemOp mop;
3601 
3602     if (!a->p && a->rm != 0) {
3603         return false;
3604     }
3605     if (!fp_access_check(s)) {
3606         return true;
3607     }
3608 
3609     if (a->rn == 31) {
3610         gen_check_sp_alignment(s);
3611     }
3612 
3613     total = a->selem << a->scale;
3614     tcg_rn = cpu_reg_sp(s, a->rn);
3615 
3616     mop = finalize_memop_asimd(s, a->scale);
3617     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3618                                 total, mop);
3619 
3620     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3621     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3622         do_vec_st(s, rt, a->index, clean_addr, mop);
3623         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3624     }
3625 
3626     if (a->p) {
3627         if (a->rm == 31) {
3628             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3629         } else {
3630             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3631         }
3632     }
3633     return true;
3634 }
3635 
3636 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3637 {
3638     int xs, total, rt;
3639     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3640     MemOp mop;
3641 
3642     if (!a->p && a->rm != 0) {
3643         return false;
3644     }
3645     if (!fp_access_check(s)) {
3646         return true;
3647     }
3648 
3649     if (a->rn == 31) {
3650         gen_check_sp_alignment(s);
3651     }
3652 
3653     total = a->selem << a->scale;
3654     tcg_rn = cpu_reg_sp(s, a->rn);
3655 
3656     mop = finalize_memop_asimd(s, a->scale);
3657     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3658                                 total, mop);
3659 
3660     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3661     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3662         do_vec_ld(s, rt, a->index, clean_addr, mop);
3663         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3664     }
3665 
3666     if (a->p) {
3667         if (a->rm == 31) {
3668             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3669         } else {
3670             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3671         }
3672     }
3673     return true;
3674 }
3675 
3676 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3677 {
3678     int xs, total, rt;
3679     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3680     MemOp mop;
3681 
3682     if (!a->p && a->rm != 0) {
3683         return false;
3684     }
3685     if (!fp_access_check(s)) {
3686         return true;
3687     }
3688 
3689     if (a->rn == 31) {
3690         gen_check_sp_alignment(s);
3691     }
3692 
3693     total = a->selem << a->scale;
3694     tcg_rn = cpu_reg_sp(s, a->rn);
3695 
3696     mop = finalize_memop_asimd(s, a->scale);
3697     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3698                                 total, mop);
3699 
3700     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3701     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3702         /* Load and replicate to all elements */
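             /*
              * e.g. LD1R {v0.4s}, [x0]: one 32-bit load (a->scale == 2)
              * duplicated into all four lanes of the 128-bit vector
              * ((a->q + 1) * 8 == 16 bytes).
              */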
3703         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3704 
3705         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3706         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3707                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3708         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3709     }
3710 
3711     if (a->p) {
3712         if (a->rm == 31) {
3713             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3714         } else {
3715             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3716         }
3717     }
3718     return true;
3719 }
3720 
3721 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
3722 {
3723     TCGv_i64 addr, clean_addr, tcg_rt;
3724     int size = 4 << s->dcz_blocksize;
3725 
3726     if (!dc_isar_feature(aa64_mte, s)) {
3727         return false;
3728     }
3729     if (s->current_el == 0) {
3730         return false;
3731     }
3732 
3733     if (a->rn == 31) {
3734         gen_check_sp_alignment(s);
3735     }
3736 
3737     addr = read_cpu_reg_sp(s, a->rn, true);
3738     tcg_gen_addi_i64(addr, addr, a->imm);
3739     tcg_rt = cpu_reg(s, a->rt);
3740 
3741     if (s->ata[0]) {
3742         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
3743     }
3744     /*
3745      * The non-tags portion of STZGM is mostly like DC_ZVA,
3746      * except the alignment happens before the access.
3747      */
3748     clean_addr = clean_data_tbi(s, addr);
3749     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3750     gen_helper_dc_zva(tcg_env, clean_addr);
3751     return true;
3752 }
3753 
3754 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
3755 {
3756     TCGv_i64 addr, clean_addr, tcg_rt;
3757 
3758     if (!dc_isar_feature(aa64_mte, s)) {
3759         return false;
3760     }
3761     if (s->current_el == 0) {
3762         return false;
3763     }
3764 
3765     if (a->rn == 31) {
3766         gen_check_sp_alignment(s);
3767     }
3768 
3769     addr = read_cpu_reg_sp(s, a->rn, true);
3770     tcg_gen_addi_i64(addr, addr, a->imm);
3771     tcg_rt = cpu_reg(s, a->rt);
3772 
3773     if (s->ata[0]) {
3774         gen_helper_stgm(tcg_env, addr, tcg_rt);
3775     } else {
3776         MMUAccessType acc = MMU_DATA_STORE;
3777         int size = 4 << s->gm_blocksize;
3778 
3779         clean_addr = clean_data_tbi(s, addr);
3780         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3781         gen_probe_access(s, clean_addr, acc, size);
3782     }
3783     return true;
3784 }
3785 
3786 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
3787 {
3788     TCGv_i64 addr, clean_addr, tcg_rt;
3789 
3790     if (!dc_isar_feature(aa64_mte, s)) {
3791         return false;
3792     }
3793     if (s->current_el == 0) {
3794         return false;
3795     }
3796 
3797     if (a->rn == 31) {
3798         gen_check_sp_alignment(s);
3799     }
3800 
3801     addr = read_cpu_reg_sp(s, a->rn, true);
3802     tcg_gen_addi_i64(addr, addr, a->imm);
3803     tcg_rt = cpu_reg(s, a->rt);
3804 
3805     if (s->ata[0]) {
3806         gen_helper_ldgm(tcg_rt, tcg_env, addr);
3807     } else {
3808         MMUAccessType acc = MMU_DATA_LOAD;
3809         int size = 4 << s->gm_blocksize;
3810 
3811         clean_addr = clean_data_tbi(s, addr);
3812         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3813         gen_probe_access(s, clean_addr, acc, size);
3814         /* The result tags are zeros.  */
3815         tcg_gen_movi_i64(tcg_rt, 0);
3816     }
3817     return true;
3818 }
3819 
3820 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
3821 {
3822     TCGv_i64 addr, clean_addr, tcg_rt;
3823 
3824     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3825         return false;
3826     }
3827 
3828     if (a->rn == 31) {
3829         gen_check_sp_alignment(s);
3830     }
3831 
3832     addr = read_cpu_reg_sp(s, a->rn, true);
3833     if (!a->p) {
3834         /* pre-index or signed offset */
3835         tcg_gen_addi_i64(addr, addr, a->imm);
3836     }
3837 
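         /* Align down to the 16-byte granule covered by one allocation tag. */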
3838     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
3839     tcg_rt = cpu_reg(s, a->rt);
3840     if (s->ata[0]) {
3841         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
3842     } else {
3843         /*
3844          * Tag access disabled: we must check for aborts on the load
3845          * from [rn+offset], and then insert a 0 tag into rt.
3846          */
3847         clean_addr = clean_data_tbi(s, addr);
3848         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
3849         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
3850     }
3851 
3852     if (a->w) {
3853         /* pre-index or post-index */
3854         if (a->p) {
3855             /* post-index */
3856             tcg_gen_addi_i64(addr, addr, a->imm);
3857         }
3858         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
3859     }
3860     return true;
3861 }
3862 
3863 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
3864 {
3865     TCGv_i64 addr, tcg_rt;
3866 
3867     if (a->rn == 31) {
3868         gen_check_sp_alignment(s);
3869     }
3870 
3871     addr = read_cpu_reg_sp(s, a->rn, true);
3872     if (!a->p) {
3873         /* pre-index or signed offset */
3874         tcg_gen_addi_i64(addr, addr, a->imm);
3875     }
3876     tcg_rt = cpu_reg_sp(s, a->rt);
3877     if (!s->ata[0]) {
3878         /*
3879          * For STG and ST2G, we need to check alignment and probe memory.
3880          * TODO: For STZG and STZ2G, we could rely on the stores below,
3881          * at least for system mode; user-only won't enforce alignment.
3882          */
3883         if (is_pair) {
3884             gen_helper_st2g_stub(tcg_env, addr);
3885         } else {
3886             gen_helper_stg_stub(tcg_env, addr);
3887         }
3888     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3889         if (is_pair) {
3890             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
3891         } else {
3892             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
3893         }
3894     } else {
3895         if (is_pair) {
3896             gen_helper_st2g(tcg_env, addr, tcg_rt);
3897         } else {
3898             gen_helper_stg(tcg_env, addr, tcg_rt);
3899         }
3900     }
3901 
3902     if (is_zero) {
3903         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
3904         TCGv_i64 zero64 = tcg_constant_i64(0);
3905         TCGv_i128 zero128 = tcg_temp_new_i128();
3906         int mem_index = get_mem_index(s);
3907         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
3908 
3909         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
3910 
3911         /* This is 1 or 2 atomic 16-byte operations. */
3912         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
3913         if (is_pair) {
3914             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
3915             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
3916         }
3917     }
3918 
3919     if (a->w) {
3920         /* pre-index or post-index */
3921         if (a->p) {
3922             /* post-index */
3923             tcg_gen_addi_i64(addr, addr, a->imm);
3924         }
3925         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
3926     }
3927     return true;
3928 }
3929 
3930 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
3931 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
3932 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
3933 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
3934 
3935 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
3936 
3937 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
3938                    bool is_setg, SetFn fn)
3939 {
3940     int memidx;
3941     uint32_t syndrome, desc = 0;
3942 
3943     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
3944         return false;
3945     }
3946 
3947     /*
3948      * UNPREDICTABLE cases: we choose to UNDEF, which allows
3949      * us to pull this check before the CheckMOPSEnabled() test
3950      * (which we do in the helper function)
3951      */
3952     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
3953         a->rd == 31 || a->rn == 31) {
3954         return false;
3955     }
3956 
3957     memidx = get_a64_user_mem_index(s, a->unpriv);
3958 
3959     /*
3960      * We pass option_a == true, matching our implementation;
3961      * we pass wrong_option == false: helper function may set that bit.
3962      */
3963     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
3964                        is_epilogue, false, true, a->rd, a->rs, a->rn);
3965 
3966     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
3967         /* We may need to do MTE tag checking, so assemble the descriptor */
3968         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
3969         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
3970         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
3971         /* SIZEM1 and ALIGN we leave 0 (byte write) */
3972     }
3973     /* The helper function always needs the memidx even with MTE disabled */
3974     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
3975 
3976     /*
3977      * The helper needs the register numbers, but since they're in
3978      * the syndrome anyway, we let it extract them from there rather
3979      * than passing in an extra three integer arguments.
3980      */
3981     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
3982     return true;
3983 }
3984 
3985 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
3986 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
3987 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
3988 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
3989 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
3990 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
3991 
3992 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
3993 
3994 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
3995 {
3996     int rmemidx, wmemidx;
3997     uint32_t syndrome, rdesc = 0, wdesc = 0;
3998     bool wunpriv = extract32(a->options, 0, 1);
3999     bool runpriv = extract32(a->options, 1, 1);
4000 
4001     /*
4002      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4003      * us to pull this check before the CheckMOPSEnabled() test
4004      * (which we do in the helper function)
4005      */
4006     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4007         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4008         return false;
4009     }
4010 
4011     rmemidx = get_a64_user_mem_index(s, runpriv);
4012     wmemidx = get_a64_user_mem_index(s, wunpriv);
4013 
4014     /*
4015      * We pass option_a == true, matching our implementation;
4016      * we pass wrong_option == false: helper function may set that bit.
4017      */
4018     syndrome = syn_mop(false, false, a->options, is_epilogue,
4019                        false, true, a->rd, a->rs, a->rn);
4020 
4021     /* If we need to do MTE tag checking, assemble the descriptors */
4022     if (s->mte_active[runpriv]) {
4023         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4024         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4025     }
4026     if (s->mte_active[wunpriv]) {
4027         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4028         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4029         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4030     }
4031     /* The helper function needs these parts of the descriptor regardless */
4032     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4033     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4034 
4035     /*
4036      * The helper needs the register numbers, but since they're in
4037      * the syndrome anyway, we let it extract them from there rather
4038      * than passing in an extra three integer arguments.
4039      */
4040     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4041        tcg_constant_i32(rdesc));
4042     return true;
4043 }
4044 
4045 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4046 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4047 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4048 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4049 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4050 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4051 
4052 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4053 
4054 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4055                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4056 {
4057     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4058     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4059     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4060 
4061     fn(tcg_rd, tcg_rn, tcg_imm);
4062     if (!a->sf) {
4063         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4064     }
4065     return true;
4066 }
4067 
4068 /*
4069  * PC-rel. addressing
4070  */
4071 
4072 static bool trans_ADR(DisasContext *s, arg_ri *a)
4073 {
4074     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4075     return true;
4076 }
4077 
4078 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4079 {
4080     int64_t offset = (int64_t)a->imm << 12;
4081 
4082     /* The page offset is ok for CF_PCREL. */
4083     offset -= s->pc_curr & 0xfff;
4084     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4085     return true;
4086 }
4087 
4088 /*
4089  * Add/subtract (immediate)
4090  */
4091 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4092 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4093 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4094 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4095 
4096 /*
4097  * Add/subtract (immediate, with tags)
4098  */
4099 
4100 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4101                                       bool sub_op)
4102 {
4103     TCGv_i64 tcg_rn, tcg_rd;
4104     int imm;
4105 
4106     imm = a->uimm6 << LOG2_TAG_GRANULE;
4107     if (sub_op) {
4108         imm = -imm;
4109     }
4110 
4111     tcg_rn = cpu_reg_sp(s, a->rn);
4112     tcg_rd = cpu_reg_sp(s, a->rd);
4113 
4114     if (s->ata[0]) {
4115         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4116                            tcg_constant_i32(imm),
4117                            tcg_constant_i32(a->uimm4));
4118     } else {
4119         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4120         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4121     }
4122     return true;
4123 }
4124 
4125 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4126 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4127 
4128 /* The input should be a value in the bottom e bits (with higher
4129  * bits zero); returns that value replicated into every element
4130  * of size e in a 64 bit integer.
4131  */
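     /*
      * For example (illustrative): bitfield_replicate(0x3, 4) returns
      * 0x3333333333333333 -- the two-bit run appears in every 4-bit
      * element of the result.
      */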
4132 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4133 {
4134     assert(e != 0);
4135     while (e < 64) {
4136         mask |= mask << e;
4137         e *= 2;
4138     }
4139     return mask;
4140 }
4141 
4142 /*
4143  * Logical (immediate)
4144  */
4145 
4146 /*
4147  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4148  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4149  * value (ie should cause a guest UNDEF exception), and true if they are
4150  * valid, in which case the decoded bit pattern is written to result.
4151  */
4152 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4153                             unsigned int imms, unsigned int immr)
4154 {
4155     uint64_t mask;
4156     unsigned e, levels, s, r;
4157     int len;
4158 
4159     assert(immn < 2 && imms < 64 && immr < 64);
4160 
4161     /* The bit patterns we create here are 64 bit patterns which
4162      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4163      * 64 bits each. Each element contains the same value: a run
4164      * of between 1 and e-1 non-zero bits, rotated within the
4165      * element by between 0 and e-1 bits.
4166      *
4167      * The element size and run length are encoded into immn (1 bit)
4168      * and imms (6 bits) as follows:
4169      * 64 bit elements: immn = 1, imms = <length of run - 1>
4170      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4171      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4172      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4173      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4174      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4175      * Notice that immn = 0, imms = 11111x is the only combination
4176      * not covered by one of the above options; this is reserved.
4177      * Further, <length of run - 1> = all-ones is a reserved pattern.
4178      *
4179      * In all cases the rotation is by immr % e (and immr is 6 bits).
4180      */
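         /*
          * Worked example (illustrative): immn = 0, imms = 0b100101,
          * immr = 2 selects 16-bit elements (imms = 10:0101) with a run
          * of s + 1 = 6 set bits, rotated right by 2 within each element:
          *     element = ror16(0x003f, 2) = 0xc00f
          *     wmask   = 0xc00fc00fc00fc00f
          */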
4181 
4182     /* First determine the element size */
4183     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4184     if (len < 1) {
4185         /* This is the immn == 0, imms == 11111x case */
4186         return false;
4187     }
4188     e = 1 << len;
4189 
4190     levels = e - 1;
4191     s = imms & levels;
4192     r = immr & levels;
4193 
4194     if (s == levels) {
4195         /* <length of run - 1> mustn't be all-ones. */
4196         return false;
4197     }
4198 
4199     /* Create the value of one element: s+1 set bits rotated
4200      * by r within the element (which is e bits wide)...
4201      */
4202     mask = MAKE_64BIT_MASK(0, s + 1);
4203     if (r) {
4204         mask = (mask >> r) | (mask << (e - r));
4205         mask &= MAKE_64BIT_MASK(0, e);
4206     }
4207     /* ...then replicate the element over the whole 64 bit value */
4208     mask = bitfield_replicate(mask, e);
4209     *result = mask;
4210     return true;
4211 }
4212 
4213 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4214                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4215 {
4216     TCGv_i64 tcg_rd, tcg_rn;
4217     uint64_t imm;
4218 
4219     /* Some immediate field values are reserved. */
4220     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4221                                 extract32(a->dbm, 0, 6),
4222                                 extract32(a->dbm, 6, 6))) {
4223         return false;
4224     }
4225     if (!a->sf) {
4226         imm &= 0xffffffffull;
4227     }
4228 
4229     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4230     tcg_rn = cpu_reg(s, a->rn);
4231 
4232     fn(tcg_rd, tcg_rn, imm);
4233     if (set_cc) {
4234         gen_logic_CC(a->sf, tcg_rd);
4235     }
4236     if (!a->sf) {
4237         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4238     }
4239     return true;
4240 }
4241 
4242 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4243 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4244 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4245 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4246 
4247 /*
4248  * Move wide (immediate)
4249  */
4250 
4251 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4252 {
4253     int pos = a->hw << 4;
4254     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4255     return true;
4256 }
4257 
4258 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4259 {
4260     int pos = a->hw << 4;
4261     uint64_t imm = a->imm;
4262 
4263     imm = ~(imm << pos);
4264     if (!a->sf) {
4265         imm = (uint32_t)imm;
4266     }
4267     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4268     return true;
4269 }
4270 
4271 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4272 {
4273     int pos = a->hw << 4;
4274     TCGv_i64 tcg_rd, tcg_im;
4275 
4276     tcg_rd = cpu_reg(s, a->rd);
4277     tcg_im = tcg_constant_i64(a->imm);
4278     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4279     if (!a->sf) {
4280         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4281     }
4282     return true;
4283 }
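
     /*
      * Illustrative example: MOVK X0, #0x1234, LSL #16 deposits the
      * immediate into bits [31:16] of X0 (hw == 1, so pos == 16) and
      * leaves all other bits unchanged.
      */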
4284 
4285 /*
4286  * Bitfield
4287  */
4288 
4289 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4290 {
4291     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4292     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4293     unsigned int bitsize = a->sf ? 64 : 32;
4294     unsigned int ri = a->immr;
4295     unsigned int si = a->imms;
4296     unsigned int pos, len;
4297 
4298     if (si >= ri) {
4299         /* Wd<s-r:0> = Wn<s:r> */
4300         len = (si - ri) + 1;
4301         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4302         if (!a->sf) {
4303             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4304         }
4305     } else {
4306         /* Wd<32+s-r,32-r> = Wn<s:0> */
4307         len = si + 1;
4308         pos = (bitsize - ri) & (bitsize - 1);
4309 
4310         if (len < ri) {
4311             /*
4312              * Sign extend the destination field from len to fill the
4313              * balance of the word.  Let the deposit below insert all
4314              * of those sign bits.
4315              */
4316             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4317             len = ri;
4318         }
4319 
4320         /*
4321          * We start with zero, and we haven't modified any bits outside
4322          * bitsize, therefore no final zero-extension is needed for !sf.
4323          */
4324         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4325     }
4326     return true;
4327 }
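
     /*
      * Illustrative example: SBFM Wd, Wn, #8, #15 is the alias
      * SBFX Wd, Wn, #8, #8.  Since si (15) >= ri (8) it is implemented
      * as a single sextract of bits [15:8]; the final ext32u then
      * clears bits [63:32] as required for the 32-bit form.
      */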
4328 
4329 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4330 {
4331     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4332     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4333     unsigned int bitsize = a->sf ? 64 : 32;
4334     unsigned int ri = a->immr;
4335     unsigned int si = a->imms;
4336     unsigned int pos, len;
4337 
4341     if (si >= ri) {
4342         /* Wd<s-r:0> = Wn<s:r> */
4343         len = (si - ri) + 1;
4344         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4345     } else {
4346         /* Wd<32+s-r,32-r> = Wn<s:0> */
4347         len = si + 1;
4348         pos = (bitsize - ri) & (bitsize - 1);
4349         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4350     }
4351     return true;
4352 }
4353 
4354 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4355 {
4356     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4357     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4358     unsigned int bitsize = a->sf ? 64 : 32;
4359     unsigned int ri = a->immr;
4360     unsigned int si = a->imms;
4361     unsigned int pos, len;
4362 
4366     if (si >= ri) {
4367         /* Wd<s-r:0> = Wn<s:r> */
4368         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4369         len = (si - ri) + 1;
4370         pos = 0;
4371     } else {
4372         /* Wd<32+s-r,32-r> = Wn<s:0> */
4373         len = si + 1;
4374         pos = (bitsize - ri) & (bitsize - 1);
4375     }
4376 
4377     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4378     if (!a->sf) {
4379         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4380     }
4381     return true;
4382 }
4383 
4384 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4385 {
4386     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4387 
4388     tcg_rd = cpu_reg(s, a->rd);
4389 
4390     if (unlikely(a->imm == 0)) {
4391         /*
4392          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4393          * so an extract from bit 0 is a special case.
4394          */
4395         if (a->sf) {
4396             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4397         } else {
4398             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4399         }
4400     } else {
4401         tcg_rm = cpu_reg(s, a->rm);
4402         tcg_rn = cpu_reg(s, a->rn);
4403 
4404         if (a->sf) {
4405             /* Specialization to ROR happens in EXTRACT2.  */
4406             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4407         } else {
4408             TCGv_i32 t0 = tcg_temp_new_i32();
4409 
4410             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4411             if (a->rm == a->rn) {
4412                 tcg_gen_rotri_i32(t0, t0, a->imm);
4413             } else {
4414                 TCGv_i32 t1 = tcg_temp_new_i32();
4415                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4416                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4417             }
4418             tcg_gen_extu_i32_i64(tcg_rd, t0);
4419         }
4420     }
4421     return true;
4422 }
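
     /*
      * Illustrative example: EXTR X0, X1, X2, #8 computes
      * X0 = (X2 >> 8) | (X1 << 56), i.e. bits [71:8] of the 128-bit
      * concatenation X1:X2.  With Rn == Rm this is a rotate right,
      * which the 32-bit path above emits directly as rotri.
      */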
4423 
4424 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4425  * Note that it is the caller's responsibility to ensure that the
4426  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4427  * mandated semantics for out of range shifts.
4428  */
4429 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4430                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4431 {
4432     switch (shift_type) {
4433     case A64_SHIFT_TYPE_LSL:
4434         tcg_gen_shl_i64(dst, src, shift_amount);
4435         break;
4436     case A64_SHIFT_TYPE_LSR:
4437         tcg_gen_shr_i64(dst, src, shift_amount);
4438         break;
4439     case A64_SHIFT_TYPE_ASR:
4440         if (!sf) {
4441             tcg_gen_ext32s_i64(dst, src);
4442         }
4443         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4444         break;
4445     case A64_SHIFT_TYPE_ROR:
4446         if (sf) {
4447             tcg_gen_rotr_i64(dst, src, shift_amount);
4448         } else {
4449             TCGv_i32 t0, t1;
4450             t0 = tcg_temp_new_i32();
4451             t1 = tcg_temp_new_i32();
4452             tcg_gen_extrl_i64_i32(t0, src);
4453             tcg_gen_extrl_i64_i32(t1, shift_amount);
4454             tcg_gen_rotr_i32(t0, t0, t1);
4455             tcg_gen_extu_i32_i64(dst, t0);
4456         }
4457         break;
4458     default:
4459         g_assert_not_reached(); /* all shift types should be handled */
4461     }
4462 
4463     if (!sf) { /* zero extend final result */
4464         tcg_gen_ext32u_i64(dst, dst);
4465     }
4466 }
4467 
4468 /* Shift a TCGv src by immediate, put result in dst.
4469  * The shift amount must be in range (this should always be true as the
4470  * relevant instructions will UNDEF on bad shift immediates).
4471  */
4472 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4473                           enum a64_shift_type shift_type, unsigned int shift_i)
4474 {
4475     assert(shift_i < (sf ? 64 : 32));
4476 
4477     if (shift_i == 0) {
4478         tcg_gen_mov_i64(dst, src);
4479     } else {
4480         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4481     }
4482 }
4483 
4484 /* Logical (shifted register)
4485  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4486  * +----+-----+-----------+-------+---+------+--------+------+------+
4487  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4488  * +----+-----+-----------+-------+---+------+--------+------+------+
4489  */
4490 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4491 {
4492     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4493     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4494 
4495     sf = extract32(insn, 31, 1);
4496     opc = extract32(insn, 29, 2);
4497     shift_type = extract32(insn, 22, 2);
4498     invert = extract32(insn, 21, 1);
4499     rm = extract32(insn, 16, 5);
4500     shift_amount = extract32(insn, 10, 6);
4501     rn = extract32(insn, 5, 5);
4502     rd = extract32(insn, 0, 5);
4503 
4504     if (!sf && (shift_amount & (1 << 5))) {
4505         unallocated_encoding(s);
4506         return;
4507     }
4508 
4509     tcg_rd = cpu_reg(s, rd);
4510 
4511     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4512         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4513          * register-register MOV and MVN, so it is worth special casing.
4514          */
4515         tcg_rm = cpu_reg(s, rm);
4516         if (invert) {
4517             tcg_gen_not_i64(tcg_rd, tcg_rm);
4518             if (!sf) {
4519                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4520             }
4521         } else {
4522             if (sf) {
4523                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4524             } else {
4525                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4526             }
4527         }
4528         return;
4529     }
4530 
4531     tcg_rm = read_cpu_reg(s, rm, sf);
4532 
4533     if (shift_amount) {
4534         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4535     }
4536 
4537     tcg_rn = cpu_reg(s, rn);
4538 
4539     switch (opc | (invert << 2)) {
4540     case 0: /* AND */
4541     case 3: /* ANDS */
4542         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4543         break;
4544     case 1: /* ORR */
4545         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4546         break;
4547     case 2: /* EOR */
4548         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4549         break;
4550     case 4: /* BIC */
4551     case 7: /* BICS */
4552         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4553         break;
4554     case 5: /* ORN */
4555         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4556         break;
4557     case 6: /* EON */
4558         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4559         break;
4560     default:
4561         g_assert_not_reached();
4563     }
4564 
4565     if (!sf) {
4566         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4567     }
4568 
4569     if (opc == 3) {
4570         gen_logic_CC(sf, tcg_rd);
4571     }
4572 }
4573 
4574 /*
4575  * Add/subtract (extended register)
4576  *
4577  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4578  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4579  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4580  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4581  *
4582  *  sf: 0 -> 32bit, 1 -> 64bit
4583  *  op: 0 -> add  , 1 -> sub
4584  *   S: 1 -> set flags
4585  * opt: 00
4586  * option: extension type (see DecodeRegExtend)
4587  * imm3: optional shift to Rm
4588  *
4589  * Rd = Rn + LSL(extend(Rm), amount)
4590  */
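     /*
      * For example (illustrative): ADD X0, SP, W1, UXTW #2 computes
      * X0 = SP + (ZeroExtend(W1) << 2); unlike the shifted-register
      * form, Rn may be SP here (hence read_cpu_reg_sp below).
      */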
4591 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4592 {
4593     int rd = extract32(insn, 0, 5);
4594     int rn = extract32(insn, 5, 5);
4595     int imm3 = extract32(insn, 10, 3);
4596     int option = extract32(insn, 13, 3);
4597     int rm = extract32(insn, 16, 5);
4598     int opt = extract32(insn, 22, 2);
4599     bool setflags = extract32(insn, 29, 1);
4600     bool sub_op = extract32(insn, 30, 1);
4601     bool sf = extract32(insn, 31, 1);
4602 
4603     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4604     TCGv_i64 tcg_rd;
4605     TCGv_i64 tcg_result;
4606 
4607     if (imm3 > 4 || opt != 0) {
4608         unallocated_encoding(s);
4609         return;
4610     }
4611 
4612     /* non-flag setting ops may use SP */
4613     if (!setflags) {
4614         tcg_rd = cpu_reg_sp(s, rd);
4615     } else {
4616         tcg_rd = cpu_reg(s, rd);
4617     }
4618     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4619 
4620     tcg_rm = read_cpu_reg(s, rm, sf);
4621     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4622 
4623     tcg_result = tcg_temp_new_i64();
4624 
4625     if (!setflags) {
4626         if (sub_op) {
4627             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4628         } else {
4629             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4630         }
4631     } else {
4632         if (sub_op) {
4633             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4634         } else {
4635             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4636         }
4637     }
4638 
4639     if (sf) {
4640         tcg_gen_mov_i64(tcg_rd, tcg_result);
4641     } else {
4642         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4643     }
4644 }
4645 
4646 /*
4647  * Add/subtract (shifted register)
4648  *
4649  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4650  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4651  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4652  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4653  *
4654  *    sf: 0 -> 32bit, 1 -> 64bit
4655  *    op: 0 -> add  , 1 -> sub
4656  *     S: 1 -> set flags
4657  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4658  *  imm6: Shift amount to apply to Rm before the add/sub
4659  */
4660 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4661 {
4662     int rd = extract32(insn, 0, 5);
4663     int rn = extract32(insn, 5, 5);
4664     int imm6 = extract32(insn, 10, 6);
4665     int rm = extract32(insn, 16, 5);
4666     int shift_type = extract32(insn, 22, 2);
4667     bool setflags = extract32(insn, 29, 1);
4668     bool sub_op = extract32(insn, 30, 1);
4669     bool sf = extract32(insn, 31, 1);
4670 
4671     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4672     TCGv_i64 tcg_rn, tcg_rm;
4673     TCGv_i64 tcg_result;
4674 
4675     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4676         unallocated_encoding(s);
4677         return;
4678     }
4679 
4680     tcg_rn = read_cpu_reg(s, rn, sf);
4681     tcg_rm = read_cpu_reg(s, rm, sf);
4682 
4683     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4684 
4685     tcg_result = tcg_temp_new_i64();
4686 
4687     if (!setflags) {
4688         if (sub_op) {
4689             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4690         } else {
4691             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4692         }
4693     } else {
4694         if (sub_op) {
4695             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4696         } else {
4697             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4698         }
4699     }
4700 
4701     if (sf) {
4702         tcg_gen_mov_i64(tcg_rd, tcg_result);
4703     } else {
4704         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4705     }
4706 }
4707 
4708 /* Data-processing (3 source)
4709  *
4710  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4711  *  +--+------+-----------+------+------+----+------+------+------+
4712  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4713  *  +--+------+-----------+------+------+----+------+------+------+
4714  */
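     /*
      * For example (illustrative): UMULH X0, X1, X2 (op_id 0x4c) keeps
      * only the high 64 bits of the unsigned 128-bit product, while
      * MADD X0, X1, X2, X3 computes X0 = X3 + X1 * X2; MADD with
      * Ra == XZR is special-cased below as the MUL alias.
      */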
4715 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4716 {
4717     int rd = extract32(insn, 0, 5);
4718     int rn = extract32(insn, 5, 5);
4719     int ra = extract32(insn, 10, 5);
4720     int rm = extract32(insn, 16, 5);
4721     int op_id = (extract32(insn, 29, 3) << 4) |
4722         (extract32(insn, 21, 3) << 1) |
4723         extract32(insn, 15, 1);
4724     bool sf = extract32(insn, 31, 1);
4725     bool is_sub = extract32(op_id, 0, 1);
4726     bool is_high = extract32(op_id, 2, 1);
4727     bool is_signed = false;
4728     TCGv_i64 tcg_op1;
4729     TCGv_i64 tcg_op2;
4730     TCGv_i64 tcg_tmp;
4731 
4732     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4733     switch (op_id) {
4734     case 0x42: /* SMADDL */
4735     case 0x43: /* SMSUBL */
4736     case 0x44: /* SMULH */
4737         is_signed = true;
4738         break;
4739     case 0x0: /* MADD (32bit) */
4740     case 0x1: /* MSUB (32bit) */
4741     case 0x40: /* MADD (64bit) */
4742     case 0x41: /* MSUB (64bit) */
4743     case 0x4a: /* UMADDL */
4744     case 0x4b: /* UMSUBL */
4745     case 0x4c: /* UMULH */
4746         break;
4747     default:
4748         unallocated_encoding(s);
4749         return;
4750     }
4751 
4752     if (is_high) {
4753         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4754         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4755         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4756         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4757 
4758         if (is_signed) {
4759             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4760         } else {
4761             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4762         }
4763         return;
4764     }
4765 
4766     tcg_op1 = tcg_temp_new_i64();
4767     tcg_op2 = tcg_temp_new_i64();
4768     tcg_tmp = tcg_temp_new_i64();
4769 
4770     if (op_id < 0x42) {
4771         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4772         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4773     } else {
4774         if (is_signed) {
4775             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4776             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4777         } else {
4778             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4779             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4780         }
4781     }
4782 
4783     if (ra == 31 && !is_sub) {
4784         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4785         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4786     } else {
4787         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4788         if (is_sub) {
4789             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4790         } else {
4791             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4792         }
4793     }
4794 
4795     if (!sf) {
4796         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4797     }
4798 }
4799 
4800 /* Add/subtract (with carry)
4801  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4802  * +--+--+--+------------------------+------+-------------+------+-----+
4803  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4804  * +--+--+--+------------------------+------+-------------+------+-----+
4805  */
4806 
4807 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4808 {
4809     unsigned int sf, op, setflags, rm, rn, rd;
4810     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4811 
4812     sf = extract32(insn, 31, 1);
4813     op = extract32(insn, 30, 1);
4814     setflags = extract32(insn, 29, 1);
4815     rm = extract32(insn, 16, 5);
4816     rn = extract32(insn, 5, 5);
4817     rd = extract32(insn, 0, 5);
4818 
4819     tcg_rd = cpu_reg(s, rd);
4820     tcg_rn = cpu_reg(s, rn);
4821 
4822     if (op) {
4823         tcg_y = tcg_temp_new_i64();
4824         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4825     } else {
4826         tcg_y = cpu_reg(s, rm);
4827     }
4828 
4829     if (setflags) {
4830         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4831     } else {
4832         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4833     }
4834 }
4835 
4836 /*
4837  * Rotate right into flags
4838  *  31 30 29                21       15          10      5  4      0
4839  * +--+--+--+-----------------+--------+-----------+------+--+------+
4840  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4841  * +--+--+--+-----------------+--------+-----------+------+--+------+
4842  */
4843 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4844 {
4845     int mask = extract32(insn, 0, 4);
4846     int o2 = extract32(insn, 4, 1);
4847     int rn = extract32(insn, 5, 5);
4848     int imm6 = extract32(insn, 15, 6);
4849     int sf_op_s = extract32(insn, 29, 3);
4850     TCGv_i64 tcg_rn;
4851     TCGv_i32 nzcv;
4852 
4853     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4854         unallocated_encoding(s);
4855         return;
4856     }
4857 
4858     tcg_rn = read_cpu_reg(s, rn, 1);
4859     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4860 
4861     nzcv = tcg_temp_new_i32();
4862     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4863 
4864     if (mask & 8) { /* N */
4865         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4866     }
4867     if (mask & 4) { /* Z */
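             /* cpu_ZF encodes Z as "ZF == 0": invert bit 2 of nzcv so
              * that ZF becomes 0 exactly when the Z bit is 1. */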
4868         tcg_gen_not_i32(cpu_ZF, nzcv);
4869         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4870     }
4871     if (mask & 2) { /* C */
4872         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4873     }
4874     if (mask & 1) { /* V */
4875         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4876     }
4877 }
4878 
4879 /*
4880  * Evaluate into flags
4881  *  31 30 29                21        15   14        10      5  4      0
4882  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4883  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
4884  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4885  */
4886 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4887 {
4888     int o3_mask = extract32(insn, 0, 5);
4889     int rn = extract32(insn, 5, 5);
4890     int o2 = extract32(insn, 15, 6);
4891     int sz = extract32(insn, 14, 1);
4892     int sf_op_s = extract32(insn, 29, 3);
4893     TCGv_i32 tmp;
4894     int shift;
4895 
4896     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4897         !dc_isar_feature(aa64_condm_4, s)) {
4898         unallocated_encoding(s);
4899         return;
4900     }
4901     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
4902 
4903     tmp = tcg_temp_new_i32();
4904     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4905     tcg_gen_shli_i32(cpu_NF, tmp, shift);
4906     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4907     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4908     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4909 }
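
/*
 * Illustrative: for SETF8 W3 (sz=0, so shift=24), W3 bit 7 becomes
 * the sign bit of cpu_NF, cpu_ZF is zero iff W3<7:0> is zero, and
 * the sign bit of cpu_VF is W3<8> XOR W3<7>, i.e. the signed
 * overflow out of the low byte.  C is untouched.
 */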
4910 
4911 /* Conditional compare (immediate / register)
4912  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4913  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4914  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4915  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4916  *        [1]                             y                [0]       [0]
4917  */
4918 static void disas_cc(DisasContext *s, uint32_t insn)
4919 {
4920     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4921     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4922     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4923     DisasCompare c;
4924 
4925     if (!extract32(insn, 29, 1)) {
4926         unallocated_encoding(s);
4927         return;
4928     }
4929     if (insn & (1 << 10 | 1 << 4)) {
4930         unallocated_encoding(s);
4931         return;
4932     }
4933     sf = extract32(insn, 31, 1);
4934     op = extract32(insn, 30, 1);
4935     is_imm = extract32(insn, 11, 1);
4936     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4937     cond = extract32(insn, 12, 4);
4938     rn = extract32(insn, 5, 5);
4939     nzcv = extract32(insn, 0, 4);
4940 
4941     /* Set T0 = !COND.  */
4942     tcg_t0 = tcg_temp_new_i32();
4943     arm_test_cc(&c, cond);
4944     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4945 
4946     /* Load the arguments for the new comparison.  */
4947     if (is_imm) {
4948         tcg_y = tcg_temp_new_i64();
4949         tcg_gen_movi_i64(tcg_y, y);
4950     } else {
4951         tcg_y = cpu_reg(s, y);
4952     }
4953     tcg_rn = cpu_reg(s, rn);
4954 
4955     /* Set the flags for the new comparison.  */
4956     tcg_tmp = tcg_temp_new_i64();
4957     if (op) {
4958         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4959     } else {
4960         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4961     }
4962 
4963     /* If COND was false, force the flags to #nzcv.  Compute two masks
4964      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4965      * For tcg hosts that support ANDC, we can make do with just T1.
4966      * In either case, allow the tcg optimizer to delete any unused mask.
4967      */
4968     tcg_t1 = tcg_temp_new_i32();
4969     tcg_t2 = tcg_temp_new_i32();
4970     tcg_gen_neg_i32(tcg_t1, tcg_t0);
4971     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4972 
4973     if (nzcv & 8) { /* N */
4974         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4975     } else {
4976         if (TCG_TARGET_HAS_andc_i32) {
4977             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4978         } else {
4979             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4980         }
4981     }
4982     if (nzcv & 4) { /* Z */
4983         if (TCG_TARGET_HAS_andc_i32) {
4984             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4985         } else {
4986             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4987         }
4988     } else {
4989         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4990     }
4991     if (nzcv & 2) { /* C */
4992         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4993     } else {
4994         if (TCG_TARGET_HAS_andc_i32) {
4995             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4996         } else {
4997             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4998         }
4999     }
5000     if (nzcv & 1) { /* V */
5001         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5002     } else {
5003         if (TCG_TARGET_HAS_andc_i32) {
5004             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5005         } else {
5006             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5007         }
5008     }
5009 }
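
/*
 * E.g. (illustrative) CCMP X1, X2, #0, EQ: when EQ holds, the flags
 * come from the X1 - X2 comparison generated above; when it does
 * not, the T0/T1/T2 masks force NZCV to the immediate 0b0000.
 */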
5010 
5011 /* Conditional select
5012  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5013  * +----+----+---+-----------------+------+------+-----+------+------+
5014  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5015  * +----+----+---+-----------------+------+------+-----+------+------+
5016  */
5017 static void disas_cond_select(DisasContext *s, uint32_t insn)
5018 {
5019     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5020     TCGv_i64 tcg_rd, zero;
5021     DisasCompare64 c;
5022 
5023     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5024         /* S == 1 or op2<1> == 1 */
5025         unallocated_encoding(s);
5026         return;
5027     }
5028     sf = extract32(insn, 31, 1);
5029     else_inv = extract32(insn, 30, 1);
5030     rm = extract32(insn, 16, 5);
5031     cond = extract32(insn, 12, 4);
5032     else_inc = extract32(insn, 10, 1);
5033     rn = extract32(insn, 5, 5);
5034     rd = extract32(insn, 0, 5);
5035 
5036     tcg_rd = cpu_reg(s, rd);
5037 
5038     a64_test_cc(&c, cond);
5039     zero = tcg_constant_i64(0);
5040 
5041     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5042         /* CSET & CSETM.  */
5043         if (else_inv) {
5044             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
5045                                    tcg_rd, c.value, zero);
5046         } else {
5047             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
5048                                 tcg_rd, c.value, zero);
5049         }
5050     } else {
5051         TCGv_i64 t_true = cpu_reg(s, rn);
5052         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5053         if (else_inv && else_inc) {
5054             tcg_gen_neg_i64(t_false, t_false);
5055         } else if (else_inv) {
5056             tcg_gen_not_i64(t_false, t_false);
5057         } else if (else_inc) {
5058             tcg_gen_addi_i64(t_false, t_false, 1);
5059         }
5060         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5061     }
5062 
5063     if (!sf) {
5064         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5065     }
5066 }
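
/*
 * Alias note (illustrative): CSET Xd, cc is CSINC Xd, XZR, XZR,
 * invert(cc) and CSETM Xd, cc is CSINV Xd, XZR, XZR, invert(cc);
 * both hit the rn == rm == 31 fast path above and reduce to a bare
 * (neg)setcond.
 */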
5067 
5068 static void handle_clz(DisasContext *s, unsigned int sf,
5069                        unsigned int rn, unsigned int rd)
5070 {
5071     TCGv_i64 tcg_rd, tcg_rn;
5072     tcg_rd = cpu_reg(s, rd);
5073     tcg_rn = cpu_reg(s, rn);
5074 
5075     if (sf) {
5076         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5077     } else {
5078         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5079         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5080         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5081         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5082     }
5083 }
5084 
5085 static void handle_cls(DisasContext *s, unsigned int sf,
5086                        unsigned int rn, unsigned int rd)
5087 {
5088     TCGv_i64 tcg_rd, tcg_rn;
5089     tcg_rd = cpu_reg(s, rd);
5090     tcg_rn = cpu_reg(s, rn);
5091 
5092     if (sf) {
5093         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5094     } else {
5095         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5096         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5097         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5098         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5099     }
5100 }
5101 
5102 static void handle_rbit(DisasContext *s, unsigned int sf,
5103                         unsigned int rn, unsigned int rd)
5104 {
5105     TCGv_i64 tcg_rd, tcg_rn;
5106     tcg_rd = cpu_reg(s, rd);
5107     tcg_rn = cpu_reg(s, rn);
5108 
5109     if (sf) {
5110         gen_helper_rbit64(tcg_rd, tcg_rn);
5111     } else {
5112         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5113         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5114         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5115         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5116     }
5117 }
5118 
5119 /* REV with sf==1, opcode==3 ("REV64") */
5120 static void handle_rev64(DisasContext *s, unsigned int sf,
5121                          unsigned int rn, unsigned int rd)
5122 {
5123     if (!sf) {
5124         unallocated_encoding(s);
5125         return;
5126     }
5127     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5128 }
5129 
5130 /* REV with sf==0, opcode==2
5131  * REV32 (sf==1, opcode==2)
5132  */
5133 static void handle_rev32(DisasContext *s, unsigned int sf,
5134                          unsigned int rn, unsigned int rd)
5135 {
5136     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5137     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5138 
5139     if (sf) {
5140         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5141         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5142     } else {
5143         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5144     }
5145 }
5146 
5147 /* REV16 (opcode==1) */
5148 static void handle_rev16(DisasContext *s, unsigned int sf,
5149                          unsigned int rn, unsigned int rd)
5150 {
5151     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5152     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5153     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5154     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5155 
5156     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5157     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5158     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5159     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5160     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5161 }
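
/*
 * Illustrative: with sf=1, REV16 turns 0x1122334455667788 into
 * 0x2211443366558877; the mask/shift pairs swap the two bytes
 * inside each 16-bit lane without moving the lanes themselves.
 */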
5162 
5163 /* Data-processing (1 source)
5164  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5165  * +----+---+---+-----------------+---------+--------+------+------+
5166  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5167  * +----+---+---+-----------------+---------+--------+------+------+
5168  */
5169 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5170 {
5171     unsigned int sf, opcode, opcode2, rn, rd;
5172     TCGv_i64 tcg_rd;
5173 
5174     if (extract32(insn, 29, 1)) {
5175         unallocated_encoding(s);
5176         return;
5177     }
5178 
5179     sf = extract32(insn, 31, 1);
5180     opcode = extract32(insn, 10, 6);
5181     opcode2 = extract32(insn, 16, 5);
5182     rn = extract32(insn, 5, 5);
5183     rd = extract32(insn, 0, 5);
5184 
5185 #define MAP(SF, O2, O1) ((SF) | ((O1) << 1) | ((O2) << 7))
5186 
5187     switch (MAP(sf, opcode2, opcode)) {
5188     case MAP(0, 0x00, 0x00): /* RBIT */
5189     case MAP(1, 0x00, 0x00):
5190         handle_rbit(s, sf, rn, rd);
5191         break;
5192     case MAP(0, 0x00, 0x01): /* REV16 */
5193     case MAP(1, 0x00, 0x01):
5194         handle_rev16(s, sf, rn, rd);
5195         break;
5196     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5197     case MAP(1, 0x00, 0x02):
5198         handle_rev32(s, sf, rn, rd);
5199         break;
5200     case MAP(1, 0x00, 0x03): /* REV64 */
5201         handle_rev64(s, sf, rn, rd);
5202         break;
5203     case MAP(0, 0x00, 0x04): /* CLZ */
5204     case MAP(1, 0x00, 0x04):
5205         handle_clz(s, sf, rn, rd);
5206         break;
5207     case MAP(0, 0x00, 0x05): /* CLS */
5208     case MAP(1, 0x00, 0x05):
5209         handle_cls(s, sf, rn, rd);
5210         break;
5211     case MAP(1, 0x01, 0x00): /* PACIA */
5212         if (s->pauth_active) {
5213             tcg_rd = cpu_reg(s, rd);
5214             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5215         } else if (!dc_isar_feature(aa64_pauth, s)) {
5216             goto do_unallocated;
5217         }
5218         break;
5219     case MAP(1, 0x01, 0x01): /* PACIB */
5220         if (s->pauth_active) {
5221             tcg_rd = cpu_reg(s, rd);
5222             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5223         } else if (!dc_isar_feature(aa64_pauth, s)) {
5224             goto do_unallocated;
5225         }
5226         break;
5227     case MAP(1, 0x01, 0x02): /* PACDA */
5228         if (s->pauth_active) {
5229             tcg_rd = cpu_reg(s, rd);
5230             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5231         } else if (!dc_isar_feature(aa64_pauth, s)) {
5232             goto do_unallocated;
5233         }
5234         break;
5235     case MAP(1, 0x01, 0x03): /* PACDB */
5236         if (s->pauth_active) {
5237             tcg_rd = cpu_reg(s, rd);
5238             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5239         } else if (!dc_isar_feature(aa64_pauth, s)) {
5240             goto do_unallocated;
5241         }
5242         break;
5243     case MAP(1, 0x01, 0x04): /* AUTIA */
5244         if (s->pauth_active) {
5245             tcg_rd = cpu_reg(s, rd);
5246             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5247         } else if (!dc_isar_feature(aa64_pauth, s)) {
5248             goto do_unallocated;
5249         }
5250         break;
5251     case MAP(1, 0x01, 0x05): /* AUTIB */
5252         if (s->pauth_active) {
5253             tcg_rd = cpu_reg(s, rd);
5254             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5255         } else if (!dc_isar_feature(aa64_pauth, s)) {
5256             goto do_unallocated;
5257         }
5258         break;
5259     case MAP(1, 0x01, 0x06): /* AUTDA */
5260         if (s->pauth_active) {
5261             tcg_rd = cpu_reg(s, rd);
5262             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5263         } else if (!dc_isar_feature(aa64_pauth, s)) {
5264             goto do_unallocated;
5265         }
5266         break;
5267     case MAP(1, 0x01, 0x07): /* AUTDB */
5268         if (s->pauth_active) {
5269             tcg_rd = cpu_reg(s, rd);
5270             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5271         } else if (!dc_isar_feature(aa64_pauth, s)) {
5272             goto do_unallocated;
5273         }
5274         break;
5275     case MAP(1, 0x01, 0x08): /* PACIZA */
5276         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5277             goto do_unallocated;
5278         } else if (s->pauth_active) {
5279             tcg_rd = cpu_reg(s, rd);
5280             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5281         }
5282         break;
5283     case MAP(1, 0x01, 0x09): /* PACIZB */
5284         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5285             goto do_unallocated;
5286         } else if (s->pauth_active) {
5287             tcg_rd = cpu_reg(s, rd);
5288             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5289         }
5290         break;
5291     case MAP(1, 0x01, 0x0a): /* PACDZA */
5292         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5293             goto do_unallocated;
5294         } else if (s->pauth_active) {
5295             tcg_rd = cpu_reg(s, rd);
5296             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5297         }
5298         break;
5299     case MAP(1, 0x01, 0x0b): /* PACDZB */
5300         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5301             goto do_unallocated;
5302         } else if (s->pauth_active) {
5303             tcg_rd = cpu_reg(s, rd);
5304             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5305         }
5306         break;
5307     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5308         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5309             goto do_unallocated;
5310         } else if (s->pauth_active) {
5311             tcg_rd = cpu_reg(s, rd);
5312             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5313         }
5314         break;
5315     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5316         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5317             goto do_unallocated;
5318         } else if (s->pauth_active) {
5319             tcg_rd = cpu_reg(s, rd);
5320             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5321         }
5322         break;
5323     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5324         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5325             goto do_unallocated;
5326         } else if (s->pauth_active) {
5327             tcg_rd = cpu_reg(s, rd);
5328             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5329         }
5330         break;
5331     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5332         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5333             goto do_unallocated;
5334         } else if (s->pauth_active) {
5335             tcg_rd = cpu_reg(s, rd);
5336             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5337         }
5338         break;
5339     case MAP(1, 0x01, 0x10): /* XPACI */
5340         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5341             goto do_unallocated;
5342         } else if (s->pauth_active) {
5343             tcg_rd = cpu_reg(s, rd);
5344             gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
5345         }
5346         break;
5347     case MAP(1, 0x01, 0x11): /* XPACD */
5348         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5349             goto do_unallocated;
5350         } else if (s->pauth_active) {
5351             tcg_rd = cpu_reg(s, rd);
5352             gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
5353         }
5354         break;
5355     default:
5356     do_unallocated:
5357         unallocated_encoding(s);
5358         break;
5359     }
5360 
5361 #undef MAP
5362 }
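
/*
 * Editorial note: the Z forms (PACIZA, AUTIZB, ...) are the same
 * operations with a zero modifier, hence the tcg_constant_i64(0)
 * argument and the rn == 31 requirement.  When the feature is
 * implemented but pauth_active is clear they behave as NOPs, which
 * is why those paths generate no code.
 */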
5363 
5364 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5365                        unsigned int rm, unsigned int rn, unsigned int rd)
5366 {
5367     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5368     tcg_rd = cpu_reg(s, rd);
5369 
5370     if (!sf && is_signed) {
5371         tcg_n = tcg_temp_new_i64();
5372         tcg_m = tcg_temp_new_i64();
5373         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5374         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5375     } else {
5376         tcg_n = read_cpu_reg(s, rn, sf);
5377         tcg_m = read_cpu_reg(s, rm, sf);
5378     }
5379 
5380     if (is_signed) {
5381         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5382     } else {
5383         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5384     }
5385 
5386     if (!sf) { /* zero extend final result */
5387         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5388     }
5389 }
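
/*
 * Editorial note: helpers are used instead of a native TCG divide so
 * the AArch64-defined corner cases hold: division by zero yields 0
 * and INT_MIN / -1 wraps to INT_MIN rather than trapping.
 */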
5390 
5391 /* LSLV, LSRV, ASRV, RORV */
5392 static void handle_shift_reg(DisasContext *s,
5393                              enum a64_shift_type shift_type, unsigned int sf,
5394                              unsigned int rm, unsigned int rn, unsigned int rd)
5395 {
5396     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5397     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5398     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5399 
5400     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5401     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5402 }
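
/*
 * Only the low 5 (32-bit) or 6 (64-bit) bits of Rm matter, hence the
 * AND above: e.g. LSL X0, X1, X2 with X2 == 65 shifts by 1.
 */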
5403 
5404 /* CRC32[BHWX], CRC32C[BHWX] */
5405 static void handle_crc32(DisasContext *s,
5406                          unsigned int sf, unsigned int sz, bool crc32c,
5407                          unsigned int rm, unsigned int rn, unsigned int rd)
5408 {
5409     TCGv_i64 tcg_acc, tcg_val;
5410     TCGv_i32 tcg_bytes;
5411 
5412     if (!dc_isar_feature(aa64_crc32, s)
5413         || (sf == 1 && sz != 3)
5414         || (sf == 0 && sz == 3)) {
5415         unallocated_encoding(s);
5416         return;
5417     }
5418 
5419     if (sz == 3) {
5420         tcg_val = cpu_reg(s, rm);
5421     } else {
5422         uint64_t mask;
5423         switch (sz) {
5424         case 0:
5425             mask = 0xFF;
5426             break;
5427         case 1:
5428             mask = 0xFFFF;
5429             break;
5430         case 2:
5431             mask = 0xFFFFFFFF;
5432             break;
5433         default:
5434             g_assert_not_reached();
5435         }
5436         tcg_val = tcg_temp_new_i64();
5437         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5438     }
5439 
5440     tcg_acc = cpu_reg(s, rn);
5441     tcg_bytes = tcg_constant_i32(1 << sz);
5442 
5443     if (crc32c) {
5444         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5445     } else {
5446         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5447     }
5448 }
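
/*
 * Illustrative: CRC32W (sf=0, sz=2) folds 4 bytes of Wm into the Wn
 * accumulator using the 0x04C11DB7 polynomial, while the CRC32C*
 * forms use the Castagnoli polynomial 0x1EDC6F41.
 */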
5449 
5450 /* Data-processing (2 source)
5451  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5452  * +----+---+---+-----------------+------+--------+------+------+
5453  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5454  * +----+---+---+-----------------+------+--------+------+------+
5455  */
5456 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5457 {
5458     unsigned int sf, rm, opcode, rn, rd, setflag;
5459     sf = extract32(insn, 31, 1);
5460     setflag = extract32(insn, 29, 1);
5461     rm = extract32(insn, 16, 5);
5462     opcode = extract32(insn, 10, 6);
5463     rn = extract32(insn, 5, 5);
5464     rd = extract32(insn, 0, 5);
5465 
5466     if (setflag && opcode != 0) {
5467         unallocated_encoding(s);
5468         return;
5469     }
5470 
5471     switch (opcode) {
5472     case 0: /* SUBP(S) */
5473         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5474             goto do_unallocated;
5475         } else {
5476             TCGv_i64 tcg_n, tcg_m, tcg_d;
5477 
5478             tcg_n = read_cpu_reg_sp(s, rn, true);
5479             tcg_m = read_cpu_reg_sp(s, rm, true);
5480             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5481             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5482             tcg_d = cpu_reg(s, rd);
5483 
5484             if (setflag) {
5485                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5486             } else {
5487                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5488             }
5489         }
5490         break;
5491     case 2: /* UDIV */
5492         handle_div(s, false, sf, rm, rn, rd);
5493         break;
5494     case 3: /* SDIV */
5495         handle_div(s, true, sf, rm, rn, rd);
5496         break;
5497     case 4: /* IRG */
5498         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5499             goto do_unallocated;
5500         }
5501         if (s->ata[0]) {
5502             gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
5503                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5504         } else {
5505             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5506                                              cpu_reg_sp(s, rn));
5507         }
5508         break;
5509     case 5: /* GMI */
5510         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5511             goto do_unallocated;
5512         } else {
5513             TCGv_i64 t = tcg_temp_new_i64();
5514 
5515             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5516             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5517             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5518         }
5519         break;
5520     case 8: /* LSLV */
5521         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5522         break;
5523     case 9: /* LSRV */
5524         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5525         break;
5526     case 10: /* ASRV */
5527         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5528         break;
5529     case 11: /* RORV */
5530         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5531         break;
5532     case 12: /* PACGA */
5533         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5534             goto do_unallocated;
5535         }
5536         gen_helper_pacga(cpu_reg(s, rd), tcg_env,
5537                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5538         break;
5539     case 16:
5540     case 17:
5541     case 18:
5542     case 19:
5543     case 20:
5544     case 21:
5545     case 22:
5546     case 23: /* CRC32 */
5547     {
5548         int sz = extract32(opcode, 0, 2);
5549         bool crc32c = extract32(opcode, 2, 1);
5550         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5551         break;
5552     }
5553     default:
5554     do_unallocated:
5555         unallocated_encoding(s);
5556         break;
5557     }
5558 }
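
/*
 * E.g. (illustrative) GMI X0, SP, X1 extracts the allocation tag
 * from SP<59:56> and ORs (1 << tag) into the exclusion mask in X1,
 * writing the result to X0.
 */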
5559 
5560 /*
5561  * Data processing - register
5562  *  31  30 29  28      25    21  20  16      10         0
5563  * +--+---+--+---+-------+-----+-------+-------+---------+
5564  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5565  * +--+---+--+---+-------+-----+-------+-------+---------+
5566  */
5567 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5568 {
5569     int op0 = extract32(insn, 30, 1);
5570     int op1 = extract32(insn, 28, 1);
5571     int op2 = extract32(insn, 21, 4);
5572     int op3 = extract32(insn, 10, 6);
5573 
5574     if (!op1) {
5575         if (op2 & 8) {
5576             if (op2 & 1) {
5577                 /* Add/sub (extended register) */
5578                 disas_add_sub_ext_reg(s, insn);
5579             } else {
5580                 /* Add/sub (shifted register) */
5581                 disas_add_sub_reg(s, insn);
5582             }
5583         } else {
5584             /* Logical (shifted register) */
5585             disas_logic_reg(s, insn);
5586         }
5587         return;
5588     }
5589 
5590     switch (op2) {
5591     case 0x0:
5592         switch (op3) {
5593         case 0x00: /* Add/subtract (with carry) */
5594             disas_adc_sbc(s, insn);
5595             break;
5596 
5597         case 0x01: /* Rotate right into flags */
5598         case 0x21:
5599             disas_rotate_right_into_flags(s, insn);
5600             break;
5601 
5602         case 0x02: /* Evaluate into flags */
5603         case 0x12:
5604         case 0x22:
5605         case 0x32:
5606             disas_evaluate_into_flags(s, insn);
5607             break;
5608 
5609         default:
5610             goto do_unallocated;
5611         }
5612         break;
5613 
5614     case 0x2: /* Conditional compare */
5615         disas_cc(s, insn); /* both imm and reg forms */
5616         break;
5617 
5618     case 0x4: /* Conditional select */
5619         disas_cond_select(s, insn);
5620         break;
5621 
5622     case 0x6: /* Data-processing */
5623         if (op0) {    /* (1 source) */
5624             disas_data_proc_1src(s, insn);
5625         } else {      /* (2 source) */
5626             disas_data_proc_2src(s, insn);
5627         }
5628         break;
5629     case 0x8 ... 0xf: /* (3 source) */
5630         disas_data_proc_3src(s, insn);
5631         break;
5632 
5633     default:
5634     do_unallocated:
5635         unallocated_encoding(s);
5636         break;
5637     }
5638 }
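
/*
 * Decode walk-through (illustrative): 0x9a020020 is ADC X0, X1, X2;
 * op1 = 1, op2 = 0x0 and op3 = 0x00 route it to disas_adc_sbc()
 * above.
 */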
5639 
5640 static void handle_fp_compare(DisasContext *s, int size,
5641                               unsigned int rn, unsigned int rm,
5642                               bool cmp_with_zero, bool signal_all_nans)
5643 {
5644     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5645     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5646 
5647     if (size == MO_64) {
5648         TCGv_i64 tcg_vn, tcg_vm;
5649 
5650         tcg_vn = read_fp_dreg(s, rn);
5651         if (cmp_with_zero) {
5652             tcg_vm = tcg_constant_i64(0);
5653         } else {
5654             tcg_vm = read_fp_dreg(s, rm);
5655         }
5656         if (signal_all_nans) {
5657             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5658         } else {
5659             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5660         }
5661     } else {
5662         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5663         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5664 
5665         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5666         if (cmp_with_zero) {
5667             tcg_gen_movi_i32(tcg_vm, 0);
5668         } else {
5669             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5670         }
5671 
5672         switch (size) {
5673         case MO_32:
5674             if (signal_all_nans) {
5675                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5676             } else {
5677                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5678             }
5679             break;
5680         case MO_16:
5681             if (signal_all_nans) {
5682                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5683             } else {
5684                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5685             }
5686             break;
5687         default:
5688             g_assert_not_reached();
5689         }
5690     }
5691 
5692     gen_set_nzcv(tcg_flags);
5693 }
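
/*
 * Reference (illustrative): the compare helpers return NZCV as
 * defined for FCMP: equal 0110, less than 1000, greater than 0010,
 * unordered 0011.  gen_set_nzcv() then installs that value.
 */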
5694 
5695 /* Floating point compare
5696  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5697  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5698  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5699  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5700  */
5701 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5702 {
5703     unsigned int mos, type, rm, op, rn, opc, op2r;
5704     int size;
5705 
5706     mos = extract32(insn, 29, 3);
5707     type = extract32(insn, 22, 2);
5708     rm = extract32(insn, 16, 5);
5709     op = extract32(insn, 14, 2);
5710     rn = extract32(insn, 5, 5);
5711     opc = extract32(insn, 3, 2);
5712     op2r = extract32(insn, 0, 3);
5713 
5714     if (mos || op || op2r) {
5715         unallocated_encoding(s);
5716         return;
5717     }
5718 
5719     switch (type) {
5720     case 0:
5721         size = MO_32;
5722         break;
5723     case 1:
5724         size = MO_64;
5725         break;
5726     case 3:
5727         size = MO_16;
5728         if (dc_isar_feature(aa64_fp16, s)) {
5729             break;
5730         }
5731         /* fallthru */
5732     default:
5733         unallocated_encoding(s);
5734         return;
5735     }
5736 
5737     if (!fp_access_check(s)) {
5738         return;
5739     }
5740 
5741     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5742 }
5743 
5744 /* Floating point conditional compare
5745  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5746  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5747  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5748  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5749  */
5750 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5751 {
5752     unsigned int mos, type, rm, cond, rn, op, nzcv;
5753     TCGLabel *label_continue = NULL;
5754     int size;
5755 
5756     mos = extract32(insn, 29, 3);
5757     type = extract32(insn, 22, 2);
5758     rm = extract32(insn, 16, 5);
5759     cond = extract32(insn, 12, 4);
5760     rn = extract32(insn, 5, 5);
5761     op = extract32(insn, 4, 1);
5762     nzcv = extract32(insn, 0, 4);
5763 
5764     if (mos) {
5765         unallocated_encoding(s);
5766         return;
5767     }
5768 
5769     switch (type) {
5770     case 0:
5771         size = MO_32;
5772         break;
5773     case 1:
5774         size = MO_64;
5775         break;
5776     case 3:
5777         size = MO_16;
5778         if (dc_isar_feature(aa64_fp16, s)) {
5779             break;
5780         }
5781         /* fallthru */
5782     default:
5783         unallocated_encoding(s);
5784         return;
5785     }
5786 
5787     if (!fp_access_check(s)) {
5788         return;
5789     }
5790 
5791     if (cond < 0x0e) { /* not always */
5792         TCGLabel *label_match = gen_new_label();
5793         label_continue = gen_new_label();
5794         arm_gen_test_cc(cond, label_match);
5795         /* nomatch: */
5796         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
5797         tcg_gen_br(label_continue);
5798         gen_set_label(label_match);
5799     }
5800 
5801     handle_fp_compare(s, size, rn, rm, false, op);
5802 
5803     if (cond < 0x0e) {
5804         gen_set_label(label_continue);
5805     }
5806 }
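
/*
 * E.g. (illustrative) FCCMP S0, S1, #8, GE: if GE holds, S0 is
 * compared with S1 as above; otherwise the compare is skipped and
 * NZCV is set to 0b1000 straight from the immediate.
 */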
5807 
5808 /* Floating point conditional select
5809  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5810  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5811  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5812  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5813  */
5814 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5815 {
5816     unsigned int mos, type, rm, cond, rn, rd;
5817     TCGv_i64 t_true, t_false;
5818     DisasCompare64 c;
5819     MemOp sz;
5820 
5821     mos = extract32(insn, 29, 3);
5822     type = extract32(insn, 22, 2);
5823     rm = extract32(insn, 16, 5);
5824     cond = extract32(insn, 12, 4);
5825     rn = extract32(insn, 5, 5);
5826     rd = extract32(insn, 0, 5);
5827 
5828     if (mos) {
5829         unallocated_encoding(s);
5830         return;
5831     }
5832 
5833     switch (type) {
5834     case 0:
5835         sz = MO_32;
5836         break;
5837     case 1:
5838         sz = MO_64;
5839         break;
5840     case 3:
5841         sz = MO_16;
5842         if (dc_isar_feature(aa64_fp16, s)) {
5843             break;
5844         }
5845         /* fallthru */
5846     default:
5847         unallocated_encoding(s);
5848         return;
5849     }
5850 
5851     if (!fp_access_check(s)) {
5852         return;
5853     }
5854 
5855     /* Zero extend sreg & hreg inputs to 64 bits now.  */
5856     t_true = tcg_temp_new_i64();
5857     t_false = tcg_temp_new_i64();
5858     read_vec_element(s, t_true, rn, 0, sz);
5859     read_vec_element(s, t_false, rm, 0, sz);
5860 
5861     a64_test_cc(&c, cond);
5862     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
5863                         t_true, t_false);
5864 
5865     /* Note that sregs & hregs write back zeros to the high bits,
5866        and we've already done the zero-extension.  */
5867     write_fp_dreg(s, rd, t_true);
5868 }
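
/*
 * E.g. (illustrative) FCSEL D0, D1, D2, GT writes D1 to D0 when GT
 * is true and D2 otherwise; the movcond runs on full 64-bit values,
 * which is safe because both inputs were zero-extended above.
 */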
5869 
5870 /* Floating-point data-processing (1 source) - half precision */
5871 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5872 {
5873     TCGv_ptr fpst = NULL;
5874     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5875     TCGv_i32 tcg_res = tcg_temp_new_i32();
5876 
5877     switch (opcode) {
5878     case 0x0: /* FMOV */
5879         tcg_gen_mov_i32(tcg_res, tcg_op);
5880         break;
5881     case 0x1: /* FABS */
5882         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5883         break;
5884     case 0x2: /* FNEG */
5885         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5886         break;
5887     case 0x3: /* FSQRT */
5888         fpst = fpstatus_ptr(FPST_FPCR_F16);
5889         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5890         break;
5891     case 0x8: /* FRINTN */
5892     case 0x9: /* FRINTP */
5893     case 0xa: /* FRINTM */
5894     case 0xb: /* FRINTZ */
5895     case 0xc: /* FRINTA */
5896     {
5897         TCGv_i32 tcg_rmode;
5898 
5899         fpst = fpstatus_ptr(FPST_FPCR_F16);
5900         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
5901         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5902         gen_restore_rmode(tcg_rmode, fpst);
5903         break;
5904     }
5905     case 0xe: /* FRINTX */
5906         fpst = fpstatus_ptr(FPST_FPCR_F16);
5907         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5908         break;
5909     case 0xf: /* FRINTI */
5910         fpst = fpstatus_ptr(FPST_FPCR_F16);
5911         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5912         break;
5913     default:
5914         g_assert_not_reached();
5915     }
5916 
5917     write_fp_sreg(s, rd, tcg_res);
5918 }
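
/*
 * Note (illustrative): opcode & 7 maps FRINTN/P/M/Z/A directly onto
 * FPROUNDING_TIEEVEN(0), POSINF(1), NEGINF(2), ZERO(3) and
 * TIEAWAY(4), so no lookup table is needed for the rounding mode.
 */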
5919 
5920 /* Floating-point data-processing (1 source) - single precision */
5921 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5922 {
5923     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
5924     TCGv_i32 tcg_op, tcg_res;
5925     TCGv_ptr fpst;
5926     int rmode = -1;
5927 
5928     tcg_op = read_fp_sreg(s, rn);
5929     tcg_res = tcg_temp_new_i32();
5930 
5931     switch (opcode) {
5932     case 0x0: /* FMOV */
5933         tcg_gen_mov_i32(tcg_res, tcg_op);
5934         goto done;
5935     case 0x1: /* FABS */
5936         gen_helper_vfp_abss(tcg_res, tcg_op);
5937         goto done;
5938     case 0x2: /* FNEG */
5939         gen_helper_vfp_negs(tcg_res, tcg_op);
5940         goto done;
5941     case 0x3: /* FSQRT */
5942         gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
5943         goto done;
5944     case 0x6: /* BFCVT */
5945         gen_fpst = gen_helper_bfcvt;
5946         break;
5947     case 0x8: /* FRINTN */
5948     case 0x9: /* FRINTP */
5949     case 0xa: /* FRINTM */
5950     case 0xb: /* FRINTZ */
5951     case 0xc: /* FRINTA */
5952         rmode = opcode & 7;
5953         gen_fpst = gen_helper_rints;
5954         break;
5955     case 0xe: /* FRINTX */
5956         gen_fpst = gen_helper_rints_exact;
5957         break;
5958     case 0xf: /* FRINTI */
5959         gen_fpst = gen_helper_rints;
5960         break;
5961     case 0x10: /* FRINT32Z */
5962         rmode = FPROUNDING_ZERO;
5963         gen_fpst = gen_helper_frint32_s;
5964         break;
5965     case 0x11: /* FRINT32X */
5966         gen_fpst = gen_helper_frint32_s;
5967         break;
5968     case 0x12: /* FRINT64Z */
5969         rmode = FPROUNDING_ZERO;
5970         gen_fpst = gen_helper_frint64_s;
5971         break;
5972     case 0x13: /* FRINT64X */
5973         gen_fpst = gen_helper_frint64_s;
5974         break;
5975     default:
5976         g_assert_not_reached();
5977     }
5978 
5979     fpst = fpstatus_ptr(FPST_FPCR);
5980     if (rmode >= 0) {
5981         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
5982         gen_fpst(tcg_res, tcg_op, fpst);
5983         gen_restore_rmode(tcg_rmode, fpst);
5984     } else {
5985         gen_fpst(tcg_res, tcg_op, fpst);
5986     }
5987 
5988  done:
5989     write_fp_sreg(s, rd, tcg_res);
5990 }
5991 
5992 /* Floating-point data-processing (1 source) - double precision */
5993 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5994 {
5995     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
5996     TCGv_i64 tcg_op, tcg_res;
5997     TCGv_ptr fpst;
5998     int rmode = -1;
5999 
6000     switch (opcode) {
6001     case 0x0: /* FMOV */
6002         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6003         return;
6004     }
6005 
6006     tcg_op = read_fp_dreg(s, rn);
6007     tcg_res = tcg_temp_new_i64();
6008 
6009     switch (opcode) {
6010     case 0x1: /* FABS */
6011         gen_helper_vfp_absd(tcg_res, tcg_op);
6012         goto done;
6013     case 0x2: /* FNEG */
6014         gen_helper_vfp_negd(tcg_res, tcg_op);
6015         goto done;
6016     case 0x3: /* FSQRT */
6017         gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
6018         goto done;
6019     case 0x8: /* FRINTN */
6020     case 0x9: /* FRINTP */
6021     case 0xa: /* FRINTM */
6022     case 0xb: /* FRINTZ */
6023     case 0xc: /* FRINTA */
6024         rmode = opcode & 7;
6025         gen_fpst = gen_helper_rintd;
6026         break;
6027     case 0xe: /* FRINTX */
6028         gen_fpst = gen_helper_rintd_exact;
6029         break;
6030     case 0xf: /* FRINTI */
6031         gen_fpst = gen_helper_rintd;
6032         break;
6033     case 0x10: /* FRINT32Z */
6034         rmode = FPROUNDING_ZERO;
6035         gen_fpst = gen_helper_frint32_d;
6036         break;
6037     case 0x11: /* FRINT32X */
6038         gen_fpst = gen_helper_frint32_d;
6039         break;
6040     case 0x12: /* FRINT64Z */
6041         rmode = FPROUNDING_ZERO;
6042         gen_fpst = gen_helper_frint64_d;
6043         break;
6044     case 0x13: /* FRINT64X */
6045         gen_fpst = gen_helper_frint64_d;
6046         break;
6047     default:
6048         g_assert_not_reached();
6049     }
6050 
6051     fpst = fpstatus_ptr(FPST_FPCR);
6052     if (rmode >= 0) {
6053         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6054         gen_fpst(tcg_res, tcg_op, fpst);
6055         gen_restore_rmode(tcg_rmode, fpst);
6056     } else {
6057         gen_fpst(tcg_res, tcg_op, fpst);
6058     }
6059 
6060  done:
6061     write_fp_dreg(s, rd, tcg_res);
6062 }
6063 
6064 static void handle_fp_fcvt(DisasContext *s, int opcode,
6065                            int rd, int rn, int dtype, int ntype)
6066 {
6067     switch (ntype) {
6068     case 0x0:
6069     {
6070         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6071         if (dtype == 1) {
6072             /* Single to double */
6073             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6074             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
6075             write_fp_dreg(s, rd, tcg_rd);
6076         } else {
6077             /* Single to half */
6078             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6079             TCGv_i32 ahp = get_ahp_flag();
6080             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6081 
6082             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6083             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6084             write_fp_sreg(s, rd, tcg_rd);
6085         }
6086         break;
6087     }
6088     case 0x1:
6089     {
6090         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6091         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6092         if (dtype == 0) {
6093             /* Double to single */
6094             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
6095         } else {
6096             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6097             TCGv_i32 ahp = get_ahp_flag();
6098             /* Double to half */
6099             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6100             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6101         }
6102         write_fp_sreg(s, rd, tcg_rd);
6103         break;
6104     }
6105     case 0x3:
6106     {
6107         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6108         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6109         TCGv_i32 tcg_ahp = get_ahp_flag();
6110         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6111         if (dtype == 0) {
6112             /* Half to single */
6113             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6114             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6115             write_fp_sreg(s, rd, tcg_rd);
6116         } else {
6117             /* Half to double */
6118             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6119             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6120             write_fp_dreg(s, rd, tcg_rd);
6121         }
6122         break;
6123     }
6124     default:
6125         g_assert_not_reached();
6126     }
6127 }
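
/*
 * Editorial note: only the half-precision conversions take the
 * FPCR.AHP flag (via get_ahp_flag()), letting the helpers honour
 * the alternative half-precision format as well as IEEE.
 */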
6128 
6129 /* Floating point data-processing (1 source)
6130  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6131  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6132  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6133  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6134  */
6135 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6136 {
6137     int mos = extract32(insn, 29, 3);
6138     int type = extract32(insn, 22, 2);
6139     int opcode = extract32(insn, 15, 6);
6140     int rn = extract32(insn, 5, 5);
6141     int rd = extract32(insn, 0, 5);
6142 
6143     if (mos) {
6144         goto do_unallocated;
6145     }
6146 
6147     switch (opcode) {
6148     case 0x4: case 0x5: case 0x7:
6149     {
6150         /* FCVT between half, single and double precision */
6151         int dtype = extract32(opcode, 0, 2);
6152         if (type == 2 || dtype == type) {
6153             goto do_unallocated;
6154         }
6155         if (!fp_access_check(s)) {
6156             return;
6157         }
6158 
6159         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6160         break;
6161     }
6162 
6163     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6164         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6165             goto do_unallocated;
6166         }
6167         /* fall through */
6168     case 0x0 ... 0x3:
6169     case 0x8 ... 0xc:
6170     case 0xe ... 0xf:
6171         /* 32-to-32 and 64-to-64 ops */
6172         switch (type) {
6173         case 0:
6174             if (!fp_access_check(s)) {
6175                 return;
6176             }
6177             handle_fp_1src_single(s, opcode, rd, rn);
6178             break;
6179         case 1:
6180             if (!fp_access_check(s)) {
6181                 return;
6182             }
6183             handle_fp_1src_double(s, opcode, rd, rn);
6184             break;
6185         case 3:
6186             if (!dc_isar_feature(aa64_fp16, s)) {
6187                 goto do_unallocated;
6188             }
6189 
6190             if (!fp_access_check(s)) {
6191                 return;
6192             }
6193             handle_fp_1src_half(s, opcode, rd, rn);
6194             break;
6195         default:
6196             goto do_unallocated;
6197         }
6198         break;
6199 
6200     case 0x6:
6201         switch (type) {
6202         case 1: /* BFCVT */
6203             if (!dc_isar_feature(aa64_bf16, s)) {
6204                 goto do_unallocated;
6205             }
6206             if (!fp_access_check(s)) {
6207                 return;
6208             }
6209             handle_fp_1src_single(s, opcode, rd, rn);
6210             break;
6211         default:
6212             goto do_unallocated;
6213         }
6214         break;
6215 
6216     default:
6217     do_unallocated:
6218         unallocated_encoding(s);
6219         break;
6220     }
6221 }
6222 
6223 /* Floating-point data-processing (2 source) - single precision */
6224 static void handle_fp_2src_single(DisasContext *s, int opcode,
6225                                   int rd, int rn, int rm)
6226 {
6227     TCGv_i32 tcg_op1;
6228     TCGv_i32 tcg_op2;
6229     TCGv_i32 tcg_res;
6230     TCGv_ptr fpst;
6231 
6232     tcg_res = tcg_temp_new_i32();
6233     fpst = fpstatus_ptr(FPST_FPCR);
6234     tcg_op1 = read_fp_sreg(s, rn);
6235     tcg_op2 = read_fp_sreg(s, rm);
6236 
6237     switch (opcode) {
6238     case 0x0: /* FMUL */
6239         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6240         break;
6241     case 0x1: /* FDIV */
6242         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6243         break;
6244     case 0x2: /* FADD */
6245         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6246         break;
6247     case 0x3: /* FSUB */
6248         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6249         break;
6250     case 0x4: /* FMAX */
6251         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6252         break;
6253     case 0x5: /* FMIN */
6254         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6255         break;
6256     case 0x6: /* FMAXNM */
6257         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6258         break;
6259     case 0x7: /* FMINNM */
6260         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6261         break;
6262     case 0x8: /* FNMUL */
6263         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6264         gen_helper_vfp_negs(tcg_res, tcg_res);
6265         break;
6266     }
6267 
6268     write_fp_sreg(s, rd, tcg_res);
6269 }
6270 
6271 /* Floating-point data-processing (2 source) - double precision */
6272 static void handle_fp_2src_double(DisasContext *s, int opcode,
6273                                   int rd, int rn, int rm)
6274 {
6275     TCGv_i64 tcg_op1;
6276     TCGv_i64 tcg_op2;
6277     TCGv_i64 tcg_res;
6278     TCGv_ptr fpst;
6279 
6280     tcg_res = tcg_temp_new_i64();
6281     fpst = fpstatus_ptr(FPST_FPCR);
6282     tcg_op1 = read_fp_dreg(s, rn);
6283     tcg_op2 = read_fp_dreg(s, rm);
6284 
6285     switch (opcode) {
6286     case 0x0: /* FMUL */
6287         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6288         break;
6289     case 0x1: /* FDIV */
6290         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6291         break;
6292     case 0x2: /* FADD */
6293         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6294         break;
6295     case 0x3: /* FSUB */
6296         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6297         break;
6298     case 0x4: /* FMAX */
6299         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6300         break;
6301     case 0x5: /* FMIN */
6302         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6303         break;
6304     case 0x6: /* FMAXNM */
6305         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6306         break;
6307     case 0x7: /* FMINNM */
6308         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6309         break;
6310     case 0x8: /* FNMUL */
6311         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6312         gen_helper_vfp_negd(tcg_res, tcg_res);
6313         break;
6314     }
6315 
6316     write_fp_dreg(s, rd, tcg_res);
6317 }
6318 
6319 /* Floating-point data-processing (2 source) - half precision */
6320 static void handle_fp_2src_half(DisasContext *s, int opcode,
6321                                 int rd, int rn, int rm)
6322 {
6323     TCGv_i32 tcg_op1;
6324     TCGv_i32 tcg_op2;
6325     TCGv_i32 tcg_res;
6326     TCGv_ptr fpst;
6327 
6328     tcg_res = tcg_temp_new_i32();
6329     fpst = fpstatus_ptr(FPST_FPCR_F16);
6330     tcg_op1 = read_fp_hreg(s, rn);
6331     tcg_op2 = read_fp_hreg(s, rm);
6332 
6333     switch (opcode) {
6334     case 0x0: /* FMUL */
6335         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6336         break;
6337     case 0x1: /* FDIV */
6338         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6339         break;
6340     case 0x2: /* FADD */
6341         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6342         break;
6343     case 0x3: /* FSUB */
6344         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6345         break;
6346     case 0x4: /* FMAX */
6347         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6348         break;
6349     case 0x5: /* FMIN */
6350         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6351         break;
6352     case 0x6: /* FMAXNM */
6353         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6354         break;
6355     case 0x7: /* FMINNM */
6356         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6357         break;
6358     case 0x8: /* FNMUL */
6359         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6360         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6361         break;
6362     default:
6363         g_assert_not_reached();
6364     }
6365 
6366     write_fp_sreg(s, rd, tcg_res);
6367 }
6368 
6369 /* Floating point data-processing (2 source)
6370  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6371  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6372  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6373  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6374  */
6375 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6376 {
6377     int mos = extract32(insn, 29, 3);
6378     int type = extract32(insn, 22, 2);
6379     int rd = extract32(insn, 0, 5);
6380     int rn = extract32(insn, 5, 5);
6381     int rm = extract32(insn, 16, 5);
6382     int opcode = extract32(insn, 12, 4);
6383 
6384     if (opcode > 8 || mos) {
6385         unallocated_encoding(s);
6386         return;
6387     }
6388 
6389     switch (type) {
6390     case 0:
6391         if (!fp_access_check(s)) {
6392             return;
6393         }
6394         handle_fp_2src_single(s, opcode, rd, rn, rm);
6395         break;
6396     case 1:
6397         if (!fp_access_check(s)) {
6398             return;
6399         }
6400         handle_fp_2src_double(s, opcode, rd, rn, rm);
6401         break;
6402     case 3:
6403         if (!dc_isar_feature(aa64_fp16, s)) {
6404             unallocated_encoding(s);
6405             return;
6406         }
6407         if (!fp_access_check(s)) {
6408             return;
6409         }
6410         handle_fp_2src_half(s, opcode, rd, rn, rm);
6411         break;
6412     default:
6413         unallocated_encoding(s);
6414     }
6415 }
6416 
6417 /* Floating-point data-processing (3 source) - single precision */
6418 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6419                                   int rd, int rn, int rm, int ra)
6420 {
6421     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6422     TCGv_i32 tcg_res = tcg_temp_new_i32();
6423     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6424 
6425     tcg_op1 = read_fp_sreg(s, rn);
6426     tcg_op2 = read_fp_sreg(s, rm);
6427     tcg_op3 = read_fp_sreg(s, ra);
6428 
6429     /* These are fused multiply-add, and must be done as one
6430      * floating point operation with no rounding between the
6431      * multiplication and addition steps.
6432      * NB that doing the negations here as separate steps is
6433      * correct: an input NaN should come out with its sign bit
6434      * flipped if it is a negated input.
6435      */
6436     if (o1) {
6437         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6438     }
6439 
6440     if (o0 != o1) {
6441         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6442     }
6443 
6444     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6445 
6446     write_fp_sreg(s, rd, tcg_res);
6447 }
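
/*
 * Mapping (illustrative): o1:o0 selects 00 FMADD, 01 FMSUB,
 * 10 FNMADD, 11 FNMSUB, so o1 negates the accumulator Ra and
 * o0 != o1 negates the product through Rn; the same scheme is used
 * by the double and half variants below.
 */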
6448 
6449 /* Floating-point data-processing (3 source) - double precision */
6450 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6451                                   int rd, int rn, int rm, int ra)
6452 {
6453     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6454     TCGv_i64 tcg_res = tcg_temp_new_i64();
6455     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6456 
6457     tcg_op1 = read_fp_dreg(s, rn);
6458     tcg_op2 = read_fp_dreg(s, rm);
6459     tcg_op3 = read_fp_dreg(s, ra);
6460 
6461     /* These are fused multiply-add, and must be done as one
6462      * floating point operation with no rounding between the
6463      * multiplication and addition steps.
6464      * NB that doing the negations here as separate steps is
6465      * correct: an input NaN should come out with its sign bit
6466      * flipped if it is a negated input.
6467      */
6468     if (o1) {
6469         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6470     }
6471 
6472     if (o0 != o1) {
6473         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6474     }
6475 
6476     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6477 
6478     write_fp_dreg(s, rd, tcg_res);
6479 }
6480 
6481 /* Floating-point data-processing (3 source) - half precision */
6482 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6483                                 int rd, int rn, int rm, int ra)
6484 {
6485     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6486     TCGv_i32 tcg_res = tcg_temp_new_i32();
6487     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6488 
6489     tcg_op1 = read_fp_hreg(s, rn);
6490     tcg_op2 = read_fp_hreg(s, rm);
6491     tcg_op3 = read_fp_hreg(s, ra);
6492 
6493     /* These are fused multiply-add, and must be done as one
6494      * floating point operation with no rounding between the
6495      * multiplication and addition steps.
6496      * NB that doing the negations here as separate steps is
6497      * correct: an input NaN should come out with its sign bit
6498      * flipped if it is a negated input.
6499      */
6500     if (o1) {
6501         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6502     }
6503 
6504     if (o0 != o1) {
6505         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6506     }
6507 
6508     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6509 
6510     write_fp_sreg(s, rd, tcg_res);
6511 }
6512 
6513 /* Floating point data-processing (3 source)
6514  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6515  * +---+---+---+-----------+------+----+------+----+------+------+------+
6516  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6517  * +---+---+---+-----------+------+----+------+----+------+------+------+
6518  */
6519 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6520 {
6521     int mos = extract32(insn, 29, 3);
6522     int type = extract32(insn, 22, 2);
6523     int rd = extract32(insn, 0, 5);
6524     int rn = extract32(insn, 5, 5);
6525     int ra = extract32(insn, 10, 5);
6526     int rm = extract32(insn, 16, 5);
6527     bool o0 = extract32(insn, 15, 1);
6528     bool o1 = extract32(insn, 21, 1);
6529 
6530     if (mos) {
6531         unallocated_encoding(s);
6532         return;
6533     }
6534 
6535     switch (type) {
6536     case 0:
6537         if (!fp_access_check(s)) {
6538             return;
6539         }
6540         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6541         break;
6542     case 1:
6543         if (!fp_access_check(s)) {
6544             return;
6545         }
6546         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6547         break;
6548     case 3:
6549         if (!dc_isar_feature(aa64_fp16, s)) {
6550             unallocated_encoding(s);
6551             return;
6552         }
6553         if (!fp_access_check(s)) {
6554             return;
6555         }
6556         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6557         break;
6558     default:
6559         unallocated_encoding(s);
6560     }
6561 }
6562 
6563 /* Floating point immediate
6564  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6565  * +---+---+---+-----------+------+---+------------+-------+------+------+
6566  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6567  * +---+---+---+-----------+------+---+------------+-------+------+------+
6568  */
6569 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6570 {
6571     int rd = extract32(insn, 0, 5);
6572     int imm5 = extract32(insn, 5, 5);
6573     int imm8 = extract32(insn, 13, 8);
6574     int type = extract32(insn, 22, 2);
6575     int mos = extract32(insn, 29, 3);
6576     uint64_t imm;
6577     MemOp sz;
6578 
6579     if (mos || imm5) {
6580         unallocated_encoding(s);
6581         return;
6582     }
6583 
6584     switch (type) {
6585     case 0:
6586         sz = MO_32;
6587         break;
6588     case 1:
6589         sz = MO_64;
6590         break;
6591     case 3:
6592         sz = MO_16;
6593         if (dc_isar_feature(aa64_fp16, s)) {
6594             break;
6595         }
6596         /* fallthru */
6597     default:
6598         unallocated_encoding(s);
6599         return;
6600     }
6601 
6602     if (!fp_access_check(s)) {
6603         return;
6604     }
6605 
6606     imm = vfp_expand_imm(sz, imm8);
6607     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6608 }
6609 
6610 /* Handle floating point <=> fixed point conversions. Note that we can
6611  * also deal with fp <=> integer conversions as a special case (scale == 64).
6612  * OPTME: consider handling that special case specially, or at least skipping
6613  * the call to scalbn in the helpers for zero shifts.
6614  */
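     /*
      * A note on the encoding: the scale field holds 64 - fracbits, so the
      * tcg_shift value computed below is the number of fractional bits.
      * For example SCVTF Dd, Xn, #4 has scale == 60, giving shift == 4 and
      * a result of Dd = Xn * 2^-4; scale == 64 is the plain integer case.
      */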
6615 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6616                            bool itof, int rmode, int scale, int sf, int type)
6617 {
6618     bool is_signed = !(opcode & 1);
6619     TCGv_ptr tcg_fpstatus;
6620     TCGv_i32 tcg_shift, tcg_single;
6621     TCGv_i64 tcg_double;
6622 
6623     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6624 
6625     tcg_shift = tcg_constant_i32(64 - scale);
6626 
6627     if (itof) {
6628         TCGv_i64 tcg_int = cpu_reg(s, rn);
6629         if (!sf) {
6630             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6631 
6632             if (is_signed) {
6633                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6634             } else {
6635                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6636             }
6637 
6638             tcg_int = tcg_extend;
6639         }
6640 
6641         switch (type) {
6642         case 1: /* float64 */
6643             tcg_double = tcg_temp_new_i64();
6644             if (is_signed) {
6645                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6646                                      tcg_shift, tcg_fpstatus);
6647             } else {
6648                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6649                                      tcg_shift, tcg_fpstatus);
6650             }
6651             write_fp_dreg(s, rd, tcg_double);
6652             break;
6653 
6654         case 0: /* float32 */
6655             tcg_single = tcg_temp_new_i32();
6656             if (is_signed) {
6657                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6658                                      tcg_shift, tcg_fpstatus);
6659             } else {
6660                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6661                                      tcg_shift, tcg_fpstatus);
6662             }
6663             write_fp_sreg(s, rd, tcg_single);
6664             break;
6665 
6666         case 3: /* float16 */
6667             tcg_single = tcg_temp_new_i32();
6668             if (is_signed) {
6669                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6670                                      tcg_shift, tcg_fpstatus);
6671             } else {
6672                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6673                                      tcg_shift, tcg_fpstatus);
6674             }
6675             write_fp_sreg(s, rd, tcg_single);
6676             break;
6677 
6678         default:
6679             g_assert_not_reached();
6680         }
6681     } else {
6682         TCGv_i64 tcg_int = cpu_reg(s, rd);
6683         TCGv_i32 tcg_rmode;
6684 
6685         if (extract32(opcode, 2, 1)) {
6686             /* There are too many rounding modes to all fit into rmode,
6687              * so FCVTA[US] is a special case.
6688              */
6689             rmode = FPROUNDING_TIEAWAY;
6690         }
6691 
6692         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6693 
6694         switch (type) {
6695         case 1: /* float64 */
6696             tcg_double = read_fp_dreg(s, rn);
6697             if (is_signed) {
6698                 if (!sf) {
6699                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6700                                          tcg_shift, tcg_fpstatus);
6701                 } else {
6702                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6703                                          tcg_shift, tcg_fpstatus);
6704                 }
6705             } else {
6706                 if (!sf) {
6707                     gen_helper_vfp_tould(tcg_int, tcg_double,
6708                                          tcg_shift, tcg_fpstatus);
6709                 } else {
6710                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6711                                          tcg_shift, tcg_fpstatus);
6712                 }
6713             }
6714             if (!sf) {
6715                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6716             }
6717             break;
6718 
6719         case 0: /* float32 */
6720             tcg_single = read_fp_sreg(s, rn);
6721             if (sf) {
6722                 if (is_signed) {
6723                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6724                                          tcg_shift, tcg_fpstatus);
6725                 } else {
6726                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6727                                          tcg_shift, tcg_fpstatus);
6728                 }
6729             } else {
6730                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6731                 if (is_signed) {
6732                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6733                                          tcg_shift, tcg_fpstatus);
6734                 } else {
6735                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6736                                          tcg_shift, tcg_fpstatus);
6737                 }
6738                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6739             }
6740             break;
6741 
6742         case 3: /* float16 */
6743             tcg_single = read_fp_sreg(s, rn);
6744             if (sf) {
6745                 if (is_signed) {
6746                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
6747                                          tcg_shift, tcg_fpstatus);
6748                 } else {
6749                     gen_helper_vfp_touqh(tcg_int, tcg_single,
6750                                          tcg_shift, tcg_fpstatus);
6751                 }
6752             } else {
6753                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6754                 if (is_signed) {
6755                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
6756                                          tcg_shift, tcg_fpstatus);
6757                 } else {
6758                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
6759                                          tcg_shift, tcg_fpstatus);
6760                 }
6761                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6762             }
6763             break;
6764 
6765         default:
6766             g_assert_not_reached();
6767         }
6768 
6769         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
6770     }
6771 }
6772 
6773 /* Floating point <-> fixed point conversions
6774  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6775  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6776  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6777  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6778  */
6779 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6780 {
6781     int rd = extract32(insn, 0, 5);
6782     int rn = extract32(insn, 5, 5);
6783     int scale = extract32(insn, 10, 6);
6784     int opcode = extract32(insn, 16, 3);
6785     int rmode = extract32(insn, 19, 2);
6786     int type = extract32(insn, 22, 2);
6787     bool sbit = extract32(insn, 29, 1);
6788     bool sf = extract32(insn, 31, 1);
6789     bool itof;
6790 
6791     if (sbit || (!sf && scale < 32)) {
6792         unallocated_encoding(s);
6793         return;
6794     }
6795 
6796     switch (type) {
6797     case 0: /* float32 */
6798     case 1: /* float64 */
6799         break;
6800     case 3: /* float16 */
6801         if (dc_isar_feature(aa64_fp16, s)) {
6802             break;
6803         }
6804         /* fallthru */
6805     default:
6806         unallocated_encoding(s);
6807         return;
6808     }
6809 
6810     switch ((rmode << 3) | opcode) {
6811     case 0x2: /* SCVTF */
6812     case 0x3: /* UCVTF */
6813         itof = true;
6814         break;
6815     case 0x18: /* FCVTZS */
6816     case 0x19: /* FCVTZU */
6817         itof = false;
6818         break;
6819     default:
6820         unallocated_encoding(s);
6821         return;
6822     }
6823 
6824     if (!fp_access_check(s)) {
6825         return;
6826     }
6827 
6828     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6829 }
6830 
6831 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6832 {
6833     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6834      * without conversion.
6835      */
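         /* The type field selects the width: 0 = single, 1 = double,
          * 3 = half, and 2 = the FMOV Vd.D[1], Xn / FMOV Xd, Vn.D[1]
          * forms that move the top half of a 128-bit register.
          */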
6836 
6837     if (itof) {
6838         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6839         TCGv_i64 tmp;
6840 
6841         switch (type) {
6842         case 0:
6843             /* 32 bit */
6844             tmp = tcg_temp_new_i64();
6845             tcg_gen_ext32u_i64(tmp, tcg_rn);
6846             write_fp_dreg(s, rd, tmp);
6847             break;
6848         case 1:
6849             /* 64 bit */
6850             write_fp_dreg(s, rd, tcg_rn);
6851             break;
6852         case 2:
6853             /* 64 bit to top half. */
6854             tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
6855             clear_vec_high(s, true, rd);
6856             break;
6857         case 3:
6858             /* 16 bit */
6859             tmp = tcg_temp_new_i64();
6860             tcg_gen_ext16u_i64(tmp, tcg_rn);
6861             write_fp_dreg(s, rd, tmp);
6862             break;
6863         default:
6864             g_assert_not_reached();
6865         }
6866     } else {
6867         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6868 
6869         switch (type) {
6870         case 0:
6871             /* 32 bit */
6872             tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
6873             break;
6874         case 1:
6875             /* 64 bit */
6876             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
6877             break;
6878         case 2:
6879             /* 64 bits from top half */
6880             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
6881             break;
6882         case 3:
6883             /* 16 bit */
6884             tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
6885             break;
6886         default:
6887             g_assert_not_reached();
6888         }
6889     }
6890 }
6891 
6892 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6893 {
6894     TCGv_i64 t = read_fp_dreg(s, rn);
6895     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
6896 
6897     gen_helper_fjcvtzs(t, t, fpstatus);
6898 
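         /* The helper returns the converted value in the low 32 bits of t
          * and the value for ZF in the high 32 bits; FJCVTZS sets NZCV to
          * 0Z00, so N, C and V are simply zeroed below.
          */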
6899     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6900     tcg_gen_extrh_i64_i32(cpu_ZF, t);
6901     tcg_gen_movi_i32(cpu_CF, 0);
6902     tcg_gen_movi_i32(cpu_NF, 0);
6903     tcg_gen_movi_i32(cpu_VF, 0);
6904 }
6905 
6906 /* Floating point <-> integer conversions
6907  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
6908  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6909  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6910  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6911  */
6912 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6913 {
6914     int rd = extract32(insn, 0, 5);
6915     int rn = extract32(insn, 5, 5);
6916     int opcode = extract32(insn, 16, 3);
6917     int rmode = extract32(insn, 19, 2);
6918     int type = extract32(insn, 22, 2);
6919     bool sbit = extract32(insn, 29, 1);
6920     bool sf = extract32(insn, 31, 1);
6921     bool itof = false;
6922 
6923     if (sbit) {
6924         goto do_unallocated;
6925     }
6926 
6927     switch (opcode) {
6928     case 2: /* SCVTF */
6929     case 3: /* UCVTF */
6930         itof = true;
6931         /* fallthru */
6932     case 4: /* FCVTAS */
6933     case 5: /* FCVTAU */
6934         if (rmode != 0) {
6935             goto do_unallocated;
6936         }
6937         /* fallthru */
6938     case 0: /* FCVT[NPMZ]S */
6939     case 1: /* FCVT[NPMZ]U */
6940         switch (type) {
6941         case 0: /* float32 */
6942         case 1: /* float64 */
6943             break;
6944         case 3: /* float16 */
6945             if (!dc_isar_feature(aa64_fp16, s)) {
6946                 goto do_unallocated;
6947             }
6948             break;
6949         default:
6950             goto do_unallocated;
6951         }
6952         if (!fp_access_check(s)) {
6953             return;
6954         }
6955         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6956         break;
6957 
6958     default:
6959         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
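             /* The case values below pack sf:type:rmode:opcode; e.g.
              * 0b11001110 is sf=1 type=2 rmode=1 opcode=6, the FMOV
              * form that accesses the top half of a 128-bit register.
              */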
6960         case 0b01100110: /* FMOV half <-> 32-bit int */
6961         case 0b01100111:
6962         case 0b11100110: /* FMOV half <-> 64-bit int */
6963         case 0b11100111:
6964             if (!dc_isar_feature(aa64_fp16, s)) {
6965                 goto do_unallocated;
6966             }
6967             /* fallthru */
6968         case 0b00000110: /* FMOV 32-bit */
6969         case 0b00000111:
6970         case 0b10100110: /* FMOV 64-bit */
6971         case 0b10100111:
6972         case 0b11001110: /* FMOV top half of 128-bit */
6973         case 0b11001111:
6974             if (!fp_access_check(s)) {
6975                 return;
6976             }
6977             itof = opcode & 1;
6978             handle_fmov(s, rd, rn, type, itof);
6979             break;
6980 
6981         case 0b00111110: /* FJCVTZS */
6982             if (!dc_isar_feature(aa64_jscvt, s)) {
6983                 goto do_unallocated;
6984             } else if (fp_access_check(s)) {
6985                 handle_fjcvtzs(s, rd, rn);
6986             }
6987             break;
6988 
6989         default:
6990         do_unallocated:
6991             unallocated_encoding(s);
6992             return;
6993         }
6994         break;
6995     }
6996 }
6997 
6998 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6999  *   31  30  29 28     25 24                          0
7000  * +---+---+---+---------+-----------------------------+
7001  * |   | 0 |   | 1 1 1 1 |                             |
7002  * +---+---+---+---------+-----------------------------+
7003  */
7004 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7005 {
7006     if (extract32(insn, 24, 1)) {
7007         /* Floating point data-processing (3 source) */
7008         disas_fp_3src(s, insn);
7009     } else if (extract32(insn, 21, 1) == 0) {
7010         /* Floating point to fixed point conversions */
7011         disas_fp_fixed_conv(s, insn);
7012     } else {
7013         switch (extract32(insn, 10, 2)) {
7014         case 1:
7015             /* Floating point conditional compare */
7016             disas_fp_ccomp(s, insn);
7017             break;
7018         case 2:
7019             /* Floating point data-processing (2 source) */
7020             disas_fp_2src(s, insn);
7021             break;
7022         case 3:
7023             /* Floating point conditional select */
7024             disas_fp_csel(s, insn);
7025             break;
7026         case 0:
7027             switch (ctz32(extract32(insn, 12, 4))) {
7028             case 0: /* [15:12] == xxx1 */
7029                 /* Floating point immediate */
7030                 disas_fp_imm(s, insn);
7031                 break;
7032             case 1: /* [15:12] == xx10 */
7033                 /* Floating point compare */
7034                 disas_fp_compare(s, insn);
7035                 break;
7036             case 2: /* [15:12] == x100 */
7037                 /* Floating point data-processing (1 source) */
7038                 disas_fp_1src(s, insn);
7039                 break;
7040             case 3: /* [15:12] == 1000 */
7041                 unallocated_encoding(s);
7042                 break;
7043             default: /* [15:12] == 0000 */
7044                 /* Floating point <-> integer conversions */
7045                 disas_fp_int_conv(s, insn);
7046                 break;
7047             }
7048             break;
7049         }
7050     }
7051 }
7052 
7053 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7054                      int pos)
7055 {
7056     /* Extract 64 bits from the middle of two concatenated 64 bit
7057      * vector register slices left:right. The extracted bits start
7058      * at 'pos' bits into the right (least significant) side.
7059      * We return the result in tcg_right, and guarantee not to
7060      * trash tcg_left.
7061      */
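         /* For example, pos == 8 produces right[63:8] in bits [55:0] of
          * the result, OR'd with left[7:0] in bits [63:56].
          */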
7062     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7063     assert(pos > 0 && pos < 64);
7064 
7065     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7066     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7067     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7068 }
7069 
7070 /* EXT
7071  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7072  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7073  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7074  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7075  */
7076 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7077 {
7078     int is_q = extract32(insn, 30, 1);
7079     int op2 = extract32(insn, 22, 2);
7080     int imm4 = extract32(insn, 11, 4);
7081     int rm = extract32(insn, 16, 5);
7082     int rn = extract32(insn, 5, 5);
7083     int rd = extract32(insn, 0, 5);
7084     int pos = imm4 << 3;
7085     TCGv_i64 tcg_resl, tcg_resh;
7086 
7087     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7088         unallocated_encoding(s);
7089         return;
7090     }
7091 
7092     if (!fp_access_check(s)) {
7093         return;
7094     }
7095 
7096     tcg_resh = tcg_temp_new_i64();
7097     tcg_resl = tcg_temp_new_i64();
7098 
7099     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7100      * either extracting 128 bits from a 128:128 concatenation, or
7101      * extracting 64 bits from a 64:64 concatenation.
7102      */
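         /* e.g. Q=1, imm4=9: pos starts at 72, so we step past Vn[0] and
          * assemble bytes 9..24 of the Vm:Vn concatenation from Vn[1],
          * Vm[0] and Vm[1].
          */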
7103     if (!is_q) {
7104         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7105         if (pos != 0) {
7106             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7107             do_ext64(s, tcg_resh, tcg_resl, pos);
7108         }
7109     } else {
7110         TCGv_i64 tcg_hh;
7111         typedef struct {
7112             int reg;
7113             int elt;
7114         } EltPosns;
7115         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7116         EltPosns *elt = eltposns;
7117 
7118         if (pos >= 64) {
7119             elt++;
7120             pos -= 64;
7121         }
7122 
7123         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7124         elt++;
7125         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7126         elt++;
7127         if (pos != 0) {
7128             do_ext64(s, tcg_resh, tcg_resl, pos);
7129             tcg_hh = tcg_temp_new_i64();
7130             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7131             do_ext64(s, tcg_hh, tcg_resh, pos);
7132         }
7133     }
7134 
7135     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7136     if (is_q) {
7137         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7138     }
7139     clear_vec_high(s, is_q, rd);
7140 }
7141 
7142 /* TBL/TBX
7143  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7144  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7145  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7146  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7147  */
7148 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7149 {
7150     int op2 = extract32(insn, 22, 2);
7151     int is_q = extract32(insn, 30, 1);
7152     int rm = extract32(insn, 16, 5);
7153     int rn = extract32(insn, 5, 5);
7154     int rd = extract32(insn, 0, 5);
7155     int is_tbx = extract32(insn, 12, 1);
7156     int len = (extract32(insn, 13, 2) + 1) * 16;
7157 
7158     if (op2 != 0) {
7159         unallocated_encoding(s);
7160         return;
7161     }
7162 
7163     if (!fp_access_check(s)) {
7164         return;
7165     }
7166 
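         /* Everything the helper needs is packed into the simd_data
          * immediate: (len << 6) | (is_tbx << 5) | rn, i.e. the table
          * length in bytes, the TBX flag and the table base register.
          */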
7167     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7168                        vec_full_reg_offset(s, rm), tcg_env,
7169                        is_q ? 16 : 8, vec_full_reg_size(s),
7170                        (len << 6) | (is_tbx << 5) | rn,
7171                        gen_helper_simd_tblx);
7172 }
7173 
7174 /* ZIP/UZP/TRN
7175  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7176  * +---+---+-------------+------+---+------+---+------------------+------+
7177  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7178  * +---+---+-------------+------+---+------+---+------------------+------+
7179  */
7180 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7181 {
7182     int rd = extract32(insn, 0, 5);
7183     int rn = extract32(insn, 5, 5);
7184     int rm = extract32(insn, 16, 5);
7185     int size = extract32(insn, 22, 2);
7186     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7187      * bit 2 indicates 1 vs 2 variant of the insn.
7188      */
7189     int opcode = extract32(insn, 12, 2);
7190     bool part = extract32(insn, 14, 1);
7191     bool is_q = extract32(insn, 30, 1);
7192     int esize = 8 << size;
7193     int i;
7194     int datasize = is_q ? 128 : 64;
7195     int elements = datasize / esize;
7196     TCGv_i64 tcg_res[2], tcg_ele;
7197 
7198     if (opcode == 0 || (size == 3 && !is_q)) {
7199         unallocated_encoding(s);
7200         return;
7201     }
7202 
7203     if (!fp_access_check(s)) {
7204         return;
7205     }
7206 
7207     tcg_res[0] = tcg_temp_new_i64();
7208     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7209     tcg_ele = tcg_temp_new_i64();
7210 
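         /* Element-by-element construction; e.g. ZIP1 Vd.16B, Vn.16B, Vm.16B
          * interleaves the low halves:
          *   Vd.B[2*j] = Vn.B[j], Vd.B[2*j+1] = Vm.B[j]  for j = 0..7
          */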
7211     for (i = 0; i < elements; i++) {
7212         int o, w;
7213 
7214         switch (opcode) {
7215         case 1: /* UZP1/2 */
7216         {
7217             int midpoint = elements / 2;
7218             if (i < midpoint) {
7219                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7220             } else {
7221                 read_vec_element(s, tcg_ele, rm,
7222                                  2 * (i - midpoint) + part, size);
7223             }
7224             break;
7225         }
7226         case 2: /* TRN1/2 */
7227             if (i & 1) {
7228                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7229             } else {
7230                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7231             }
7232             break;
7233         case 3: /* ZIP1/2 */
7234         {
7235             int base = part * elements / 2;
7236             if (i & 1) {
7237                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7238             } else {
7239                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7240             }
7241             break;
7242         }
7243         default:
7244             g_assert_not_reached();
7245         }
7246 
7247         w = (i * esize) / 64;
7248         o = (i * esize) % 64;
7249         if (o == 0) {
7250             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7251         } else {
7252             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7253             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7254         }
7255     }
7256 
7257     for (i = 0; i <= is_q; ++i) {
7258         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7259     }
7260     clear_vec_high(s, is_q, rd);
7261 }
7262 
7263 /*
7264  * do_reduction_op helper
7265  *
7266  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7267  * important for correct NaN propagation that we do these
7268  * operations in exactly the order specified by the pseudocode.
7269  *
7270  * This is a recursive function; TCG temps should be freed by the
7271  * calling function once it is done with the values.
7272  */
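     /*
      * The vmap bitmap tracks which elements remain in the current
      * sub-tree; e.g. for four single-precision lanes vmap = 0b1111 splits
      * into 0b0011 and 0b1100, giving op(op(e0, e1), op(e2, e3)) exactly
      * as Reduce() specifies.
      */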
7273 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7274                                 int esize, int size, int vmap, TCGv_ptr fpst)
7275 {
7276     if (esize == size) {
7277         int element;
7278         MemOp msize = esize == 16 ? MO_16 : MO_32;
7279         TCGv_i32 tcg_elem;
7280 
7281         /* We should have one register left here */
7282         assert(ctpop8(vmap) == 1);
7283         element = ctz32(vmap);
7284         assert(element < 8);
7285 
7286         tcg_elem = tcg_temp_new_i32();
7287         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7288         return tcg_elem;
7289     } else {
7290         int bits = size / 2;
7291         int shift = ctpop8(vmap) / 2;
7292         int vmap_lo = (vmap >> shift) & vmap;
7293         int vmap_hi = (vmap & ~vmap_lo);
7294         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7295 
7296         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7297         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7298         tcg_res = tcg_temp_new_i32();
7299 
7300         switch (fpopcode) {
7301         case 0x0c: /* fmaxnmv half-precision */
7302             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7303             break;
7304         case 0x0f: /* fmaxv half-precision */
7305             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7306             break;
7307         case 0x1c: /* fminnmv half-precision */
7308             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7309             break;
7310         case 0x1f: /* fminv half-precision */
7311             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7312             break;
7313         case 0x2c: /* fmaxnmv */
7314             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7315             break;
7316         case 0x2f: /* fmaxv */
7317             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7318             break;
7319         case 0x3c: /* fminnmv */
7320             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7321             break;
7322         case 0x3f: /* fminv */
7323             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7324             break;
7325         default:
7326             g_assert_not_reached();
7327         }
7328         return tcg_res;
7329     }
7330 }
7331 
7332 /* AdvSIMD across lanes
7333  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7334  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7335  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7336  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7337  */
7338 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7339 {
7340     int rd = extract32(insn, 0, 5);
7341     int rn = extract32(insn, 5, 5);
7342     int size = extract32(insn, 22, 2);
7343     int opcode = extract32(insn, 12, 5);
7344     bool is_q = extract32(insn, 30, 1);
7345     bool is_u = extract32(insn, 29, 1);
7346     bool is_fp = false;
7347     bool is_min = false;
7348     int esize;
7349     int elements;
7350     int i;
7351     TCGv_i64 tcg_res, tcg_elt;
7352 
7353     switch (opcode) {
7354     case 0x1b: /* ADDV */
7355         if (is_u) {
7356             unallocated_encoding(s);
7357             return;
7358         }
7359         /* fall through */
7360     case 0x3: /* SADDLV, UADDLV */
7361     case 0xa: /* SMAXV, UMAXV */
7362     case 0x1a: /* SMINV, UMINV */
7363         if (size == 3 || (size == 2 && !is_q)) {
7364             unallocated_encoding(s);
7365             return;
7366         }
7367         break;
7368     case 0xc: /* FMAXNMV, FMINNMV */
7369     case 0xf: /* FMAXV, FMINV */
7370         /* Bit 1 of the size field encodes min vs max, and the actual size
7371          * depends on the encoding of the U bit. If U is not set (and FP16
7372          * is enabled) then we do half-precision float instead of single
7373          * precision.
7374          */
7375         is_min = extract32(size, 1, 1);
7376         is_fp = true;
7377         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7378             size = 1;
7379         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7380             unallocated_encoding(s);
7381             return;
7382         } else {
7383             size = 2;
7384         }
7385         break;
7386     default:
7387         unallocated_encoding(s);
7388         return;
7389     }
7390 
7391     if (!fp_access_check(s)) {
7392         return;
7393     }
7394 
7395     esize = 8 << size;
7396     elements = (is_q ? 128 : 64) / esize;
7397 
7398     tcg_res = tcg_temp_new_i64();
7399     tcg_elt = tcg_temp_new_i64();
7400 
7401     /* These instructions operate across all lanes of a vector
7402      * to produce a single result. We can guarantee that a 64
7403      * bit intermediate is sufficient:
7404      *  + for [US]ADDLV the maximum element size is 32 bits, and
7405      *    the result type is 64 bits
7406      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7407      *    same as the element size, which is 32 bits at most
7408      * For the integer operations we can choose to work at 64
7409      * or 32 bits and truncate at the end; for simplicity
7410      * we use 64 bits always. The floating point
7411      * ops do require 32 bit intermediates, though.
7412      */
7413     if (!is_fp) {
7414         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7415 
7416         for (i = 1; i < elements; i++) {
7417             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7418 
7419             switch (opcode) {
7420             case 0x03: /* SADDLV / UADDLV */
7421             case 0x1b: /* ADDV */
7422                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7423                 break;
7424             case 0x0a: /* SMAXV / UMAXV */
7425                 if (is_u) {
7426                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7427                 } else {
7428                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7429                 }
7430                 break;
7431             case 0x1a: /* SMINV / UMINV */
7432                 if (is_u) {
7433                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7434                 } else {
7435                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7436                 }
7437                 break;
7438             default:
7439                 g_assert_not_reached();
7440             }
7441 
7442         }
7443     } else {
7444         /* Floating point vector reduction ops which work across 32
7445          * bit (single) or 16 bit (half-precision) intermediates.
7446          * Note that correct NaN propagation requires that we do these
7447          * operations in exactly the order specified by the pseudocode.
7448          */
7449         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7450         int fpopcode = opcode | is_min << 4 | is_u << 5;
7451         int vmap = (1 << elements) - 1;
7452         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7453                                              (is_q ? 128 : 64), vmap, fpst);
7454         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7455     }
7456 
7457     /* Now truncate the result to the width required for the final output */
7458     if (opcode == 0x03) {
7459         /* SADDLV, UADDLV: result is 2*esize */
7460         size++;
7461     }
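         /* e.g. SADDLV of .8B inputs (size 0) produces a 16-bit total, so
          * the incremented size selects the 16-bit zero-extension below.
          */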
7462 
7463     switch (size) {
7464     case 0:
7465         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7466         break;
7467     case 1:
7468         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7469         break;
7470     case 2:
7471         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7472         break;
7473     case 3:
7474         break;
7475     default:
7476         g_assert_not_reached();
7477     }
7478 
7479     write_fp_dreg(s, rd, tcg_res);
7480 }
7481 
7482 /* DUP (Element, Vector)
7483  *
7484  *  31  30   29              21 20    16 15        10  9    5 4    0
7485  * +---+---+-------------------+--------+-------------+------+------+
7486  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7487  * +---+---+-------------------+--------+-------------+------+------+
7488  *
7489  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7490  */
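     /*
      * e.g. imm5 = 0b01010: the lowest set bit gives size 1 (16-bit
      * elements) and index = imm5 >> 2 = 2, i.e. DUP Vd.8H, Vn.H[2]
      * when Q is set.
      */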
7491 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7492                              int imm5)
7493 {
7494     int size = ctz32(imm5);
7495     int index;
7496 
7497     if (size > 3 || (size == 3 && !is_q)) {
7498         unallocated_encoding(s);
7499         return;
7500     }
7501 
7502     if (!fp_access_check(s)) {
7503         return;
7504     }
7505 
7506     index = imm5 >> (size + 1);
7507     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7508                          vec_reg_offset(s, rn, index, size),
7509                          is_q ? 16 : 8, vec_full_reg_size(s));
7510 }
7511 
7512 /* DUP (element, scalar)
7513  *  31                   21 20    16 15        10  9    5 4    0
7514  * +-----------------------+--------+-------------+------+------+
7515  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7516  * +-----------------------+--------+-------------+------+------+
7517  */
7518 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7519                               int imm5)
7520 {
7521     int size = ctz32(imm5);
7522     int index;
7523     TCGv_i64 tmp;
7524 
7525     if (size > 3) {
7526         unallocated_encoding(s);
7527         return;
7528     }
7529 
7530     if (!fp_access_check(s)) {
7531         return;
7532     }
7533 
7534     index = imm5 >> (size + 1);
7535 
7536     /* This instruction just extracts the specified element and
7537      * zero-extends it into the bottom of the destination register.
7538      */
7539     tmp = tcg_temp_new_i64();
7540     read_vec_element(s, tmp, rn, index, size);
7541     write_fp_dreg(s, rd, tmp);
7542 }
7543 
7544 /* DUP (General)
7545  *
7546  *  31  30   29              21 20    16 15        10  9    5 4    0
7547  * +---+---+-------------------+--------+-------------+------+------+
7548  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7549  * +---+---+-------------------+--------+-------------+------+------+
7550  *
7551  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7552  */
7553 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7554                              int imm5)
7555 {
7556     int size = ctz32(imm5);
7557     uint32_t dofs, oprsz, maxsz;
7558 
7559     if (size > 3 || ((size == 3) && !is_q)) {
7560         unallocated_encoding(s);
7561         return;
7562     }
7563 
7564     if (!fp_access_check(s)) {
7565         return;
7566     }
7567 
7568     dofs = vec_full_reg_offset(s, rd);
7569     oprsz = is_q ? 16 : 8;
7570     maxsz = vec_full_reg_size(s);
7571 
7572     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7573 }
7574 
7575 /* INS (Element)
7576  *
7577  *  31                   21 20    16 15  14    11  10 9    5 4    0
7578  * +-----------------------+--------+------------+---+------+------+
7579  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7580  * +-----------------------+--------+------------+---+------+------+
7581  *
7582  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7583  * index: encoded in imm5<4:size+1>
7584  */
7585 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7586                              int imm4, int imm5)
7587 {
7588     int size = ctz32(imm5);
7589     int src_index, dst_index;
7590     TCGv_i64 tmp;
7591 
7592     if (size > 3) {
7593         unallocated_encoding(s);
7594         return;
7595     }
7596 
7597     if (!fp_access_check(s)) {
7598         return;
7599     }
7600 
7601     dst_index = extract32(imm5, 1 + size, 5);
7602     src_index = extract32(imm4, size, 4);
7603 
7604     tmp = tcg_temp_new_i64();
7605 
7606     read_vec_element(s, tmp, rn, src_index, size);
7607     write_vec_element(s, tmp, rd, dst_index, size);
7608 
7609     /* INS is considered a 128-bit write for SVE. */
7610     clear_vec_high(s, true, rd);
7611 }
7612 
7613 
7614 /* INS (General)
7615  *
7616  *  31                   21 20    16 15        10  9    5 4    0
7617  * +-----------------------+--------+-------------+------+------+
7618  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7619  * +-----------------------+--------+-------------+------+------+
7620  *
7621  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7622  * index: encoded in imm5<4:size+1>
7623  */
7624 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7625 {
7626     int size = ctz32(imm5);
7627     int idx;
7628 
7629     if (size > 3) {
7630         unallocated_encoding(s);
7631         return;
7632     }
7633 
7634     if (!fp_access_check(s)) {
7635         return;
7636     }
7637 
7638     idx = extract32(imm5, 1 + size, 4 - size);
7639     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7640 
7641     /* INS is considered a 128-bit write for SVE. */
7642     clear_vec_high(s, true, rd);
7643 }
7644 
7645 /*
7646  * UMOV (General)
7647  * SMOV (General)
7648  *
7649  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7650  * +---+---+-------------------+--------+-------------+------+------+
7651  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7652  * +---+---+-------------------+--------+-------------+------+------+
7653  *
7654  * U: unsigned when set
7655  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7656  */
7657 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7658                                   int rn, int rd, int imm5)
7659 {
7660     int size = ctz32(imm5);
7661     int element;
7662     TCGv_i64 tcg_rd;
7663 
7664     /* Check for UnallocatedEncodings */
7665     if (is_signed) {
7666         if (size > 2 || (size == 2 && !is_q)) {
7667             unallocated_encoding(s);
7668             return;
7669         }
7670     } else {
7671         if (size > 3
7672             || (size < 3 && is_q)
7673             || (size == 3 && !is_q)) {
7674             unallocated_encoding(s);
7675             return;
7676         }
7677     }
7678 
7679     if (!fp_access_check(s)) {
7680         return;
7681     }
7682 
7683     element = extract32(imm5, 1 + size, 4);
7684 
7685     tcg_rd = cpu_reg(s, rd);
7686     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7687     if (is_signed && !is_q) {
7688         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7689     }
7690 }
7691 
7692 /* AdvSIMD copy
7693  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7694  * +---+---+----+-----------------+------+---+------+---+------+------+
7695  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7696  * +---+---+----+-----------------+------+---+------+---+------+------+
7697  */
7698 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7699 {
7700     int rd = extract32(insn, 0, 5);
7701     int rn = extract32(insn, 5, 5);
7702     int imm4 = extract32(insn, 11, 4);
7703     int op = extract32(insn, 29, 1);
7704     int is_q = extract32(insn, 30, 1);
7705     int imm5 = extract32(insn, 16, 5);
7706 
7707     if (op) {
7708         if (is_q) {
7709             /* INS (element) */
7710             handle_simd_inse(s, rd, rn, imm4, imm5);
7711         } else {
7712             unallocated_encoding(s);
7713         }
7714     } else {
7715         switch (imm4) {
7716         case 0:
7717             /* DUP (element - vector) */
7718             handle_simd_dupe(s, is_q, rd, rn, imm5);
7719             break;
7720         case 1:
7721             /* DUP (general) */
7722             handle_simd_dupg(s, is_q, rd, rn, imm5);
7723             break;
7724         case 3:
7725             if (is_q) {
7726                 /* INS (general) */
7727                 handle_simd_insg(s, rd, rn, imm5);
7728             } else {
7729                 unallocated_encoding(s);
7730             }
7731             break;
7732         case 5:
7733         case 7:
7734             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7735             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7736             break;
7737         default:
7738             unallocated_encoding(s);
7739             break;
7740         }
7741     }
7742 }
7743 
7744 /* AdvSIMD modified immediate
7745  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7746  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7747  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7748  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7749  *
7750  * There are a number of operations that can be carried out here:
7751  *   MOVI - move (shifted) imm into register
7752  *   MVNI - move inverted (shifted) imm into register
7753  *   ORR  - bitwise OR of (shifted) imm with register
7754  *   BIC  - bitwise clear of (shifted) imm with register
7755  * With ARMv8.2 we also have:
7756  *   FMOV half-precision
7757  */
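     /*
      * The immediate expansion is done by asimd_imm_const(); e.g. cmode=0xe
      * with op=1 expands each bit of abcdefgh to a full byte, so
      * abcdefgh = 0b10000001 yields 0xff000000000000ff.
      */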
7758 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7759 {
7760     int rd = extract32(insn, 0, 5);
7761     int cmode = extract32(insn, 12, 4);
7762     int o2 = extract32(insn, 11, 1);
7763     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7764     bool is_neg = extract32(insn, 29, 1);
7765     bool is_q = extract32(insn, 30, 1);
7766     uint64_t imm = 0;
7767 
7768     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7769         /* Check for FMOV (vector, immediate) - half-precision */
7770         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7771             unallocated_encoding(s);
7772             return;
7773         }
7774     }
7775 
7776     if (!fp_access_check(s)) {
7777         return;
7778     }
7779 
7780     if (cmode == 15 && o2 && !is_neg) {
7781         /* FMOV (vector, immediate) - half-precision */
7782         imm = vfp_expand_imm(MO_16, abcdefgh);
7783         /* now duplicate across the lanes */
7784         imm = dup_const(MO_16, imm);
7785     } else {
7786         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
7787     }
7788 
7789     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7790         /* MOVI or MVNI, with MVNI negation handled above.  */
7791         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7792                              vec_full_reg_size(s), imm);
7793     } else {
7794         /* ORR or BIC, with BIC negation to AND handled above.  */
7795         if (is_neg) {
7796             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7797         } else {
7798             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7799         }
7800     }
7801 }
7802 
7803 /* AdvSIMD scalar copy
7804  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7805  * +-----+----+-----------------+------+---+------+---+------+------+
7806  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7807  * +-----+----+-----------------+------+---+------+---+------+------+
7808  */
7809 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7810 {
7811     int rd = extract32(insn, 0, 5);
7812     int rn = extract32(insn, 5, 5);
7813     int imm4 = extract32(insn, 11, 4);
7814     int imm5 = extract32(insn, 16, 5);
7815     int op = extract32(insn, 29, 1);
7816 
7817     if (op != 0 || imm4 != 0) {
7818         unallocated_encoding(s);
7819         return;
7820     }
7821 
7822     /* DUP (element, scalar) */
7823     handle_simd_dupes(s, rd, rn, imm5);
7824 }
7825 
7826 /* AdvSIMD scalar pairwise
7827  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7828  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7829  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7830  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7831  */
7832 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7833 {
7834     int u = extract32(insn, 29, 1);
7835     int size = extract32(insn, 22, 2);
7836     int opcode = extract32(insn, 12, 5);
7837     int rn = extract32(insn, 5, 5);
7838     int rd = extract32(insn, 0, 5);
7839     TCGv_ptr fpst;
7840 
7841     /* For some ops (the FP ones), size[1] is part of the encoding.
7842      * For ADDP strictly it is not, but size[1] is always 1 for valid
7843      * encodings.
7844      */
7845     opcode |= (extract32(size, 1, 1) << 5);
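         /* e.g. ADDP has opcode field 0x1b and size[1] == 1, giving the
          * 0x3b case value below; FMINNMP similarly becomes 0x2c.
          */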
7846 
7847     switch (opcode) {
7848     case 0x3b: /* ADDP */
7849         if (u || size != 3) {
7850             unallocated_encoding(s);
7851             return;
7852         }
7853         if (!fp_access_check(s)) {
7854             return;
7855         }
7856 
7857         fpst = NULL;
7858         break;
7859     case 0xc: /* FMAXNMP */
7860     case 0xd: /* FADDP */
7861     case 0xf: /* FMAXP */
7862     case 0x2c: /* FMINNMP */
7863     case 0x2f: /* FMINP */
7864         /* FP op; size[0] selects 32 vs 64 bit */
7865         if (!u) {
7866             if (!dc_isar_feature(aa64_fp16, s)) {
7867                 unallocated_encoding(s);
7868                 return;
7869             } else {
7870                 size = MO_16;
7871             }
7872         } else {
7873             size = extract32(size, 0, 1) ? MO_64 : MO_32;
7874         }
7875 
7876         if (!fp_access_check(s)) {
7877             return;
7878         }
7879 
7880         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7881         break;
7882     default:
7883         unallocated_encoding(s);
7884         return;
7885     }
7886 
7887     if (size == MO_64) {
7888         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7889         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7890         TCGv_i64 tcg_res = tcg_temp_new_i64();
7891 
7892         read_vec_element(s, tcg_op1, rn, 0, MO_64);
7893         read_vec_element(s, tcg_op2, rn, 1, MO_64);
7894 
7895         switch (opcode) {
7896         case 0x3b: /* ADDP */
7897             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7898             break;
7899         case 0xc: /* FMAXNMP */
7900             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7901             break;
7902         case 0xd: /* FADDP */
7903             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7904             break;
7905         case 0xf: /* FMAXP */
7906             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7907             break;
7908         case 0x2c: /* FMINNMP */
7909             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7910             break;
7911         case 0x2f: /* FMINP */
7912             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7913             break;
7914         default:
7915             g_assert_not_reached();
7916         }
7917 
7918         write_fp_dreg(s, rd, tcg_res);
7919     } else {
7920         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7921         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7922         TCGv_i32 tcg_res = tcg_temp_new_i32();
7923 
7924         read_vec_element_i32(s, tcg_op1, rn, 0, size);
7925         read_vec_element_i32(s, tcg_op2, rn, 1, size);
7926 
7927         if (size == MO_16) {
7928             switch (opcode) {
7929             case 0xc: /* FMAXNMP */
7930                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7931                 break;
7932             case 0xd: /* FADDP */
7933                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7934                 break;
7935             case 0xf: /* FMAXP */
7936                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7937                 break;
7938             case 0x2c: /* FMINNMP */
7939                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7940                 break;
7941             case 0x2f: /* FMINP */
7942                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7943                 break;
7944             default:
7945                 g_assert_not_reached();
7946             }
7947         } else {
7948             switch (opcode) {
7949             case 0xc: /* FMAXNMP */
7950                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7951                 break;
7952             case 0xd: /* FADDP */
7953                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7954                 break;
7955             case 0xf: /* FMAXP */
7956                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7957                 break;
7958             case 0x2c: /* FMINNMP */
7959                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7960                 break;
7961             case 0x2f: /* FMINP */
7962                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7963                 break;
7964             default:
7965                 g_assert_not_reached();
7966             }
7967         }
7968 
7969         write_fp_sreg(s, rd, tcg_res);
7970     }
7971 }
7972 
7973 /*
7974  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7975  *
7976  * This handles the common shift-right logic and is used by both
7977  * the vector and scalar code.
7978  */
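     /*
      * Rounding adds 1 << (shift - 1) before shifting; e.g. a rounding
      * right shift of 0x17 by 3 computes (0x17 + 4) >> 3 = 3 rather than 2.
      * For 64-bit elements that addition can carry out of bit 63, which is
      * why the extended 128-bit path below exists.
      */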
7979 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7980                                     TCGv_i64 tcg_rnd, bool accumulate,
7981                                     bool is_u, int size, int shift)
7982 {
7983     bool extended_result = false;
7984     bool round = tcg_rnd != NULL;
7985     int ext_lshift = 0;
7986     TCGv_i64 tcg_src_hi;
7987 
7988     if (round && size == 3) {
7989         extended_result = true;
7990         ext_lshift = 64 - shift;
7991         tcg_src_hi = tcg_temp_new_i64();
7992     } else if (shift == 64) {
7993         if (!accumulate && is_u) {
7994             /* result is zero */
7995             tcg_gen_movi_i64(tcg_res, 0);
7996             return;
7997         }
7998     }
7999 
8000     /* Deal with the rounding step */
8001     if (round) {
8002         if (extended_result) {
8003             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8004             if (!is_u) {
8005                 /* take care of sign extending tcg_res */
8006                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8007                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8008                                  tcg_src, tcg_src_hi,
8009                                  tcg_rnd, tcg_zero);
8010             } else {
8011                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8012                                  tcg_src, tcg_zero,
8013                                  tcg_rnd, tcg_zero);
8014             }
8015         } else {
8016             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8017         }
8018     }
8019 
8020     /* Now do the shift right */
8021     if (round && extended_result) {
8022         /* extended case, >64 bit precision required */
8023         if (ext_lshift == 0) {
8024             /* special case, only high bits matter */
8025             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8026         } else {
8027             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8028             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8029             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8030         }
8031     } else {
8032         if (is_u) {
8033             if (shift == 64) {
8034                 /* essentially shifting in 64 zeros */
8035                 tcg_gen_movi_i64(tcg_src, 0);
8036             } else {
8037                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8038             }
8039         } else {
8040             if (shift == 64) {
8041                 /* effectively extending the sign-bit */
8042                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8043             } else {
8044                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8045             }
8046         }
8047     }
8048 
8049     if (accumulate) {
8050         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8051     } else {
8052         tcg_gen_mov_i64(tcg_res, tcg_src);
8053     }
8054 }
8055 
8056 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8057 static void handle_scalar_simd_shri(DisasContext *s,
8058                                     bool is_u, int immh, int immb,
8059                                     int opcode, int rn, int rd)
8060 {
8061     const int size = 3;
8062     int immhb = immh << 3 | immb;
8063     int shift = 2 * (8 << size) - immhb;
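         /* immh:immb encodes (2 * esize) - shift; e.g. immh=0b1000,
          * immb=0b001 gives shift = 128 - 65 = 63.
          */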
8064     bool accumulate = false;
8065     bool round = false;
8066     bool insert = false;
8067     TCGv_i64 tcg_rn;
8068     TCGv_i64 tcg_rd;
8069     TCGv_i64 tcg_round;
8070 
8071     if (!extract32(immh, 3, 1)) {
8072         unallocated_encoding(s);
8073         return;
8074     }
8075 
8076     if (!fp_access_check(s)) {
8077         return;
8078     }
8079 
8080     switch (opcode) {
8081     case 0x02: /* SSRA / USRA (accumulate) */
8082         accumulate = true;
8083         break;
8084     case 0x04: /* SRSHR / URSHR (rounding) */
8085         round = true;
8086         break;
8087     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8088         accumulate = round = true;
8089         break;
8090     case 0x08: /* SRI */
8091         insert = true;
8092         break;
8093     }
8094 
8095     if (round) {
8096         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8097     } else {
8098         tcg_round = NULL;
8099     }
8100 
8101     tcg_rn = read_fp_dreg(s, rn);
8102     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8103 
8104     if (insert) {
8105         /* shift count same as element size is valid but does nothing;
8106          * special case to avoid potential shift by 64.
8107          */
8108         int esize = 8 << size;
8109         if (shift != esize) {
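                 /* SRI: shift RN right and deposit the surviving
                  * esize - shift bits at the bottom of RD, leaving the
                  * top shift bits of RD untouched.
                  */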
8110             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8111             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8112         }
8113     } else {
8114         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8115                                 accumulate, is_u, size, shift);
8116     }
8117 
8118     write_fp_dreg(s, rd, tcg_rd);
8119 }
8120 
8121 /* SHL/SLI - Scalar shift left */
8122 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8123                                     int immh, int immb, int opcode,
8124                                     int rn, int rd)
8125 {
8126     int size = 32 - clz32(immh) - 1;
8127     int immhb = immh << 3 | immb;
8128     int shift = immhb - (8 << size);
8129     TCGv_i64 tcg_rn;
8130     TCGv_i64 tcg_rd;
8131 
8132     if (!extract32(immh, 3, 1)) {
8133         unallocated_encoding(s);
8134         return;
8135     }
8136 
8137     if (!fp_access_check(s)) {
8138         return;
8139     }
8140 
8141     tcg_rn = read_fp_dreg(s, rn);
8142     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8143 
8144     if (insert) {
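             /* SLI: deposit RN, shifted left, into RD, preserving only
              * the low shift bits of the original RD.
              */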
8145         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8146     } else {
8147         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8148     }
8149 
8150     write_fp_dreg(s, rd, tcg_rd);
8151 }
8152 
8153 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8154  * (signed/unsigned) narrowing */
8155 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8156                                    bool is_u_shift, bool is_u_narrow,
8157                                    int immh, int immb, int opcode,
8158                                    int rn, int rd)
8159 {
8160     int immhb = immh << 3 | immb;
8161     int size = 32 - clz32(immh) - 1;
8162     int esize = 8 << size;
8163     int shift = (2 * esize) - immhb;
8164     int elements = is_scalar ? 1 : (64 / esize);
8165     bool round = extract32(opcode, 0, 1);
8166     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8167     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8168     TCGv_i32 tcg_rd_narrowed;
8169     TCGv_i64 tcg_final;
8170 
8171     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8172         { gen_helper_neon_narrow_sat_s8,
8173           gen_helper_neon_unarrow_sat8 },
8174         { gen_helper_neon_narrow_sat_s16,
8175           gen_helper_neon_unarrow_sat16 },
8176         { gen_helper_neon_narrow_sat_s32,
8177           gen_helper_neon_unarrow_sat32 },
8178         { NULL, NULL },
8179     };
8180     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8181         gen_helper_neon_narrow_sat_u8,
8182         gen_helper_neon_narrow_sat_u16,
8183         gen_helper_neon_narrow_sat_u32,
8184         NULL
8185     };
8186     NeonGenNarrowEnvFn *narrowfn;
8187 
8188     int i;
8189 
8190     assert(size < 4);
8191 
8192     if (extract32(immh, 3, 1)) {
8193         unallocated_encoding(s);
8194         return;
8195     }
8196 
8197     if (!fp_access_check(s)) {
8198         return;
8199     }
8200 
8201     if (is_u_shift) {
8202         narrowfn = unsigned_narrow_fns[size];
8203     } else {
8204         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8205     }
8206 
8207     tcg_rn = tcg_temp_new_i64();
8208     tcg_rd = tcg_temp_new_i64();
8209     tcg_rd_narrowed = tcg_temp_new_i32();
8210     tcg_final = tcg_temp_new_i64();
8211 
8212     if (round) {
8213         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8214     } else {
8215         tcg_round = NULL;
8216     }
8217 
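         /* Shift and saturating-narrow one element at a time, packing
          * the narrowed results into successive esize-bit slots of
          * tcg_final.
          */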
8218     for (i = 0; i < elements; i++) {
8219         read_vec_element(s, tcg_rn, rn, i, ldop);
8220         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8221                                 false, is_u_shift, size + 1, shift);
8222         narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
8223         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8224         if (i == 0) {
8225             tcg_gen_mov_i64(tcg_final, tcg_rd);
8226         } else {
8227             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8228         }
8229     }
8230 
8231     if (!is_q) {
8232         write_vec_element(s, tcg_final, rd, 0, MO_64);
8233     } else {
8234         write_vec_element(s, tcg_final, rd, 1, MO_64);
8235     }
8236     clear_vec_high(s, is_q, rd);
8237 }
8238 
8239 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8240 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8241                              bool src_unsigned, bool dst_unsigned,
8242                              int immh, int immb, int rn, int rd)
8243 {
8244     int immhb = immh << 3 | immb;
8245     int size = 32 - clz32(immh) - 1;
8246     int shift = immhb - (8 << size);
8247     int pass;
8248 
8249     assert(immh != 0);
8250     assert(!(scalar && is_q));
8251 
8252     if (!scalar) {
8253         if (!is_q && extract32(immh, 3, 1)) {
8254             unallocated_encoding(s);
8255             return;
8256         }
8257 
8258         /* Since we use the variable-shift helpers we must
8259          * replicate the shift count into each element of
8260          * the tcg_shift value.
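          * For example, with byte elements a shift count of 3 is
          * replicated to 0x03030303.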
8261          */
8262         switch (size) {
8263         case 0:
8264             shift |= shift << 8;
8265             /* fall through */
8266         case 1:
8267             shift |= shift << 16;
8268             break;
8269         case 2:
8270         case 3:
8271             break;
8272         default:
8273             g_assert_not_reached();
8274         }
8275     }
8276 
8277     if (!fp_access_check(s)) {
8278         return;
8279     }
8280 
8281     if (size == 3) {
8282         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8283         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8284             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8285             { NULL, gen_helper_neon_qshl_u64 },
8286         };
8287         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8288         int maxpass = is_q ? 2 : 1;
8289 
8290         for (pass = 0; pass < maxpass; pass++) {
8291             TCGv_i64 tcg_op = tcg_temp_new_i64();
8292 
8293             read_vec_element(s, tcg_op, rn, pass, MO_64);
8294             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
8295             write_vec_element(s, tcg_op, rd, pass, MO_64);
8296         }
8297         clear_vec_high(s, is_q, rd);
8298     } else {
8299         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8300         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8301             {
8302                 { gen_helper_neon_qshl_s8,
8303                   gen_helper_neon_qshl_s16,
8304                   gen_helper_neon_qshl_s32 },
8305                 { gen_helper_neon_qshlu_s8,
8306                   gen_helper_neon_qshlu_s16,
8307                   gen_helper_neon_qshlu_s32 }
8308             }, {
8309                 { NULL, NULL, NULL },
8310                 { gen_helper_neon_qshl_u8,
8311                   gen_helper_neon_qshl_u16,
8312                   gen_helper_neon_qshl_u32 }
8313             }
8314         };
8315         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8316         MemOp memop = scalar ? size : MO_32;
8317         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8318 
8319         for (pass = 0; pass < maxpass; pass++) {
8320             TCGv_i32 tcg_op = tcg_temp_new_i32();
8321 
8322             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8323             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
8324             if (scalar) {
8325                 switch (size) {
8326                 case 0:
8327                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8328                     break;
8329                 case 1:
8330                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8331                     break;
8332                 case 2:
8333                     break;
8334                 default:
8335                     g_assert_not_reached();
8336                 }
8337                 write_fp_sreg(s, rd, tcg_op);
8338             } else {
8339                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8340             }
8341         }
8342 
8343         if (!scalar) {
8344             clear_vec_high(s, is_q, rd);
8345         }
8346     }
8347 }
8348 
8349 /* Common vector code for handling integer to FP conversion */
8350 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8351                                    int elements, int is_signed,
8352                                    int fracbits, int size)
8353 {
8354     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8355     TCGv_i32 tcg_shift = NULL;
8356 
8357     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8358     int pass;
8359 
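         /* The 64-bit conversion helpers always take a shift argument,
          * so create a zero tcg_shift even when fracbits is 0.
          */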
8360     if (fracbits || size == MO_64) {
8361         tcg_shift = tcg_constant_i32(fracbits);
8362     }
8363 
8364     if (size == MO_64) {
8365         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8366         TCGv_i64 tcg_double = tcg_temp_new_i64();
8367 
8368         for (pass = 0; pass < elements; pass++) {
8369             read_vec_element(s, tcg_int64, rn, pass, mop);
8370 
8371             if (is_signed) {
8372                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8373                                      tcg_shift, tcg_fpst);
8374             } else {
8375                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8376                                      tcg_shift, tcg_fpst);
8377             }
8378             if (elements == 1) {
8379                 write_fp_dreg(s, rd, tcg_double);
8380             } else {
8381                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8382             }
8383         }
8384     } else {
8385         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8386         TCGv_i32 tcg_float = tcg_temp_new_i32();
8387 
8388         for (pass = 0; pass < elements; pass++) {
8389             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8390 
8391             switch (size) {
8392             case MO_32:
8393                 if (fracbits) {
8394                     if (is_signed) {
8395                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8396                                              tcg_shift, tcg_fpst);
8397                     } else {
8398                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8399                                              tcg_shift, tcg_fpst);
8400                     }
8401                 } else {
8402                     if (is_signed) {
8403                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8404                     } else {
8405                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8406                     }
8407                 }
8408                 break;
8409             case MO_16:
8410                 if (fracbits) {
8411                     if (is_signed) {
8412                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8413                                              tcg_shift, tcg_fpst);
8414                     } else {
8415                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8416                                              tcg_shift, tcg_fpst);
8417                     }
8418                 } else {
8419                     if (is_signed) {
8420                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8421                     } else {
8422                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8423                     }
8424                 }
8425                 break;
8426             default:
8427                 g_assert_not_reached();
8428             }
8429 
8430             if (elements == 1) {
8431                 write_fp_sreg(s, rd, tcg_float);
8432             } else {
8433                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8434             }
8435         }
8436     }
8437 
8438     clear_vec_high(s, (elements << size) == 16, rd);
8439 }
8440 
8441 /* UCVTF/SCVTF - Integer to FP conversion */
8442 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8443                                          bool is_q, bool is_u,
8444                                          int immh, int immb, int opcode,
8445                                          int rn, int rd)
8446 {
8447     int size, elements, fracbits;
8448     int immhb = immh << 3 | immb;
8449 
8450     if (immh & 8) {
8451         size = MO_64;
8452         if (!is_scalar && !is_q) {
8453             unallocated_encoding(s);
8454             return;
8455         }
8456     } else if (immh & 4) {
8457         size = MO_32;
8458     } else if (immh & 2) {
8459         size = MO_16;
8460         if (!dc_isar_feature(aa64_fp16, s)) {
8461             unallocated_encoding(s);
8462             return;
8463         }
8464     } else {
8465         /* immh == 0 would be a failure of the decode logic */
8466         g_assert(immh == 1);
8467         unallocated_encoding(s);
8468         return;
8469     }
8470 
8471     if (is_scalar) {
8472         elements = 1;
8473     } else {
8474         elements = (8 << is_q) >> size;
8475     }
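         /* immh:immb encodes (2 * esize) - fracbits, so fracbits comes
          * out in the range [1, esize].
          */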
8476     fracbits = (16 << size) - immhb;
8477 
8478     if (!fp_access_check(s)) {
8479         return;
8480     }
8481 
8482     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8483 }
8484 
8485 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
8486 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8487                                          bool is_q, bool is_u,
8488                                          int immh, int immb, int rn, int rd)
8489 {
8490     int immhb = immh << 3 | immb;
8491     int pass, size, fracbits;
8492     TCGv_ptr tcg_fpstatus;
8493     TCGv_i32 tcg_rmode, tcg_shift;
8494 
8495     if (immh & 0x8) {
8496         size = MO_64;
8497         if (!is_scalar && !is_q) {
8498             unallocated_encoding(s);
8499             return;
8500         }
8501     } else if (immh & 0x4) {
8502         size = MO_32;
8503     } else if (immh & 0x2) {
8504         size = MO_16;
8505         if (!dc_isar_feature(aa64_fp16, s)) {
8506             unallocated_encoding(s);
8507             return;
8508         }
8509     } else {
8510         /* Should have split out AdvSIMD modified immediate earlier.  */
8511         assert(immh == 1);
8512         unallocated_encoding(s);
8513         return;
8514     }
8515 
8516     if (!fp_access_check(s)) {
8517         return;
8518     }
8519 
8520     assert(!(is_scalar && is_q));
8521 
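         /* FCVTZS/FCVTZU always round toward zero, so force that
          * rounding mode now and restore the old mode at the end.
          */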
8522     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8523     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8524     fracbits = (16 << size) - immhb;
8525     tcg_shift = tcg_constant_i32(fracbits);
8526 
8527     if (size == MO_64) {
8528         int maxpass = is_scalar ? 1 : 2;
8529 
8530         for (pass = 0; pass < maxpass; pass++) {
8531             TCGv_i64 tcg_op = tcg_temp_new_i64();
8532 
8533             read_vec_element(s, tcg_op, rn, pass, MO_64);
8534             if (is_u) {
8535                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8536             } else {
8537                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8538             }
8539             write_vec_element(s, tcg_op, rd, pass, MO_64);
8540         }
8541         clear_vec_high(s, is_q, rd);
8542     } else {
8543         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8544         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8545 
8546         switch (size) {
8547         case MO_16:
8548             if (is_u) {
8549                 fn = gen_helper_vfp_touhh;
8550             } else {
8551                 fn = gen_helper_vfp_toshh;
8552             }
8553             break;
8554         case MO_32:
8555             if (is_u) {
8556                 fn = gen_helper_vfp_touls;
8557             } else {
8558                 fn = gen_helper_vfp_tosls;
8559             }
8560             break;
8561         default:
8562             g_assert_not_reached();
8563         }
8564 
8565         for (pass = 0; pass < maxpass; pass++) {
8566             TCGv_i32 tcg_op = tcg_temp_new_i32();
8567 
8568             read_vec_element_i32(s, tcg_op, rn, pass, size);
8569             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8570             if (is_scalar) {
8571                 write_fp_sreg(s, rd, tcg_op);
8572             } else {
8573                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8574             }
8575         }
8576         if (!is_scalar) {
8577             clear_vec_high(s, is_q, rd);
8578         }
8579     }
8580 
8581     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8582 }
8583 
8584 /* AdvSIMD scalar shift by immediate
8585  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8586  * +-----+---+-------------+------+------+--------+---+------+------+
8587  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8588  * +-----+---+-------------+------+------+--------+---+------+------+
8589  *
8590  * This is the scalar version, so it works on fixed size registers.
8591  */
8592 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8593 {
8594     int rd = extract32(insn, 0, 5);
8595     int rn = extract32(insn, 5, 5);
8596     int opcode = extract32(insn, 11, 5);
8597     int immb = extract32(insn, 16, 3);
8598     int immh = extract32(insn, 19, 4);
8599     bool is_u = extract32(insn, 29, 1);
8600 
8601     if (immh == 0) {
8602         unallocated_encoding(s);
8603         return;
8604     }
8605 
8606     switch (opcode) {
8607     case 0x08: /* SRI */
8608         if (!is_u) {
8609             unallocated_encoding(s);
8610             return;
8611         }
8612         /* fall through */
8613     case 0x00: /* SSHR / USHR */
8614     case 0x02: /* SSRA / USRA */
8615     case 0x04: /* SRSHR / URSHR */
8616     case 0x06: /* SRSRA / URSRA */
8617         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8618         break;
8619     case 0x0a: /* SHL / SLI */
8620         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8621         break;
8622     case 0x1c: /* SCVTF, UCVTF */
8623         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8624                                      opcode, rn, rd);
8625         break;
8626     case 0x10: /* SQSHRUN, SQSHRUN2 */
8627     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8628         if (!is_u) {
8629             unallocated_encoding(s);
8630             return;
8631         }
8632         handle_vec_simd_sqshrn(s, true, false, false, true,
8633                                immh, immb, opcode, rn, rd);
8634         break;
8635     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8636     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8637         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8638                                immh, immb, opcode, rn, rd);
8639         break;
8640     case 0xc: /* SQSHLU */
8641         if (!is_u) {
8642             unallocated_encoding(s);
8643             return;
8644         }
8645         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8646         break;
8647     case 0xe: /* SQSHL, UQSHL */
8648         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8649         break;
8650     case 0x1f: /* FCVTZS, FCVTZU */
8651         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8652         break;
8653     default:
8654         unallocated_encoding(s);
8655         break;
8656     }
8657 }
8658 
8659 /* AdvSIMD scalar three different
8660  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8661  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8662  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8663  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8664  */
8665 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8666 {
8667     bool is_u = extract32(insn, 29, 1);
8668     int size = extract32(insn, 22, 2);
8669     int opcode = extract32(insn, 12, 4);
8670     int rm = extract32(insn, 16, 5);
8671     int rn = extract32(insn, 5, 5);
8672     int rd = extract32(insn, 0, 5);
8673 
8674     if (is_u) {
8675         unallocated_encoding(s);
8676         return;
8677     }
8678 
8679     switch (opcode) {
8680     case 0x9: /* SQDMLAL, SQDMLAL2 */
8681     case 0xb: /* SQDMLSL, SQDMLSL2 */
8682     case 0xd: /* SQDMULL, SQDMULL2 */
8683         if (size == 0 || size == 3) {
8684             unallocated_encoding(s);
8685             return;
8686         }
8687         break;
8688     default:
8689         unallocated_encoding(s);
8690         return;
8691     }
8692 
8693     if (!fp_access_check(s)) {
8694         return;
8695     }
8696 
8697     if (size == 2) {
8698         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8699         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8700         TCGv_i64 tcg_res = tcg_temp_new_i64();
8701 
8702         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8703         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8704 
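             /* SQDMULL doubles the product: do the doubling as a
              * saturating add of the 64-bit product to itself so it
              * saturates rather than wrapping.
              */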
8705         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8706         gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res);
8707 
8708         switch (opcode) {
8709         case 0xd: /* SQDMULL, SQDMULL2 */
8710             break;
8711         case 0xb: /* SQDMLSL, SQDMLSL2 */
8712             tcg_gen_neg_i64(tcg_res, tcg_res);
8713             /* fall through */
8714         case 0x9: /* SQDMLAL, SQDMLAL2 */
8715             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8716             gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env,
8717                                               tcg_res, tcg_op1);
8718             break;
8719         default:
8720             g_assert_not_reached();
8721         }
8722 
8723         write_fp_dreg(s, rd, tcg_res);
8724     } else {
8725         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8726         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8727         TCGv_i64 tcg_res = tcg_temp_new_i64();
8728 
8729         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8730         gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res);
8731 
8732         switch (opcode) {
8733         case 0xd: /* SQDMULL, SQDMULL2 */
8734             break;
8735         case 0xb: /* SQDMLSL, SQDMLSL2 */
8736             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8737             /* fall through */
8738         case 0x9: /* SQDMLAL, SQDMLAL2 */
8739         {
8740             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8741             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8742             gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env,
8743                                               tcg_res, tcg_op3);
8744             break;
8745         }
8746         default:
8747             g_assert_not_reached();
8748         }
8749 
8750         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8751         write_fp_dreg(s, rd, tcg_res);
8752     }
8753 }
8754 
8755 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8756                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8757 {
8758     /* Handle 64x64->64 opcodes which are shared between the scalar
8759      * and vector 3-same groups. We cover every opcode where size == 3
8760      * is valid in either the three-reg-same (integer, not pairwise)
8761      * or scalar-three-reg-same groups.
8762      */
8763     TCGCond cond;
8764 
8765     switch (opcode) {
8766     case 0x1: /* SQADD */
8767         if (u) {
8768             gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8769         } else {
8770             gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8771         }
8772         break;
8773     case 0x5: /* SQSUB */
8774         if (u) {
8775             gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8776         } else {
8777             gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8778         }
8779         break;
8780     case 0x6: /* CMGT, CMHI */
8781         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8782     do_cmop:
8783         /* 64 bit integer comparison, result = test ? -1 : 0. */
8784         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8785         break;
8786     case 0x7: /* CMGE, CMHS */
8787         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8788         goto do_cmop;
8789     case 0x11: /* CMTST, CMEQ */
8790         if (u) {
8791             cond = TCG_COND_EQ;
8792             goto do_cmop;
8793         }
8794         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8795         break;
8796     case 0x8: /* SSHL, USHL */
8797         if (u) {
8798             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8799         } else {
8800             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8801         }
8802         break;
8803     case 0x9: /* SQSHL, UQSHL */
8804         if (u) {
8805             gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8806         } else {
8807             gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8808         }
8809         break;
8810     case 0xa: /* SRSHL, URSHL */
8811         if (u) {
8812             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8813         } else {
8814             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8815         }
8816         break;
8817     case 0xb: /* SQRSHL, UQRSHL */
8818         if (u) {
8819             gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8820         } else {
8821             gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8822         }
8823         break;
8824     case 0x10: /* ADD, SUB */
8825         if (u) {
8826             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8827         } else {
8828             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8829         }
8830         break;
8831     default:
8832         g_assert_not_reached();
8833     }
8834 }
8835 
8836 /* Handle the 3-same-operands float operations; shared by the scalar
8837  * and vector encodings. The caller must filter out any encodings
8838  * not allocated for the variant it is dealing with.
8839  */
8840 static void handle_3same_float(DisasContext *s, int size, int elements,
8841                                int fpopcode, int rd, int rn, int rm)
8842 {
8843     int pass;
8844     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8845 
8846     for (pass = 0; pass < elements; pass++) {
8847         if (size) {
8848             /* Double */
8849             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8850             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8851             TCGv_i64 tcg_res = tcg_temp_new_i64();
8852 
8853             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8854             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8855 
8856             switch (fpopcode) {
8857             case 0x39: /* FMLS */
8858                 /* As usual for ARM, separate negation for fused multiply-add */
8859                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8860                 /* fall through */
8861             case 0x19: /* FMLA */
8862                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8863                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8864                                        tcg_res, fpst);
8865                 break;
8866             case 0x18: /* FMAXNM */
8867                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8868                 break;
8869             case 0x1a: /* FADD */
8870                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8871                 break;
8872             case 0x1b: /* FMULX */
8873                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8874                 break;
8875             case 0x1c: /* FCMEQ */
8876                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8877                 break;
8878             case 0x1e: /* FMAX */
8879                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8880                 break;
8881             case 0x1f: /* FRECPS */
8882                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8883                 break;
8884             case 0x38: /* FMINNM */
8885                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8886                 break;
8887             case 0x3a: /* FSUB */
8888                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8889                 break;
8890             case 0x3e: /* FMIN */
8891                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8892                 break;
8893             case 0x3f: /* FRSQRTS */
8894                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8895                 break;
8896             case 0x5b: /* FMUL */
8897                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8898                 break;
8899             case 0x5c: /* FCMGE */
8900                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8901                 break;
8902             case 0x5d: /* FACGE */
8903                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8904                 break;
8905             case 0x5f: /* FDIV */
8906                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8907                 break;
8908             case 0x7a: /* FABD */
8909                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8910                 gen_helper_vfp_absd(tcg_res, tcg_res);
8911                 break;
8912             case 0x7c: /* FCMGT */
8913                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8914                 break;
8915             case 0x7d: /* FACGT */
8916                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8917                 break;
8918             default:
8919                 g_assert_not_reached();
8920             }
8921 
8922             write_vec_element(s, tcg_res, rd, pass, MO_64);
8923         } else {
8924             /* Single */
8925             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8926             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8927             TCGv_i32 tcg_res = tcg_temp_new_i32();
8928 
8929             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8930             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8931 
8932             switch (fpopcode) {
8933             case 0x39: /* FMLS */
8934                 /* As usual for ARM, separate negation for fused multiply-add */
8935                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
8936                 /* fall through */
8937             case 0x19: /* FMLA */
8938                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8939                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8940                                        tcg_res, fpst);
8941                 break;
8942             case 0x1a: /* FADD */
8943                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8944                 break;
8945             case 0x1b: /* FMULX */
8946                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8947                 break;
8948             case 0x1c: /* FCMEQ */
8949                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8950                 break;
8951             case 0x1e: /* FMAX */
8952                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8953                 break;
8954             case 0x1f: /* FRECPS */
8955                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8956                 break;
8957             case 0x18: /* FMAXNM */
8958                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8959                 break;
8960             case 0x38: /* FMINNM */
8961                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8962                 break;
8963             case 0x3a: /* FSUB */
8964                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8965                 break;
8966             case 0x3e: /* FMIN */
8967                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8968                 break;
8969             case 0x3f: /* FRSQRTS */
8970                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8971                 break;
8972             case 0x5b: /* FMUL */
8973                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
8974                 break;
8975             case 0x5c: /* FCMGE */
8976                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8977                 break;
8978             case 0x5d: /* FACGE */
8979                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8980                 break;
8981             case 0x5f: /* FDIV */
8982                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
8983                 break;
8984             case 0x7a: /* FABD */
8985                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8986                 gen_helper_vfp_abss(tcg_res, tcg_res);
8987                 break;
8988             case 0x7c: /* FCMGT */
8989                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8990                 break;
8991             case 0x7d: /* FACGT */
8992                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8993                 break;
8994             default:
8995                 g_assert_not_reached();
8996             }
8997 
8998             if (elements == 1) {
8999                 /* scalar single so clear high part */
9000                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9001 
9002                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9003                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9004             } else {
9005                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9006             }
9007         }
9008     }
9009 
9010     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9011 }
9012 
9013 /* AdvSIMD scalar three same
9014  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9015  * +-----+---+-----------+------+---+------+--------+---+------+------+
9016  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9017  * +-----+---+-----------+------+---+------+--------+---+------+------+
9018  */
9019 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9020 {
9021     int rd = extract32(insn, 0, 5);
9022     int rn = extract32(insn, 5, 5);
9023     int opcode = extract32(insn, 11, 5);
9024     int rm = extract32(insn, 16, 5);
9025     int size = extract32(insn, 22, 2);
9026     bool u = extract32(insn, 29, 1);
9027     TCGv_i64 tcg_rd;
9028 
9029     if (opcode >= 0x18) {
9030         /* Floating point: U, size[1] and opcode indicate operation */
9031         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9032         switch (fpopcode) {
9033         case 0x1b: /* FMULX */
9034         case 0x1f: /* FRECPS */
9035         case 0x3f: /* FRSQRTS */
9036         case 0x5d: /* FACGE */
9037         case 0x7d: /* FACGT */
9038         case 0x1c: /* FCMEQ */
9039         case 0x5c: /* FCMGE */
9040         case 0x7c: /* FCMGT */
9041         case 0x7a: /* FABD */
9042             break;
9043         default:
9044             unallocated_encoding(s);
9045             return;
9046         }
9047 
9048         if (!fp_access_check(s)) {
9049             return;
9050         }
9051 
9052         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9053         return;
9054     }
9055 
9056     switch (opcode) {
9057     case 0x1: /* SQADD, UQADD */
9058     case 0x5: /* SQSUB, UQSUB */
9059     case 0x9: /* SQSHL, UQSHL */
9060     case 0xb: /* SQRSHL, UQRSHL */
9061         break;
9062     case 0x8: /* SSHL, USHL */
9063     case 0xa: /* SRSHL, URSHL */
9064     case 0x6: /* CMGT, CMHI */
9065     case 0x7: /* CMGE, CMHS */
9066     case 0x11: /* CMTST, CMEQ */
9067     case 0x10: /* ADD, SUB (vector) */
9068         if (size != 3) {
9069             unallocated_encoding(s);
9070             return;
9071         }
9072         break;
9073     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9074         if (size != 1 && size != 2) {
9075             unallocated_encoding(s);
9076             return;
9077         }
9078         break;
9079     default:
9080         unallocated_encoding(s);
9081         return;
9082     }
9083 
9084     if (!fp_access_check(s)) {
9085         return;
9086     }
9087 
9088     tcg_rd = tcg_temp_new_i64();
9089 
9090     if (size == 3) {
9091         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9092         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9093 
9094         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9095     } else {
9096         /* Do a single operation on the lowest element in the vector.
9097          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9098          * no side effects for all these operations.
9099          * OPTME: special-purpose helpers would avoid doing some
9100          * unnecessary work in the helper for the 8 and 16 bit cases.
9101          */
9102         NeonGenTwoOpEnvFn *genenvfn;
9103         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9104         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9105         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9106 
9107         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9108         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9109 
9110         switch (opcode) {
9111         case 0x1: /* SQADD, UQADD */
9112         {
9113             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9114                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9115                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9116                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9117             };
9118             genenvfn = fns[size][u];
9119             break;
9120         }
9121         case 0x5: /* SQSUB, UQSUB */
9122         {
9123             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9124                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9125                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9126                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9127             };
9128             genenvfn = fns[size][u];
9129             break;
9130         }
9131         case 0x9: /* SQSHL, UQSHL */
9132         {
9133             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9134                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9135                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9136                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9137             };
9138             genenvfn = fns[size][u];
9139             break;
9140         }
9141         case 0xb: /* SQRSHL, UQRSHL */
9142         {
9143             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9144                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9145                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9146                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9147             };
9148             genenvfn = fns[size][u];
9149             break;
9150         }
9151         case 0x16: /* SQDMULH, SQRDMULH */
9152         {
9153             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9154                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9155                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9156             };
9157             assert(size == 1 || size == 2);
9158             genenvfn = fns[size - 1][u];
9159             break;
9160         }
9161         default:
9162             g_assert_not_reached();
9163         }
9164 
9165         genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm);
9166         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9167     }
9168 
9169     write_fp_dreg(s, rd, tcg_rd);
9170 }
9171 
9172 /* AdvSIMD scalar three same FP16
9173  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9174  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9175  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9176  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9177  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9178  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9179  */
9180 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9181                                                   uint32_t insn)
9182 {
9183     int rd = extract32(insn, 0, 5);
9184     int rn = extract32(insn, 5, 5);
9185     int opcode = extract32(insn, 11, 3);
9186     int rm = extract32(insn, 16, 5);
9187     bool u = extract32(insn, 29, 1);
9188     bool a = extract32(insn, 23, 1);
9189     int fpopcode = opcode | (a << 3) | (u << 4);
9190     TCGv_ptr fpst;
9191     TCGv_i32 tcg_op1;
9192     TCGv_i32 tcg_op2;
9193     TCGv_i32 tcg_res;
9194 
9195     switch (fpopcode) {
9196     case 0x03: /* FMULX */
9197     case 0x04: /* FCMEQ (reg) */
9198     case 0x07: /* FRECPS */
9199     case 0x0f: /* FRSQRTS */
9200     case 0x14: /* FCMGE (reg) */
9201     case 0x15: /* FACGE */
9202     case 0x1a: /* FABD */
9203     case 0x1c: /* FCMGT (reg) */
9204     case 0x1d: /* FACGT */
9205         break;
9206     default:
9207         unallocated_encoding(s);
9208         return;
9209     }
9210 
9211     if (!dc_isar_feature(aa64_fp16, s)) {
9212         unallocated_encoding(s);
             return;
9213     }
9214 
9215     if (!fp_access_check(s)) {
9216         return;
9217     }
9218 
9219     fpst = fpstatus_ptr(FPST_FPCR_F16);
9220 
9221     tcg_op1 = read_fp_hreg(s, rn);
9222     tcg_op2 = read_fp_hreg(s, rm);
9223     tcg_res = tcg_temp_new_i32();
9224 
9225     switch (fpopcode) {
9226     case 0x03: /* FMULX */
9227         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9228         break;
9229     case 0x04: /* FCMEQ (reg) */
9230         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9231         break;
9232     case 0x07: /* FRECPS */
9233         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9234         break;
9235     case 0x0f: /* FRSQRTS */
9236         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9237         break;
9238     case 0x14: /* FCMGE (reg) */
9239         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9240         break;
9241     case 0x15: /* FACGE */
9242         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9243         break;
9244     case 0x1a: /* FABD */
9245         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9246         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9247         break;
9248     case 0x1c: /* FCMGT (reg) */
9249         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9250         break;
9251     case 0x1d: /* FACGT */
9252         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9253         break;
9254     default:
9255         g_assert_not_reached();
9256     }
9257 
9258     write_fp_sreg(s, rd, tcg_res);
9259 }
9260 
9261 /* AdvSIMD scalar three same extra
9262  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9263  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9264  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9265  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9266  */
9267 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9268                                                    uint32_t insn)
9269 {
9270     int rd = extract32(insn, 0, 5);
9271     int rn = extract32(insn, 5, 5);
9272     int opcode = extract32(insn, 11, 4);
9273     int rm = extract32(insn, 16, 5);
9274     int size = extract32(insn, 22, 2);
9275     bool u = extract32(insn, 29, 1);
9276     TCGv_i32 ele1, ele2, ele3;
9277     TCGv_i64 res;
9278     bool feature;
9279 
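         /* Decode on U:opcode together: only SQRDMLAH/SQRDMLSH, both
          * with U == 1, are allocated in this group.
          */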
9280     switch (u * 16 + opcode) {
9281     case 0x10: /* SQRDMLAH (vector) */
9282     case 0x11: /* SQRDMLSH (vector) */
9283         if (size != 1 && size != 2) {
9284             unallocated_encoding(s);
9285             return;
9286         }
9287         feature = dc_isar_feature(aa64_rdm, s);
9288         break;
9289     default:
9290         unallocated_encoding(s);
9291         return;
9292     }
9293     if (!feature) {
9294         unallocated_encoding(s);
9295         return;
9296     }
9297     if (!fp_access_check(s)) {
9298         return;
9299     }
9300 
9301     /* Do a single operation on the lowest element in the vector.
9302      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9303      * with no side effects for all these operations.
9304      * OPTME: special-purpose helpers would avoid doing some
9305      * unnecessary work in the helper for the 16 bit cases.
9306      */
9307     ele1 = tcg_temp_new_i32();
9308     ele2 = tcg_temp_new_i32();
9309     ele3 = tcg_temp_new_i32();
9310 
9311     read_vec_element_i32(s, ele1, rn, 0, size);
9312     read_vec_element_i32(s, ele2, rm, 0, size);
9313     read_vec_element_i32(s, ele3, rd, 0, size);
9314 
9315     switch (opcode) {
9316     case 0x0: /* SQRDMLAH */
9317         if (size == 1) {
9318             gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3);
9319         } else {
9320             gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3);
9321         }
9322         break;
9323     case 0x1: /* SQRDMLSH */
9324         if (size == 1) {
9325             gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3);
9326         } else {
9327             gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3);
9328         }
9329         break;
9330     default:
9331         g_assert_not_reached();
9332     }
9333 
9334     res = tcg_temp_new_i64();
9335     tcg_gen_extu_i32_i64(res, ele3);
9336     write_fp_dreg(s, rd, res);
9337 }
9338 
9339 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9340                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9341                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9342 {
9343     /* Handle 64->64 opcodes which are shared between the scalar and
9344      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9345      * is valid in either group and also the double-precision fp ops.
9346      * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9347      * requires them.
9348      */
9349     TCGCond cond;
9350 
9351     switch (opcode) {
9352     case 0x4: /* CLS, CLZ */
9353         if (u) {
9354             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9355         } else {
9356             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9357         }
9358         break;
9359     case 0x5: /* NOT */
9360         /* This opcode is shared with CNT and RBIT but we have earlier
9361          * enforced that size == 3 if and only if this is the NOT insn.
9362          */
9363         tcg_gen_not_i64(tcg_rd, tcg_rn);
9364         break;
9365     case 0x7: /* SQABS, SQNEG */
9366         if (u) {
9367             gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
9368         } else {
9369             gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
9370         }
9371         break;
9372     case 0xa: /* CMLT */
9373         cond = TCG_COND_LT;
9374     do_cmop:
9375         /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
9376         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
9377         break;
9378     case 0x8: /* CMGT, CMGE */
9379         cond = u ? TCG_COND_GE : TCG_COND_GT;
9380         goto do_cmop;
9381     case 0x9: /* CMEQ, CMLE */
9382         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9383         goto do_cmop;
9384     case 0xb: /* ABS, NEG */
9385         if (u) {
9386             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9387         } else {
9388             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9389         }
9390         break;
9391     case 0x2f: /* FABS */
9392         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9393         break;
9394     case 0x6f: /* FNEG */
9395         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9396         break;
9397     case 0x7f: /* FSQRT */
9398         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
9399         break;
9400     case 0x1a: /* FCVTNS */
9401     case 0x1b: /* FCVTMS */
9402     case 0x1c: /* FCVTAS */
9403     case 0x3a: /* FCVTPS */
9404     case 0x3b: /* FCVTZS */
9405         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9406         break;
9407     case 0x5a: /* FCVTNU */
9408     case 0x5b: /* FCVTMU */
9409     case 0x5c: /* FCVTAU */
9410     case 0x7a: /* FCVTPU */
9411     case 0x7b: /* FCVTZU */
9412         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9413         break;
9414     case 0x18: /* FRINTN */
9415     case 0x19: /* FRINTM */
9416     case 0x38: /* FRINTP */
9417     case 0x39: /* FRINTZ */
9418     case 0x58: /* FRINTA */
9419     case 0x79: /* FRINTI */
9420         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9421         break;
9422     case 0x59: /* FRINTX */
9423         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9424         break;
9425     case 0x1e: /* FRINT32Z */
9426     case 0x5e: /* FRINT32X */
9427         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9428         break;
9429     case 0x1f: /* FRINT64Z */
9430     case 0x5f: /* FRINT64X */
9431         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9432         break;
9433     default:
9434         g_assert_not_reached();
9435     }
9436 }
9437 
9438 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9439                                    bool is_scalar, bool is_u, bool is_q,
9440                                    int size, int rn, int rd)
9441 {
9442     bool is_double = (size == MO_64);
9443     TCGv_ptr fpst;
9444 
9445     if (!fp_access_check(s)) {
9446         return;
9447     }
9448 
9449     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9450 
9451     if (is_double) {
9452         TCGv_i64 tcg_op = tcg_temp_new_i64();
9453         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9454         TCGv_i64 tcg_res = tcg_temp_new_i64();
9455         NeonGenTwoDoubleOpFn *genfn;
9456         bool swap = false;
9457         int pass;
9458 
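             /* A < 0 is the same test as 0 > A, so the LT/LE cases are
              * handled by swapping the operands of the GT/GE helper.
              */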
9459         switch (opcode) {
9460         case 0x2e: /* FCMLT (zero) */
9461             swap = true;
9462             /* fall through */
9463         case 0x2c: /* FCMGT (zero) */
9464             genfn = gen_helper_neon_cgt_f64;
9465             break;
9466         case 0x2d: /* FCMEQ (zero) */
9467             genfn = gen_helper_neon_ceq_f64;
9468             break;
9469         case 0x6d: /* FCMLE (zero) */
9470             swap = true;
9471             /* fall through */
9472         case 0x6c: /* FCMGE (zero) */
9473             genfn = gen_helper_neon_cge_f64;
9474             break;
9475         default:
9476             g_assert_not_reached();
9477         }
9478 
9479         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9480             read_vec_element(s, tcg_op, rn, pass, MO_64);
9481             if (swap) {
9482                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9483             } else {
9484                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9485             }
9486             write_vec_element(s, tcg_res, rd, pass, MO_64);
9487         }
9488 
9489         clear_vec_high(s, !is_scalar, rd);
9490     } else {
9491         TCGv_i32 tcg_op = tcg_temp_new_i32();
9492         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9493         TCGv_i32 tcg_res = tcg_temp_new_i32();
9494         NeonGenTwoSingleOpFn *genfn;
9495         bool swap = false;
9496         int pass, maxpasses;
9497 
9498         if (size == MO_16) {
9499             switch (opcode) {
9500             case 0x2e: /* FCMLT (zero) */
9501                 swap = true;
9502                 /* fall through */
9503             case 0x2c: /* FCMGT (zero) */
9504                 genfn = gen_helper_advsimd_cgt_f16;
9505                 break;
9506             case 0x2d: /* FCMEQ (zero) */
9507                 genfn = gen_helper_advsimd_ceq_f16;
9508                 break;
9509             case 0x6d: /* FCMLE (zero) */
9510                 swap = true;
9511                 /* fall through */
9512             case 0x6c: /* FCMGE (zero) */
9513                 genfn = gen_helper_advsimd_cge_f16;
9514                 break;
9515             default:
9516                 g_assert_not_reached();
9517             }
9518         } else {
9519             switch (opcode) {
9520             case 0x2e: /* FCMLT (zero) */
9521                 swap = true;
9522                 /* fall through */
9523             case 0x2c: /* FCMGT (zero) */
9524                 genfn = gen_helper_neon_cgt_f32;
9525                 break;
9526             case 0x2d: /* FCMEQ (zero) */
9527                 genfn = gen_helper_neon_ceq_f32;
9528                 break;
9529             case 0x6d: /* FCMLE (zero) */
9530                 swap = true;
9531                 /* fall through */
9532             case 0x6c: /* FCMGE (zero) */
9533                 genfn = gen_helper_neon_cge_f32;
9534                 break;
9535             default:
9536                 g_assert_not_reached();
9537             }
9538         }
9539 
9540         if (is_scalar) {
9541             maxpasses = 1;
9542         } else {
9543             int vector_size = 8 << is_q;
9544             maxpasses = vector_size >> size;
9545         }
9546 
9547         for (pass = 0; pass < maxpasses; pass++) {
9548             read_vec_element_i32(s, tcg_op, rn, pass, size);
9549             if (swap) {
9550                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9551             } else {
9552                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9553             }
9554             if (is_scalar) {
9555                 write_fp_sreg(s, rd, tcg_res);
9556             } else {
9557                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9558             }
9559         }
9560 
9561         if (!is_scalar) {
9562             clear_vec_high(s, is_q, rd);
9563         }
9564     }
9565 }
9566 
9567 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9568                                     bool is_scalar, bool is_u, bool is_q,
9569                                     int size, int rn, int rd)
9570 {
9571     bool is_double = (size == 3);
9572     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9573 
9574     if (is_double) {
9575         TCGv_i64 tcg_op = tcg_temp_new_i64();
9576         TCGv_i64 tcg_res = tcg_temp_new_i64();
9577         int pass;
9578 
9579         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9580             read_vec_element(s, tcg_op, rn, pass, MO_64);
9581             switch (opcode) {
9582             case 0x3d: /* FRECPE */
9583                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9584                 break;
9585             case 0x3f: /* FRECPX */
9586                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9587                 break;
9588             case 0x7d: /* FRSQRTE */
9589                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9590                 break;
9591             default:
9592                 g_assert_not_reached();
9593             }
9594             write_vec_element(s, tcg_res, rd, pass, MO_64);
9595         }
9596         clear_vec_high(s, !is_scalar, rd);
9597     } else {
9598         TCGv_i32 tcg_op = tcg_temp_new_i32();
9599         TCGv_i32 tcg_res = tcg_temp_new_i32();
9600         int pass, maxpasses;
9601 
9602         if (is_scalar) {
9603             maxpasses = 1;
9604         } else {
9605             maxpasses = is_q ? 4 : 2;
9606         }
9607 
9608         for (pass = 0; pass < maxpasses; pass++) {
9609             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9610 
9611             switch (opcode) {
9612             case 0x3c: /* URECPE */
9613                 gen_helper_recpe_u32(tcg_res, tcg_op);
9614                 break;
9615             case 0x3d: /* FRECPE */
9616                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9617                 break;
9618             case 0x3f: /* FRECPX */
9619                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9620                 break;
9621             case 0x7d: /* FRSQRTE */
9622                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9623                 break;
9624             default:
9625                 g_assert_not_reached();
9626             }
9627 
9628             if (is_scalar) {
9629                 write_fp_sreg(s, rd, tcg_res);
9630             } else {
9631                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9632             }
9633         }
9634         if (!is_scalar) {
9635             clear_vec_high(s, is_q, rd);
9636         }
9637     }
9638 }
9639 
9640 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9641                                 int opcode, bool u, bool is_q,
9642                                 int size, int rn, int rd)
9643 {
9644     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9645      * in the source becomes a size element in the destination).
9646      */
9647     int pass;
9648     TCGv_i32 tcg_res[2];
9649     int destelt = is_q ? 2 : 0;
9650     int passes = scalar ? 1 : 2;
9651 
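         /* For the scalar form only pass 0 computes a real result; force
          * the second result word to zero so that the store loop below
          * still writes both 32-bit halves of the destination.
          */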
9652     if (scalar) {
9653         tcg_res[1] = tcg_constant_i32(0);
9654     }
9655 
9656     for (pass = 0; pass < passes; pass++) {
9657         TCGv_i64 tcg_op = tcg_temp_new_i64();
9658         NeonGenNarrowFn *genfn = NULL;
9659         NeonGenNarrowEnvFn *genenvfn = NULL;
9660 
9661         if (scalar) {
9662             read_vec_element(s, tcg_op, rn, pass, size + 1);
9663         } else {
9664             read_vec_element(s, tcg_op, rn, pass, MO_64);
9665         }
9666         tcg_res[pass] = tcg_temp_new_i32();
9667 
9668         switch (opcode) {
9669         case 0x12: /* XTN, SQXTUN */
9670         {
9671             static NeonGenNarrowFn * const xtnfns[3] = {
9672                 gen_helper_neon_narrow_u8,
9673                 gen_helper_neon_narrow_u16,
9674                 tcg_gen_extrl_i64_i32,
9675             };
9676             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9677                 gen_helper_neon_unarrow_sat8,
9678                 gen_helper_neon_unarrow_sat16,
9679                 gen_helper_neon_unarrow_sat32,
9680             };
9681             if (u) {
9682                 genenvfn = sqxtunfns[size];
9683             } else {
9684                 genfn = xtnfns[size];
9685             }
9686             break;
9687         }
9688         case 0x14: /* SQXTN, UQXTN */
9689         {
9690             static NeonGenNarrowEnvFn * const fns[3][2] = {
9691                 { gen_helper_neon_narrow_sat_s8,
9692                   gen_helper_neon_narrow_sat_u8 },
9693                 { gen_helper_neon_narrow_sat_s16,
9694                   gen_helper_neon_narrow_sat_u16 },
9695                 { gen_helper_neon_narrow_sat_s32,
9696                   gen_helper_neon_narrow_sat_u32 },
9697             };
9698             genenvfn = fns[size][u];
9699             break;
9700         }
9701         case 0x16: /* FCVTN, FCVTN2 */
9702             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9703             if (size == 2) {
9704                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env);
9705             } else {
9706                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9707                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9708                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9709                 TCGv_i32 ahp = get_ahp_flag();
9710 
9711                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9712                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9713                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9714                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9715             }
9716             break;
9717         case 0x36: /* BFCVTN, BFCVTN2 */
9718             {
9719                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9720                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9721             }
9722             break;
9723         case 0x56:  /* FCVTXN, FCVTXN2 */
9724             /* 64 bit to 32 bit float conversion
9725              * with von Neumann rounding (round to odd)
9726              */
9727             assert(size == 2);
9728             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env);
9729             break;
9730         default:
9731             g_assert_not_reached();
9732         }
9733 
9734         if (genfn) {
9735             genfn(tcg_res[pass], tcg_op);
9736         } else if (genenvfn) {
9737             genenvfn(tcg_res[pass], tcg_env, tcg_op);
9738         }
9739     }
9740 
9741     for (pass = 0; pass < 2; pass++) {
9742         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9743     }
9744     clear_vec_high(s, is_q, rd);
9745 }
9746 
9747 /* Remaining saturating accumulating ops */
9748 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9749                                 bool is_q, int size, int rn, int rd)
9750 {
9751     bool is_double = (size == 3);
9752 
9753     if (is_double) {
9754         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9755         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9756         int pass;
9757 
9758         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9759             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9760             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9761 
9762             if (is_u) { /* USQADD */
9763                 gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9764             } else { /* SUQADD */
9765                 gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9766             }
9767             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9768         }
9769         clear_vec_high(s, !is_scalar, rd);
9770     } else {
9771         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9772         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9773         int pass, maxpasses;
9774 
9775         if (is_scalar) {
9776             maxpasses = 1;
9777         } else {
9778             maxpasses = is_q ? 4 : 2;
9779         }
9780 
9781         for (pass = 0; pass < maxpasses; pass++) {
9782             if (is_scalar) {
9783                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9784                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9785             } else {
9786                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9787                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9788             }
9789 
9790             if (is_u) { /* USQADD */
9791                 switch (size) {
9792                 case 0:
9793                     gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9794                     break;
9795                 case 1:
9796                     gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9797                     break;
9798                 case 2:
9799                     gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9800                     break;
9801                 default:
9802                     g_assert_not_reached();
9803                 }
9804             } else { /* SUQADD */
9805                 switch (size) {
9806                 case 0:
9807                     gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9808                     break;
9809                 case 1:
9810                     gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9811                     break;
9812                 case 2:
9813                     gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9814                     break;
9815                 default:
9816                     g_assert_not_reached();
9817                 }
9818             }
9819 
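                 /* For the scalar form, zero the whole low 64 bits of Vd
                  * first so that the 32-bit write below leaves bits
                  * [63:32] clear.
                  */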
9820             if (is_scalar) {
9821                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9822             }
9823             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9824         }
9825         clear_vec_high(s, is_q, rd);
9826     }
9827 }
9828 
9829 /* AdvSIMD scalar two reg misc
9830  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9831  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9832  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9833  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9834  */
9835 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9836 {
9837     int rd = extract32(insn, 0, 5);
9838     int rn = extract32(insn, 5, 5);
9839     int opcode = extract32(insn, 12, 5);
9840     int size = extract32(insn, 22, 2);
9841     bool u = extract32(insn, 29, 1);
9842     bool is_fcvt = false;
9843     int rmode;
9844     TCGv_i32 tcg_rmode;
9845     TCGv_ptr tcg_fpstatus;
9846 
9847     switch (opcode) {
9848     case 0x3: /* USQADD / SUQADD */
9849         if (!fp_access_check(s)) {
9850             return;
9851         }
9852         handle_2misc_satacc(s, true, u, false, size, rn, rd);
9853         return;
9854     case 0x7: /* SQABS / SQNEG */
9855         break;
9856     case 0xa: /* CMLT */
9857         if (u) {
9858             unallocated_encoding(s);
9859             return;
9860         }
9861         /* fall through */
9862     case 0x8: /* CMGT, CMGE */
9863     case 0x9: /* CMEQ, CMLE */
9864     case 0xb: /* ABS, NEG */
9865         if (size != 3) {
9866             unallocated_encoding(s);
9867             return;
9868         }
9869         break;
9870     case 0x12: /* SQXTUN */
9871         if (!u) {
9872             unallocated_encoding(s);
9873             return;
9874         }
9875         /* fall through */
9876     case 0x14: /* SQXTN, UQXTN */
9877         if (size == 3) {
9878             unallocated_encoding(s);
9879             return;
9880         }
9881         if (!fp_access_check(s)) {
9882             return;
9883         }
9884         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9885         return;
9886     case 0xc ... 0xf:
9887     case 0x16 ... 0x1d:
9888     case 0x1f:
9889         /* Floating point: U, size[1] and opcode indicate operation;
9890          * size[0] indicates single or double precision.
9891          */
9892         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9893         size = extract32(size, 0, 1) ? 3 : 2;
9894         switch (opcode) {
9895         case 0x2c: /* FCMGT (zero) */
9896         case 0x2d: /* FCMEQ (zero) */
9897         case 0x2e: /* FCMLT (zero) */
9898         case 0x6c: /* FCMGE (zero) */
9899         case 0x6d: /* FCMLE (zero) */
9900             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9901             return;
9902         case 0x1d: /* SCVTF */
9903         case 0x5d: /* UCVTF */
9904         {
9905             bool is_signed = (opcode == 0x1d);
9906             if (!fp_access_check(s)) {
9907                 return;
9908             }
9909             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
9910             return;
9911         }
9912         case 0x3d: /* FRECPE */
9913         case 0x3f: /* FRECPX */
9914         case 0x7d: /* FRSQRTE */
9915             if (!fp_access_check(s)) {
9916                 return;
9917             }
9918             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
9919             return;
9920         case 0x1a: /* FCVTNS */
9921         case 0x1b: /* FCVTMS */
9922         case 0x3a: /* FCVTPS */
9923         case 0x3b: /* FCVTZS */
9924         case 0x5a: /* FCVTNU */
9925         case 0x5b: /* FCVTMU */
9926         case 0x7a: /* FCVTPU */
9927         case 0x7b: /* FCVTZU */
9928             is_fcvt = true;
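                 /* Bits 5 and 0 of the opcode select the rounding mode:
                  * FCVTN* -> tie-even, FCVTP* -> +inf, FCVTM* -> -inf,
                  * FCVTZ* -> zero.
                  */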
9929             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9930             break;
9931         case 0x1c: /* FCVTAS */
9932         case 0x5c: /* FCVTAU */
9933             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
9934             is_fcvt = true;
9935             rmode = FPROUNDING_TIEAWAY;
9936             break;
9937         case 0x56: /* FCVTXN, FCVTXN2 */
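                 /* FCVTXN is double-to-single only, i.e. size == 3 after
                  * the remapping above; size == 2 is unallocated.
                  */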
9938             if (size == 2) {
9939                 unallocated_encoding(s);
9940                 return;
9941             }
9942             if (!fp_access_check(s)) {
9943                 return;
9944             }
9945             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
9946             return;
9947         default:
9948             unallocated_encoding(s);
9949             return;
9950         }
9951         break;
9952     default:
9953         unallocated_encoding(s);
9954         return;
9955     }
9956 
9957     if (!fp_access_check(s)) {
9958         return;
9959     }
9960 
9961     if (is_fcvt) {
9962         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
9963         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9964     } else {
9965         tcg_fpstatus = NULL;
9966         tcg_rmode = NULL;
9967     }
9968 
9969     if (size == 3) {
9970         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9971         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9972 
9973         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
9974         write_fp_dreg(s, rd, tcg_rd);
9975     } else {
9976         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9977         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9978 
9979         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9980 
9981         switch (opcode) {
9982         case 0x7: /* SQABS, SQNEG */
9983         {
9984             NeonGenOneOpEnvFn *genfn;
9985             static NeonGenOneOpEnvFn * const fns[3][2] = {
9986                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
9987                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
9988                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
9989             };
9990             genfn = fns[size][u];
9991             genfn(tcg_rd, tcg_env, tcg_rn);
9992             break;
9993         }
9994         case 0x1a: /* FCVTNS */
9995         case 0x1b: /* FCVTMS */
9996         case 0x1c: /* FCVTAS */
9997         case 0x3a: /* FCVTPS */
9998         case 0x3b: /* FCVTZS */
9999             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10000                                  tcg_fpstatus);
10001             break;
10002         case 0x5a: /* FCVTNU */
10003         case 0x5b: /* FCVTMU */
10004         case 0x5c: /* FCVTAU */
10005         case 0x7a: /* FCVTPU */
10006         case 0x7b: /* FCVTZU */
10007             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10008                                  tcg_fpstatus);
10009             break;
10010         default:
10011             g_assert_not_reached();
10012         }
10013 
10014         write_fp_sreg(s, rd, tcg_rd);
10015     }
10016 
10017     if (is_fcvt) {
10018         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10019     }
10020 }
10021 
10022 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10023 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10024                                  int immh, int immb, int opcode, int rn, int rd)
10025 {
10026     int size = 32 - clz32(immh) - 1;
10027     int immhb = immh << 3 | immb;
10028     int shift = 2 * (8 << size) - immhb;
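          /* The element size is given by the position of the highest set
           * bit of immh; the shift count is 2 * esize - immhb, i.e. it
           * runs from esize down to 1 as immhb grows.
           */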
10029     GVecGen2iFn *gvec_fn;
10030 
10031     if (extract32(immh, 3, 1) && !is_q) {
10032         unallocated_encoding(s);
10033         return;
10034     }
10035     tcg_debug_assert(size <= 3);
10036 
10037     if (!fp_access_check(s)) {
10038         return;
10039     }
10040 
10041     switch (opcode) {
10042     case 0x02: /* SSRA / USRA (accumulate) */
10043         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10044         break;
10045 
10046     case 0x08: /* SRI */
10047         gvec_fn = gen_gvec_sri;
10048         break;
10049 
10050     case 0x00: /* SSHR / USHR */
10051         if (is_u) {
10052             if (shift == 8 << size) {
10053                 /* Shift count the same size as element size produces zero.  */
10054                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10055                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10056                 return;
10057             }
10058             gvec_fn = tcg_gen_gvec_shri;
10059         } else {
10060             /* Shift count the same size as element size produces all sign.  */
10061             if (shift == 8 << size) {
10062                 shift -= 1;
10063             }
10064             gvec_fn = tcg_gen_gvec_sari;
10065         }
10066         break;
10067 
10068     case 0x04: /* SRSHR / URSHR (rounding) */
10069         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10070         break;
10071 
10072     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10073         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10074         break;
10075 
10076     default:
10077         g_assert_not_reached();
10078     }
10079 
10080     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10081 }
10082 
10083 /* SHL/SLI - Vector shift left */
10084 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10085                                  int immh, int immb, int opcode, int rn, int rd)
10086 {
10087     int size = 32 - clz32(immh) - 1;
10088     int immhb = immh << 3 | immb;
10089     int shift = immhb - (8 << size);
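          /* For left shifts the count is immhb - esize, i.e. 0 .. esize - 1. */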
10090 
10091     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10092     assert(size >= 0 && size <= 3);
10093 
10094     if (extract32(immh, 3, 1) && !is_q) {
10095         unallocated_encoding(s);
10096         return;
10097     }
10098 
10099     if (!fp_access_check(s)) {
10100         return;
10101     }
10102 
10103     if (insert) {
10104         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10105     } else {
10106         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10107     }
10108 }
10109 
10110 /* SSHLL/USHLL - Vector shift left with widening */
10111 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10112                                   int immh, int immb, int opcode, int rn, int rd)
10113 {
10114     int size = 32 - clz32(immh) - 1;
10115     int immhb = immh << 3 | immb;
10116     int shift = immhb - (8 << size);
10117     int dsize = 64;
10118     int esize = 8 << size;
10119     int elements = dsize / esize;
10120     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10121     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10122     int i;
10123 
10124     if (size >= 3) {
10125         unallocated_encoding(s);
10126         return;
10127     }
10128 
10129     if (!fp_access_check(s)) {
10130         return;
10131     }
10132 
10133     /* For the LL variants the store is larger than the load,
10134      * so if rd == rn we would overwrite parts of our input.
10135      * So load everything right now and use shifts in the main loop.
10136      */
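          /* The "2" forms (is_q) take their input from the high half of Rn. */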
10137     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10138 
10139     for (i = 0; i < elements; i++) {
10140         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
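              /* Reuse the register-extend helper to widen each element:
               * size | (!is_u << 2) selects {U,S}XT{B,H,W} as appropriate.
               */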
10141         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10142         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10143         write_vec_element(s, tcg_rd, rd, i, size + 1);
10144     }
10145 }
10146 
10147 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10148 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10149                                  int immh, int immb, int opcode, int rn, int rd)
10150 {
10151     int immhb = immh << 3 | immb;
10152     int size = 32 - clz32(immh) - 1;
10153     int dsize = 64;
10154     int esize = 8 << size;
10155     int elements = dsize / esize;
10156     int shift = (2 * esize) - immhb;
10157     bool round = extract32(opcode, 0, 1);
10158     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10159     TCGv_i64 tcg_round;
10160     int i;
10161 
10162     if (extract32(immh, 3, 1)) {
10163         unallocated_encoding(s);
10164         return;
10165     }
10166 
10167     if (!fp_access_check(s)) {
10168         return;
10169     }
10170 
10171     tcg_rn = tcg_temp_new_i64();
10172     tcg_rd = tcg_temp_new_i64();
10173     tcg_final = tcg_temp_new_i64();
10174     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
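          /* The narrowed results are deposited into tcg_final, which is
           * then written back as a single 64-bit half of Vd: the low half
           * for SHRN/RSHRN, the high half for the "2" forms (which leave
           * the low half of the destination untouched).
           */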
10175 
10176     if (round) {
10177         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10178     } else {
10179         tcg_round = NULL;
10180     }
10181 
10182     for (i = 0; i < elements; i++) {
10183         read_vec_element(s, tcg_rn, rn, i, size + 1);
10184         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10185                                 false, true, size + 1, shift);
10186 
10187         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10188     }
10189 
10190     if (!is_q) {
10191         write_vec_element(s, tcg_final, rd, 0, MO_64);
10192     } else {
10193         write_vec_element(s, tcg_final, rd, 1, MO_64);
10194     }
10195 
10196     clear_vec_high(s, is_q, rd);
10197 }
10198 
10199 
10200 /* AdvSIMD shift by immediate
10201  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10202  * +---+---+---+-------------+------+------+--------+---+------+------+
10203  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10204  * +---+---+---+-------------+------+------+--------+---+------+------+
10205  */
10206 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10207 {
10208     int rd = extract32(insn, 0, 5);
10209     int rn = extract32(insn, 5, 5);
10210     int opcode = extract32(insn, 11, 5);
10211     int immb = extract32(insn, 16, 3);
10212     int immh = extract32(insn, 19, 4);
10213     bool is_u = extract32(insn, 29, 1);
10214     bool is_q = extract32(insn, 30, 1);
10215 
10216     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10217     assert(immh != 0);
10218 
10219     switch (opcode) {
10220     case 0x08: /* SRI */
10221         if (!is_u) {
10222             unallocated_encoding(s);
10223             return;
10224         }
10225         /* fall through */
10226     case 0x00: /* SSHR / USHR */
10227     case 0x02: /* SSRA / USRA (accumulate) */
10228     case 0x04: /* SRSHR / URSHR (rounding) */
10229     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10230         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10231         break;
10232     case 0x0a: /* SHL / SLI */
10233         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10234         break;
10235     case 0x10: /* SHRN / SQSHRUN */
10236     case 0x11: /* RSHRN / SQRSHRUN */
10237         if (is_u) {
10238             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10239                                    opcode, rn, rd);
10240         } else {
10241             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10242         }
10243         break;
10244     case 0x12: /* SQSHRN / UQSHRN */
10245     case 0x13: /* SQRSHRN / UQRSHRN */
10246         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10247                                opcode, rn, rd);
10248         break;
10249     case 0x14: /* SSHLL / USHLL */
10250         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10251         break;
10252     case 0x1c: /* SCVTF / UCVTF */
10253         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10254                                      opcode, rn, rd);
10255         break;
10256     case 0xc: /* SQSHLU */
10257         if (!is_u) {
10258             unallocated_encoding(s);
10259             return;
10260         }
10261         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10262         break;
10263     case 0xe: /* SQSHL, UQSHL */
10264         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10265         break;
10266     case 0x1f: /* FCVTZS / FCVTZU */
10267         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10268         return;
10269     default:
10270         unallocated_encoding(s);
10271         return;
10272     }
10273 }
10274 
10275 /* Generate code to do a "long" addition or subtraction, i.e. one done in
10276  * TCGv_i64 on vector lanes twice the width specified by size.
10277  */
10278 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10279                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10280 {
10281     static NeonGenTwo64OpFn * const fns[3][2] = {
10282         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10283         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10284         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10285     };
10286     NeonGenTwo64OpFn *genfn;
10287     assert(size < 3);
10288 
10289     genfn = fns[size][is_sub];
10290     genfn(tcg_res, tcg_op1, tcg_op2);
10291 }
10292 
10293 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10294                                 int opcode, int rd, int rn, int rm)
10295 {
10296     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10297     TCGv_i64 tcg_res[2];
10298     int pass, accop;
10299 
10300     tcg_res[0] = tcg_temp_new_i64();
10301     tcg_res[1] = tcg_temp_new_i64();
10302 
10303     /* Does this op do an adding accumulate, a subtracting accumulate,
10304      * or no accumulate at all?
10305      */
10306     switch (opcode) {
10307     case 5:
10308     case 8:
10309     case 9:
10310         accop = 1;
10311         break;
10312     case 10:
10313     case 11:
10314         accop = -1;
10315         break;
10316     default:
10317         accop = 0;
10318         break;
10319     }
10320 
10321     if (accop != 0) {
10322         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10323         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10324     }
10325 
10326     /* size == 2 means two 32x32->64 operations; this is worth special
10327      * casing because we can generally handle it inline.
10328      */
10329     if (size == 2) {
10330         for (pass = 0; pass < 2; pass++) {
10331             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10332             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10333             TCGv_i64 tcg_passres;
10334             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10335 
10336             int elt = pass + is_q * 2;
10337 
10338             read_vec_element(s, tcg_op1, rn, elt, memop);
10339             read_vec_element(s, tcg_op2, rm, elt, memop);
10340 
10341             if (accop == 0) {
10342                 tcg_passres = tcg_res[pass];
10343             } else {
10344                 tcg_passres = tcg_temp_new_i64();
10345             }
10346 
10347             switch (opcode) {
10348             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10349                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10350                 break;
10351             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10352                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10353                 break;
10354             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10355             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10356             {
10357                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10358                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10359 
10360                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10361                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
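                      /* Absolute difference: pick op1 - op2 if op1 >= op2,
                       * else op2 - op1, with the signedness given by is_u.
                       */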
10362                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10363                                     tcg_passres,
10364                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10365                 break;
10366             }
10367             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10368             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10369             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10370                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10371                 break;
10372             case 9: /* SQDMLAL, SQDMLAL2 */
10373             case 11: /* SQDMLSL, SQDMLSL2 */
10374             case 13: /* SQDMULL, SQDMULL2 */
10375                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10376                 gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
10377                                                   tcg_passres, tcg_passres);
10378                 break;
10379             default:
10380                 g_assert_not_reached();
10381             }
10382 
10383             if (opcode == 9 || opcode == 11) {
10384                 /* saturating accumulate ops */
10385                 if (accop < 0) {
10386                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10387                 }
10388                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
10389                                                   tcg_res[pass], tcg_passres);
10390             } else if (accop > 0) {
10391                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10392             } else if (accop < 0) {
10393                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10394             }
10395         }
10396     } else {
10397         /* size 0 or 1, generally helper functions */
10398         for (pass = 0; pass < 2; pass++) {
10399             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10400             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10401             TCGv_i64 tcg_passres;
10402             int elt = pass + is_q * 2;
10403 
10404             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10405             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10406 
10407             if (accop == 0) {
10408                 tcg_passres = tcg_res[pass];
10409             } else {
10410                 tcg_passres = tcg_temp_new_i64();
10411             }
10412 
10413             switch (opcode) {
10414             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10415             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10416             {
10417                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10418                 static NeonGenWidenFn * const widenfns[2][2] = {
10419                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10420                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10421                 };
10422                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10423 
10424                 widenfn(tcg_op2_64, tcg_op2);
10425                 widenfn(tcg_passres, tcg_op1);
10426                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10427                               tcg_passres, tcg_op2_64);
10428                 break;
10429             }
10430             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10431             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10432                 if (size == 0) {
10433                     if (is_u) {
10434                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10435                     } else {
10436                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10437                     }
10438                 } else {
10439                     if (is_u) {
10440                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10441                     } else {
10442                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10443                     }
10444                 }
10445                 break;
10446             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10447             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10448             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10449                 if (size == 0) {
10450                     if (is_u) {
10451                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10452                     } else {
10453                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10454                     }
10455                 } else {
10456                     if (is_u) {
10457                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10458                     } else {
10459                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10460                     }
10461                 }
10462                 break;
10463             case 9: /* SQDMLAL, SQDMLAL2 */
10464             case 11: /* SQDMLSL, SQDMLSL2 */
10465             case 13: /* SQDMULL, SQDMULL2 */
10466                 assert(size == 1);
10467                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10468                 gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
10469                                                   tcg_passres, tcg_passres);
10470                 break;
10471             default:
10472                 g_assert_not_reached();
10473             }
10474 
10475             if (accop != 0) {
10476                 if (opcode == 9 || opcode == 11) {
10477                     /* saturating accumulate ops */
10478                     if (accop < 0) {
10479                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10480                     }
10481                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
10482                                                       tcg_res[pass],
10483                                                       tcg_passres);
10484                 } else {
10485                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10486                                   tcg_res[pass], tcg_passres);
10487                 }
10488             }
10489         }
10490     }
10491 
10492     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10493     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10494 }
10495 
10496 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10497                             int opcode, int rd, int rn, int rm)
10498 {
10499     TCGv_i64 tcg_res[2];
10500     int part = is_q ? 2 : 0;
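          /* For the *W2 forms the narrow operand is taken from the high
           * half of Rm.
           */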
10501     int pass;
10502 
10503     for (pass = 0; pass < 2; pass++) {
10504         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10505         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10506         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10507         static NeonGenWidenFn * const widenfns[3][2] = {
10508             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10509             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10510             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10511         };
10512         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10513 
10514         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10515         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10516         widenfn(tcg_op2_wide, tcg_op2);
10517         tcg_res[pass] = tcg_temp_new_i64();
10518         gen_neon_addl(size, (opcode == 3),
10519                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10520     }
10521 
10522     for (pass = 0; pass < 2; pass++) {
10523         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10524     }
10525 }
10526 
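      /* Narrow to the high 32 bits with rounding: add the rounding
       * constant (1 << 31) and then take bits [63:32] of the sum.
       */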
10527 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10528 {
10529     tcg_gen_addi_i64(in, in, 1U << 31);
10530     tcg_gen_extrh_i64_i32(res, in);
10531 }
10532 
10533 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10534                                  int opcode, int rd, int rn, int rm)
10535 {
10536     TCGv_i32 tcg_res[2];
10537     int part = is_q ? 2 : 0;
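          /* The *HN2 forms write their results to the high half of Rd. */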
10538     int pass;
10539 
10540     for (pass = 0; pass < 2; pass++) {
10541         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10542         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10543         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10544         static NeonGenNarrowFn * const narrowfns[3][2] = {
10545             { gen_helper_neon_narrow_high_u8,
10546               gen_helper_neon_narrow_round_high_u8 },
10547             { gen_helper_neon_narrow_high_u16,
10548               gen_helper_neon_narrow_round_high_u16 },
10549             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10550         };
10551         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10552 
10553         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10554         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10555 
10556         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10557 
10558         tcg_res[pass] = tcg_temp_new_i32();
10559         gennarrow(tcg_res[pass], tcg_wideres);
10560     }
10561 
10562     for (pass = 0; pass < 2; pass++) {
10563         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10564     }
10565     clear_vec_high(s, is_q, rd);
10566 }
10567 
10568 /* AdvSIMD three different
10569  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10570  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10571  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10572  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10573  */
10574 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10575 {
10576     /* Instructions in this group fall into three basic classes
10577      * (in each case with the operation working on each element in
10578      * the input vectors):
10579      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10580      *     128 bit input)
10581      * (2) wide 64 x 128 -> 128
10582      * (3) narrowing 128 x 128 -> 64
10583      * Here we do initial decode, catch unallocated cases and
10584      * dispatch to separate functions for each class.
10585      */
10586     int is_q = extract32(insn, 30, 1);
10587     int is_u = extract32(insn, 29, 1);
10588     int size = extract32(insn, 22, 2);
10589     int opcode = extract32(insn, 12, 4);
10590     int rm = extract32(insn, 16, 5);
10591     int rn = extract32(insn, 5, 5);
10592     int rd = extract32(insn, 0, 5);
10593 
10594     switch (opcode) {
10595     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10596     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10597         /* 64 x 128 -> 128 */
10598         if (size == 3) {
10599             unallocated_encoding(s);
10600             return;
10601         }
10602         if (!fp_access_check(s)) {
10603             return;
10604         }
10605         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10606         break;
10607     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10608     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10609         /* 128 x 128 -> 64 */
10610         if (size == 3) {
10611             unallocated_encoding(s);
10612             return;
10613         }
10614         if (!fp_access_check(s)) {
10615             return;
10616         }
10617         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10618         break;
10619     case 14: /* PMULL, PMULL2 */
10620         if (is_u) {
10621             unallocated_encoding(s);
10622             return;
10623         }
10624         switch (size) {
10625         case 0: /* PMULL.P8 */
10626             if (!fp_access_check(s)) {
10627                 return;
10628             }
10629             /* The Q field specifies lo/hi half input for this insn.  */
10630             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10631                              gen_helper_neon_pmull_h);
10632             break;
10633 
10634         case 3: /* PMULL.P64 */
10635             if (!dc_isar_feature(aa64_pmull, s)) {
10636                 unallocated_encoding(s);
10637                 return;
10638             }
10639             if (!fp_access_check(s)) {
10640                 return;
10641             }
10642             /* The Q field specifies lo/hi half input for this insn.  */
10643             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10644                              gen_helper_gvec_pmull_q);
10645             break;
10646 
10647         default:
10648             unallocated_encoding(s);
10649             break;
10650         }
10651         return;
10652     case 9: /* SQDMLAL, SQDMLAL2 */
10653     case 11: /* SQDMLSL, SQDMLSL2 */
10654     case 13: /* SQDMULL, SQDMULL2 */
10655         if (is_u || size == 0) {
10656             unallocated_encoding(s);
10657             return;
10658         }
10659         /* fall through */
10660     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10661     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10662     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10663     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10664     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10665     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10666     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10667         /* 64 x 64 -> 128 */
10668         if (size == 3) {
10669             unallocated_encoding(s);
10670             return;
10671         }
10672         if (!fp_access_check(s)) {
10673             return;
10674         }
10675 
10676         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10677         break;
10678     default:
10679         /* opcode 15 not allocated */
10680         unallocated_encoding(s);
10681         break;
10682     }
10683 }
10684 
10685 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10686 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10687 {
10688     int rd = extract32(insn, 0, 5);
10689     int rn = extract32(insn, 5, 5);
10690     int rm = extract32(insn, 16, 5);
10691     int size = extract32(insn, 22, 2);
10692     bool is_u = extract32(insn, 29, 1);
10693     bool is_q = extract32(insn, 30, 1);
10694 
10695     if (!fp_access_check(s)) {
10696         return;
10697     }
10698 
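          /* The size field and the U bit together select one of the eight
           * logical operations.
           */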
10699     switch (size + 4 * is_u) {
10700     case 0: /* AND */
10701         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10702         return;
10703     case 1: /* BIC */
10704         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10705         return;
10706     case 2: /* ORR */
10707         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10708         return;
10709     case 3: /* ORN */
10710         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10711         return;
10712     case 4: /* EOR */
10713         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10714         return;
10715 
10716     case 5: /* BSL, bitwise select */
10717         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10718         return;
10719     case 6: /* BIT, bitwise insert if true */
10720         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10721         return;
10722     case 7: /* BIF, bitwise insert if false */
10723         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10724         return;
10725 
10726     default:
10727         g_assert_not_reached();
10728     }
10729 }
10730 
10731 /* Pairwise op subgroup of C3.6.16.
10732  *
10733  * This is called directly or via disas_simd_3same_float for float pairwise
10734  * operations where the opcode and size are calculated differently.
10735  */
10736 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10737                                    int size, int rn, int rm, int rd)
10738 {
10739     TCGv_ptr fpst;
10740     int pass;
10741 
10742     /* Floating point operations need fpst */
10743     if (opcode >= 0x58) {
10744         fpst = fpstatus_ptr(FPST_FPCR);
10745     } else {
10746         fpst = NULL;
10747     }
10748 
10749     if (!fp_access_check(s)) {
10750         return;
10751     }
10752 
10753     /* These operations work on the concatenated rm:rn, with each pair of
10754      * adjacent elements being operated on to produce an element in the result.
10755      */
10756     if (size == 3) {
10757         TCGv_i64 tcg_res[2];
10758 
10759         for (pass = 0; pass < 2; pass++) {
10760             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10761             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10762             int passreg = (pass == 0) ? rn : rm;
10763 
10764             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10765             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10766             tcg_res[pass] = tcg_temp_new_i64();
10767 
10768             switch (opcode) {
10769             case 0x17: /* ADDP */
10770                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10771                 break;
10772             case 0x58: /* FMAXNMP */
10773                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10774                 break;
10775             case 0x5a: /* FADDP */
10776                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10777                 break;
10778             case 0x5e: /* FMAXP */
10779                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10780                 break;
10781             case 0x78: /* FMINNMP */
10782                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10783                 break;
10784             case 0x7e: /* FMINP */
10785                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10786                 break;
10787             default:
10788                 g_assert_not_reached();
10789             }
10790         }
10791 
10792         for (pass = 0; pass < 2; pass++) {
10793             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10794         }
10795     } else {
10796         int maxpass = is_q ? 4 : 2;
10797         TCGv_i32 tcg_res[4];
10798 
10799         for (pass = 0; pass < maxpass; pass++) {
10800             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10801             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10802             NeonGenTwoOpFn *genfn = NULL;
10803             int passreg = pass < (maxpass / 2) ? rn : rm;
10804             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10805 
10806             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10807             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10808             tcg_res[pass] = tcg_temp_new_i32();
10809 
10810             switch (opcode) {
10811             case 0x17: /* ADDP */
10812             {
10813                 static NeonGenTwoOpFn * const fns[3] = {
10814                     gen_helper_neon_padd_u8,
10815                     gen_helper_neon_padd_u16,
10816                     tcg_gen_add_i32,
10817                 };
10818                 genfn = fns[size];
10819                 break;
10820             }
10821             case 0x14: /* SMAXP, UMAXP */
10822             {
10823                 static NeonGenTwoOpFn * const fns[3][2] = {
10824                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10825                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10826                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10827                 };
10828                 genfn = fns[size][u];
10829                 break;
10830             }
10831             case 0x15: /* SMINP, UMINP */
10832             {
10833                 static NeonGenTwoOpFn * const fns[3][2] = {
10834                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10835                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10836                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10837                 };
10838                 genfn = fns[size][u];
10839                 break;
10840             }
10841             /* The FP operations are all on single floats (32 bit) */
10842             case 0x58: /* FMAXNMP */
10843                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10844                 break;
10845             case 0x5a: /* FADDP */
10846                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10847                 break;
10848             case 0x5e: /* FMAXP */
10849                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10850                 break;
10851             case 0x78: /* FMINNMP */
10852                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10853                 break;
10854             case 0x7e: /* FMINP */
10855                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10856                 break;
10857             default:
10858                 g_assert_not_reached();
10859             }
10860 
10861             /* FP ops were emitted above; integer ops call genfn now */
10862             if (genfn) {
10863                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10864             }
10865         }
10866 
10867         for (pass = 0; pass < maxpass; pass++) {
10868             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10869         }
10870         clear_vec_high(s, is_q, rd);
10871     }
10872 }
10873 
10874 /* Floating point op subgroup of C3.6.16. */
10875 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10876 {
10877     /* For floating point ops, the U, size[1] and opcode bits
10878      * together indicate the operation. size[0] indicates single
10879      * or double.
10880      */
10881     int fpopcode = extract32(insn, 11, 5)
10882         | (extract32(insn, 23, 1) << 5)
10883         | (extract32(insn, 29, 1) << 6);
10884     int is_q = extract32(insn, 30, 1);
10885     int size = extract32(insn, 22, 1);
10886     int rm = extract32(insn, 16, 5);
10887     int rn = extract32(insn, 5, 5);
10888     int rd = extract32(insn, 0, 5);
10889 
10890     int datasize = is_q ? 128 : 64;
10891     int esize = 32 << size;
10892     int elements = datasize / esize;
10893 
10894     if (size == 1 && !is_q) {
10895         unallocated_encoding(s);
10896         return;
10897     }
10898 
10899     switch (fpopcode) {
10900     case 0x58: /* FMAXNMP */
10901     case 0x5a: /* FADDP */
10902     case 0x5e: /* FMAXP */
10903     case 0x78: /* FMINNMP */
10904     case 0x7e: /* FMINP */
10905         if (size && !is_q) {
10906             unallocated_encoding(s);
10907             return;
10908         }
10909         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10910                                rn, rm, rd);
10911         return;
10912     case 0x1b: /* FMULX */
10913     case 0x1f: /* FRECPS */
10914     case 0x3f: /* FRSQRTS */
10915     case 0x5d: /* FACGE */
10916     case 0x7d: /* FACGT */
10917     case 0x19: /* FMLA */
10918     case 0x39: /* FMLS */
10919     case 0x18: /* FMAXNM */
10920     case 0x1a: /* FADD */
10921     case 0x1c: /* FCMEQ */
10922     case 0x1e: /* FMAX */
10923     case 0x38: /* FMINNM */
10924     case 0x3a: /* FSUB */
10925     case 0x3e: /* FMIN */
10926     case 0x5b: /* FMUL */
10927     case 0x5c: /* FCMGE */
10928     case 0x5f: /* FDIV */
10929     case 0x7a: /* FABD */
10930     case 0x7c: /* FCMGT */
10931         if (!fp_access_check(s)) {
10932             return;
10933         }
10934         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10935         return;
10936 
10937     case 0x1d: /* FMLAL  */
10938     case 0x3d: /* FMLSL  */
10939     case 0x59: /* FMLAL2 */
10940     case 0x79: /* FMLSL2 */
10941         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
10942             unallocated_encoding(s);
10943             return;
10944         }
10945         if (fp_access_check(s)) {
10946             int is_s = extract32(insn, 23, 1);
10947             int is_2 = extract32(insn, 29, 1);
10948             int data = (is_2 << 1) | is_s;
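              /* data packs the FMLSL (subtract) and "2" (high half)
               * selectors for the fmlal helper.
               */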
10949             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
10950                                vec_full_reg_offset(s, rn),
10951                                vec_full_reg_offset(s, rm), tcg_env,
10952                                is_q ? 16 : 8, vec_full_reg_size(s),
10953                                data, gen_helper_gvec_fmlal_a64);
10954         }
10955         return;
10956 
10957     default:
10958         unallocated_encoding(s);
10959         return;
10960     }
10961 }
10962 
10963 /* Integer op subgroup of C3.6.16. */
10964 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
10965 {
10966     int is_q = extract32(insn, 30, 1);
10967     int u = extract32(insn, 29, 1);
10968     int size = extract32(insn, 22, 2);
10969     int opcode = extract32(insn, 11, 5);
10970     int rm = extract32(insn, 16, 5);
10971     int rn = extract32(insn, 5, 5);
10972     int rd = extract32(insn, 0, 5);
10973     int pass;
10974     TCGCond cond;
10975 
10976     switch (opcode) {
10977     case 0x13: /* MUL, PMUL */
10978         if (u && size != 0) {
10979             unallocated_encoding(s);
10980             return;
10981         }
10982         /* fall through */
10983     case 0x0: /* SHADD, UHADD */
10984     case 0x2: /* SRHADD, URHADD */
10985     case 0x4: /* SHSUB, UHSUB */
10986     case 0xc: /* SMAX, UMAX */
10987     case 0xd: /* SMIN, UMIN */
10988     case 0xe: /* SABD, UABD */
10989     case 0xf: /* SABA, UABA */
10990     case 0x12: /* MLA, MLS */
10991         if (size == 3) {
10992             unallocated_encoding(s);
10993             return;
10994         }
10995         break;
10996     case 0x16: /* SQDMULH, SQRDMULH */
10997         if (size == 0 || size == 3) {
10998             unallocated_encoding(s);
10999             return;
11000         }
11001         break;
11002     default:
11003         if (size == 3 && !is_q) {
11004             unallocated_encoding(s);
11005             return;
11006         }
11007         break;
11008     }
11009 
11010     if (!fp_access_check(s)) {
11011         return;
11012     }
11013 
11014     switch (opcode) {
11015     case 0x01: /* SQADD, UQADD */
11016         if (u) {
11017             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11018         } else {
11019             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11020         }
11021         return;
11022     case 0x05: /* SQSUB, UQSUB */
11023         if (u) {
11024             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11025         } else {
11026             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11027         }
11028         return;
11029     case 0x08: /* SSHL, USHL */
11030         if (u) {
11031             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11032         } else {
11033             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11034         }
11035         return;
11036     case 0x0c: /* SMAX, UMAX */
11037         if (u) {
11038             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11039         } else {
11040             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11041         }
11042         return;
11043     case 0x0d: /* SMIN, UMIN */
11044         if (u) {
11045             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11046         } else {
11047             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11048         }
11049         return;
11050     case 0xe: /* SABD, UABD */
11051         if (u) {
11052             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11053         } else {
11054             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11055         }
11056         return;
11057     case 0xf: /* SABA, UABA */
11058         if (u) {
11059             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11060         } else {
11061             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11062         }
11063         return;
11064     case 0x10: /* ADD, SUB */
11065         if (u) {
11066             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11067         } else {
11068             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11069         }
11070         return;
11071     case 0x13: /* MUL, PMUL */
11072         if (!u) { /* MUL */
11073             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11074         } else {  /* PMUL */
11075             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11076         }
11077         return;
11078     case 0x12: /* MLA, MLS */
11079         if (u) {
11080             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11081         } else {
11082             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11083         }
11084         return;
11085     case 0x16: /* SQDMULH, SQRDMULH */
11086         {
11087             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11088                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11089                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11090             };
11091             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11092         }
11093         return;
11094     case 0x11:
11095         if (!u) { /* CMTST */
11096             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11097             return;
11098         }
11099         /* else CMEQ */
11100         cond = TCG_COND_EQ;
11101         goto do_gvec_cmp;
11102     case 0x06: /* CMGT, CMHI */
11103         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11104         goto do_gvec_cmp;
11105     case 0x07: /* CMGE, CMHS */
11106         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11107     do_gvec_cmp:
11108         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11109                          vec_full_reg_offset(s, rn),
11110                          vec_full_reg_offset(s, rm),
11111                          is_q ? 16 : 8, vec_full_reg_size(s));
11112         return;
11113     }
11114 
11115     if (size == 3) {
11116         assert(is_q);
11117         for (pass = 0; pass < 2; pass++) {
11118             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11119             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11120             TCGv_i64 tcg_res = tcg_temp_new_i64();
11121 
11122             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11123             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11124 
11125             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11126 
11127             write_vec_element(s, tcg_res, rd, pass, MO_64);
11128         }
11129     } else {
11130         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11131             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11132             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11133             TCGv_i32 tcg_res = tcg_temp_new_i32();
11134             NeonGenTwoOpFn *genfn = NULL;
11135             NeonGenTwoOpEnvFn *genenvfn = NULL;
11136 
11137             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11138             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11139 
11140             switch (opcode) {
11141             case 0x0: /* SHADD, UHADD */
11142             {
11143                 static NeonGenTwoOpFn * const fns[3][2] = {
11144                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11145                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11146                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11147                 };
11148                 genfn = fns[size][u];
11149                 break;
11150             }
11151             case 0x2: /* SRHADD, URHADD */
11152             {
11153                 static NeonGenTwoOpFn * const fns[3][2] = {
11154                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11155                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11156                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11157                 };
11158                 genfn = fns[size][u];
11159                 break;
11160             }
11161             case 0x4: /* SHSUB, UHSUB */
11162             {
11163                 static NeonGenTwoOpFn * const fns[3][2] = {
11164                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11165                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11166                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11167                 };
11168                 genfn = fns[size][u];
11169                 break;
11170             }
11171             case 0x9: /* SQSHL, UQSHL */
11172             {
11173                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11174                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11175                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11176                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11177                 };
11178                 genenvfn = fns[size][u];
11179                 break;
11180             }
11181             case 0xa: /* SRSHL, URSHL */
11182             {
11183                 static NeonGenTwoOpFn * const fns[3][2] = {
11184                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11185                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11186                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11187                 };
11188                 genfn = fns[size][u];
11189                 break;
11190             }
11191             case 0xb: /* SQRSHL, UQRSHL */
11192             {
11193                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11194                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11195                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11196                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11197                 };
11198                 genenvfn = fns[size][u];
11199                 break;
11200             }
11201             default:
11202                 g_assert_not_reached();
11203             }
11204 
11205             if (genenvfn) {
11206                 genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2);
11207             } else {
11208                 genfn(tcg_res, tcg_op1, tcg_op2);
11209             }
11210 
11211             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11212         }
11213     }
11214     clear_vec_high(s, is_q, rd);
11215 }
11216 
11217 /* AdvSIMD three same
11218  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11219  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11220  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11221  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11222  */
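      /*
       * A worked example, assuming standard A64 assembler syntax: the word
       * 0x4ea28420 has Q=1, U=0, size=2, Rm=2, opcode=0x10, Rn=1, Rd=0,
       * i.e. ADD v0.4s, v1.4s, v2.4s, which is handled by the default
       * (integer ops) arm of the switch below.
       */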
11223 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11224 {
11225     int opcode = extract32(insn, 11, 5);
11226 
11227     switch (opcode) {
11228     case 0x3: /* logic ops */
11229         disas_simd_3same_logic(s, insn);
11230         break;
11231     case 0x17: /* ADDP */
11232     case 0x14: /* SMAXP, UMAXP */
11233     case 0x15: /* SMINP, UMINP */
11234     {
11235         /* Pairwise operations */
11236         int is_q = extract32(insn, 30, 1);
11237         int u = extract32(insn, 29, 1);
11238         int size = extract32(insn, 22, 2);
11239         int rm = extract32(insn, 16, 5);
11240         int rn = extract32(insn, 5, 5);
11241         int rd = extract32(insn, 0, 5);
11242         if (opcode == 0x17) {
11243             if (u || (size == 3 && !is_q)) {
11244                 unallocated_encoding(s);
11245                 return;
11246             }
11247         } else {
11248             if (size == 3) {
11249                 unallocated_encoding(s);
11250                 return;
11251             }
11252         }
11253         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11254         break;
11255     }
11256     case 0x18 ... 0x31:
11257         /* floating point ops, sz[1] and U are part of opcode */
11258         disas_simd_3same_float(s, insn);
11259         break;
11260     default:
11261         disas_simd_3same_int(s, insn);
11262         break;
11263     }
11264 }
11265 
11266 /*
11267  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11268  *
11269  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11270  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11271  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11272  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11273  *
11274  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11275  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11276  *
11277  */
11278 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11279 {
11280     int opcode = extract32(insn, 11, 3);
11281     int u = extract32(insn, 29, 1);
11282     int a = extract32(insn, 23, 1);
11283     int is_q = extract32(insn, 30, 1);
11284     int rm = extract32(insn, 16, 5);
11285     int rn = extract32(insn, 5, 5);
11286     int rd = extract32(insn, 0, 5);
11287     /*
11288      * For these floating point ops, the U, a and opcode bits
11289      * together indicate the operation.
11290      */
11291     int fpopcode = opcode | (a << 3) | (u << 4);
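          /*
           * For example, FADD is opcode 2 with a == 0 and U == 0 (fpopcode
           * 0x2); setting U instead selects the pairwise FADDP (0x12), and
           * setting a selects FSUB (0xa), matching the cases below.
           */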
11292     int datasize = is_q ? 128 : 64;
11293     int elements = datasize / 16;
11294     bool pairwise;
11295     TCGv_ptr fpst;
11296     int pass;
11297 
11298     switch (fpopcode) {
11299     case 0x0: /* FMAXNM */
11300     case 0x1: /* FMLA */
11301     case 0x2: /* FADD */
11302     case 0x3: /* FMULX */
11303     case 0x4: /* FCMEQ */
11304     case 0x6: /* FMAX */
11305     case 0x7: /* FRECPS */
11306     case 0x8: /* FMINNM */
11307     case 0x9: /* FMLS */
11308     case 0xa: /* FSUB */
11309     case 0xe: /* FMIN */
11310     case 0xf: /* FRSQRTS */
11311     case 0x13: /* FMUL */
11312     case 0x14: /* FCMGE */
11313     case 0x15: /* FACGE */
11314     case 0x17: /* FDIV */
11315     case 0x1a: /* FABD */
11316     case 0x1c: /* FCMGT */
11317     case 0x1d: /* FACGT */
11318         pairwise = false;
11319         break;
11320     case 0x10: /* FMAXNMP */
11321     case 0x12: /* FADDP */
11322     case 0x16: /* FMAXP */
11323     case 0x18: /* FMINNMP */
11324     case 0x1e: /* FMINP */
11325         pairwise = true;
11326         break;
11327     default:
11328         unallocated_encoding(s);
11329         return;
11330     }
11331 
11332     if (!dc_isar_feature(aa64_fp16, s)) {
11333         unallocated_encoding(s);
11334         return;
11335     }
11336 
11337     if (!fp_access_check(s)) {
11338         return;
11339     }
11340 
11341     fpst = fpstatus_ptr(FPST_FPCR_F16);
11342 
11343     if (pairwise) {
11344         int maxpass = is_q ? 8 : 4;
11345         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11346         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11347         TCGv_i32 tcg_res[8];
11348 
11349         for (pass = 0; pass < maxpass; pass++) {
11350             int passreg = pass < (maxpass / 2) ? rn : rm;
11351             int passelt = (pass << 1) & (maxpass - 1);
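                  /*
                   * Pairwise ops treat rn:rm as one concatenated vector: the
                   * first maxpass/2 results take adjacent element pairs from
                   * rn and the rest from rm (e.g. with Q=1, pass 4 reads rm
                   * elements 0 and 1).
                   */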
11352 
11353             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11354             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11355             tcg_res[pass] = tcg_temp_new_i32();
11356 
11357             switch (fpopcode) {
11358             case 0x10: /* FMAXNMP */
11359                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11360                                            fpst);
11361                 break;
11362             case 0x12: /* FADDP */
11363                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11364                 break;
11365             case 0x16: /* FMAXP */
11366                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11367                 break;
11368             case 0x18: /* FMINNMP */
11369                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11370                                            fpst);
11371                 break;
11372             case 0x1e: /* FMINP */
11373                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11374                 break;
11375             default:
11376                 g_assert_not_reached();
11377             }
11378         }
11379 
11380         for (pass = 0; pass < maxpass; pass++) {
11381             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11382         }
11383     } else {
11384         for (pass = 0; pass < elements; pass++) {
11385             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11386             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11387             TCGv_i32 tcg_res = tcg_temp_new_i32();
11388 
11389             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11390             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11391 
11392             switch (fpopcode) {
11393             case 0x0: /* FMAXNM */
11394                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11395                 break;
11396             case 0x1: /* FMLA */
11397                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11398                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11399                                            fpst);
11400                 break;
11401             case 0x2: /* FADD */
11402                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11403                 break;
11404             case 0x3: /* FMULX */
11405                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11406                 break;
11407             case 0x4: /* FCMEQ */
11408                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11409                 break;
11410             case 0x6: /* FMAX */
11411                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11412                 break;
11413             case 0x7: /* FRECPS */
11414                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11415                 break;
11416             case 0x8: /* FMINNM */
11417                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11418                 break;
11419             case 0x9: /* FMLS */
11420                 /* As usual for ARM, separate negation for fused multiply-add */
11421                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11422                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11423                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11424                                            fpst);
11425                 break;
11426             case 0xa: /* FSUB */
11427                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11428                 break;
11429             case 0xe: /* FMIN */
11430                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11431                 break;
11432             case 0xf: /* FRSQRTS */
11433                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11434                 break;
11435             case 0x13: /* FMUL */
11436                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11437                 break;
11438             case 0x14: /* FCMGE */
11439                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11440                 break;
11441             case 0x15: /* FACGE */
11442                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11443                 break;
11444             case 0x17: /* FDIV */
11445                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11446                 break;
11447             case 0x1a: /* FABD */
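                      /* Absolute difference: a - b with the f16 sign bit cleared */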
11448                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11449                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11450                 break;
11451             case 0x1c: /* FCMGT */
11452                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11453                 break;
11454             case 0x1d: /* FACGT */
11455                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11456                 break;
11457             default:
11458                 g_assert_not_reached();
11459             }
11460 
11461             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11462         }
11463     }
11464 
11465     clear_vec_high(s, is_q, rd);
11466 }
11467 
11468 /* AdvSIMD three same extra
11469  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11470  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11471  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11472  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11473  */
11474 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11475 {
11476     int rd = extract32(insn, 0, 5);
11477     int rn = extract32(insn, 5, 5);
11478     int opcode = extract32(insn, 11, 4);
11479     int rm = extract32(insn, 16, 5);
11480     int size = extract32(insn, 22, 2);
11481     bool u = extract32(insn, 29, 1);
11482     bool is_q = extract32(insn, 30, 1);
11483     bool feature;
11484     int rot;
11485 
11486     switch (u * 16 + opcode) {
11487     case 0x10: /* SQRDMLAH (vector) */
11488     case 0x11: /* SQRDMLSH (vector) */
11489         if (size != 1 && size != 2) {
11490             unallocated_encoding(s);
11491             return;
11492         }
11493         feature = dc_isar_feature(aa64_rdm, s);
11494         break;
11495     case 0x02: /* SDOT (vector) */
11496     case 0x12: /* UDOT (vector) */
11497         if (size != MO_32) {
11498             unallocated_encoding(s);
11499             return;
11500         }
11501         feature = dc_isar_feature(aa64_dp, s);
11502         break;
11503     case 0x03: /* USDOT */
11504         if (size != MO_32) {
11505             unallocated_encoding(s);
11506             return;
11507         }
11508         feature = dc_isar_feature(aa64_i8mm, s);
11509         break;
11510     case 0x04: /* SMMLA */
11511     case 0x14: /* UMMLA */
11512     case 0x05: /* USMMLA */
11513         if (!is_q || size != MO_32) {
11514             unallocated_encoding(s);
11515             return;
11516         }
11517         feature = dc_isar_feature(aa64_i8mm, s);
11518         break;
11519     case 0x18: /* FCMLA, #0 */
11520     case 0x19: /* FCMLA, #90 */
11521     case 0x1a: /* FCMLA, #180 */
11522     case 0x1b: /* FCMLA, #270 */
11523     case 0x1c: /* FCADD, #90 */
11524     case 0x1e: /* FCADD, #270 */
11525         if (size == 0
11526             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11527             || (size == 3 && !is_q)) {
11528             unallocated_encoding(s);
11529             return;
11530         }
11531         feature = dc_isar_feature(aa64_fcma, s);
11532         break;
11533     case 0x1d: /* BFMMLA */
11534         if (size != MO_16 || !is_q) {
11535             unallocated_encoding(s);
11536             return;
11537         }
11538         feature = dc_isar_feature(aa64_bf16, s);
11539         break;
11540     case 0x1f:
11541         switch (size) {
11542         case 1: /* BFDOT */
11543         case 3: /* BFMLAL{B,T} */
11544             feature = dc_isar_feature(aa64_bf16, s);
11545             break;
11546         default:
11547             unallocated_encoding(s);
11548             return;
11549         }
11550         break;
11551     default:
11552         unallocated_encoding(s);
11553         return;
11554     }
11555     if (!feature) {
11556         unallocated_encoding(s);
11557         return;
11558     }
11559     if (!fp_access_check(s)) {
11560         return;
11561     }
11562 
11563     switch (opcode) {
11564     case 0x0: /* SQRDMLAH (vector) */
11565         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11566         return;
11567 
11568     case 0x1: /* SQRDMLSH (vector) */
11569         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11570         return;
11571 
11572     case 0x2: /* SDOT / UDOT */
11573         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11574                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11575         return;
11576 
11577     case 0x3: /* USDOT */
11578         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11579         return;
11580 
11581     case 0x04: /* SMMLA, UMMLA */
11582         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11583                          u ? gen_helper_gvec_ummla_b
11584                          : gen_helper_gvec_smmla_b);
11585         return;
11586     case 0x05: /* USMMLA */
11587         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11588         return;
11589 
11590     case 0x8: /* FCMLA, #0 */
11591     case 0x9: /* FCMLA, #90 */
11592     case 0xa: /* FCMLA, #180 */
11593     case 0xb: /* FCMLA, #270 */
11594         rot = extract32(opcode, 0, 2);
11595         switch (size) {
11596         case 1:
11597             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11598                               gen_helper_gvec_fcmlah);
11599             break;
11600         case 2:
11601             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11602                               gen_helper_gvec_fcmlas);
11603             break;
11604         case 3:
11605             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11606                               gen_helper_gvec_fcmlad);
11607             break;
11608         default:
11609             g_assert_not_reached();
11610         }
11611         return;
11612 
11613     case 0xc: /* FCADD, #90 */
11614     case 0xe: /* FCADD, #270 */
11615         rot = extract32(opcode, 1, 1);
11616         switch (size) {
11617         case 1:
11618             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11619                               gen_helper_gvec_fcaddh);
11620             break;
11621         case 2:
11622             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11623                               gen_helper_gvec_fcadds);
11624             break;
11625         case 3:
11626             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11627                               gen_helper_gvec_fcaddd);
11628             break;
11629         default:
11630             g_assert_not_reached();
11631         }
11632         return;
11633 
11634     case 0xd: /* BFMMLA */
11635         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11636         return;
11637     case 0xf:
11638         switch (size) {
11639         case 1: /* BFDOT */
11640             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11641             break;
11642         case 3: /* BFMLAL{B,T} */
11643             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11644                               gen_helper_gvec_bfmlal);
11645             break;
11646         default:
11647             g_assert_not_reached();
11648         }
11649         return;
11650 
11651     default:
11652         g_assert_not_reached();
11653     }
11654 }
11655 
11656 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11657                                   int size, int rn, int rd)
11658 {
11659     /* Handle 2-reg-misc ops which are widening (so each size element
11660      * in the source becomes a 2*size element in the destination).
11661      * The only instruction like this is FCVTL.
11662      */
11663     int pass;
11664 
11665     if (size == 3) {
11666         /* 32 -> 64 bit fp conversion */
11667         TCGv_i64 tcg_res[2];
11668         int srcelt = is_q ? 2 : 0;
11669 
11670         for (pass = 0; pass < 2; pass++) {
11671             TCGv_i32 tcg_op = tcg_temp_new_i32();
11672             tcg_res[pass] = tcg_temp_new_i64();
11673 
11674             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11675             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
11676         }
11677         for (pass = 0; pass < 2; pass++) {
11678             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11679         }
11680     } else {
11681         /* 16 -> 32 bit fp conversion */
11682         int srcelt = is_q ? 4 : 0;
11683         TCGv_i32 tcg_res[4];
11684         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11685         TCGv_i32 ahp = get_ahp_flag();
11686 
11687         for (pass = 0; pass < 4; pass++) {
11688             tcg_res[pass] = tcg_temp_new_i32();
11689 
11690             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11691             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11692                                            fpst, ahp);
11693         }
11694         for (pass = 0; pass < 4; pass++) {
11695             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11696         }
11697     }
11698 }
11699 
11700 static void handle_rev(DisasContext *s, int opcode, bool u,
11701                        bool is_q, int size, int rn, int rd)
11702 {
11703     int op = (opcode << 1) | u;
11704     int opsz = op + size;
11705     int grp_size = 3 - opsz;
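          /*
           * op is 0 for REV64, 1 for REV32, 2 for REV16, so each reversal
           * group is 64 >> op bits wide and grp_size is log2 of the number
           * of elements per group (e.g. REV32 on bytes: op 1, size 0,
           * grp_size 2, i.e. byte-reverse each 32-bit group).
           */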
11706     int dsize = is_q ? 128 : 64;
11707     int i;
11708 
11709     if (opsz >= 3) {
11710         unallocated_encoding(s);
11711         return;
11712     }
11713 
11714     if (!fp_access_check(s)) {
11715         return;
11716     }
11717 
11718     if (size == 0) {
11719         /* Special-case bytes: use a bswap op on each group of elements */
11720         int groups = dsize / (8 << grp_size);
11721 
11722         for (i = 0; i < groups; i++) {
11723             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11724 
11725             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11726             switch (grp_size) {
11727             case MO_16:
11728                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11729                 break;
11730             case MO_32:
11731                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11732                 break;
11733             case MO_64:
11734                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11735                 break;
11736             default:
11737                 g_assert_not_reached();
11738             }
11739             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11740         }
11741         clear_vec_high(s, is_q, rd);
11742     } else {
11743         int revmask = (1 << grp_size) - 1;
11744         int esize = 8 << size;
11745         int elements = dsize / esize;
11746         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11747         TCGv_i64 tcg_rd[2];
11748 
11749         for (i = 0; i < 2; i++) {
11750             tcg_rd[i] = tcg_temp_new_i64();
11751             tcg_gen_movi_i64(tcg_rd[i], 0);
11752         }
11753 
11754         for (i = 0; i < elements; i++) {
11755             int e_rev = (i & 0xf) ^ revmask;
11756             int w = (e_rev * esize) / 64;
11757             int o = (e_rev * esize) % 64;
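                  /*
                   * i ^ revmask is the element's position after reversal
                   * within its 2^grp_size-element group; w and o locate the
                   * destination 64-bit word and the bit offset within it.
                   */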
11758 
11759             read_vec_element(s, tcg_rn, rn, i, size);
11760             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11761         }
11762 
11763         for (i = 0; i < 2; i++) {
11764             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11765         }
11766         clear_vec_high(s, true, rd);
11767     }
11768 }
11769 
11770 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11771                                   bool is_q, int size, int rn, int rd)
11772 {
11773     /* Implement the pairwise operations from 2-misc:
11774      * SADDLP, UADDLP, SADALP, UADALP.
11775      * These all add pairs of elements in the input to produce a
11776      * double-width result element in the output (possibly accumulating).
11777      */
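          /*
           * For example, SADDLP v0.2s, v1.4h sums each adjacent pair of
           * signed halfwords of v1 into a 32-bit lane of v0; SADALP does
           * the same but accumulates into the existing lanes of v0.
           */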
11778     bool accum = (opcode == 0x6);
11779     int maxpass = is_q ? 2 : 1;
11780     int pass;
11781     TCGv_i64 tcg_res[2];
11782 
11783     if (size == 2) {
11784         /* 32 + 32 -> 64 op */
11785         MemOp memop = size + (u ? 0 : MO_SIGN);
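              /* i.e. MO_32 for UADDLP/UADALP, MO_32 | MO_SIGN for SADDLP/SADALP */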
11786 
11787         for (pass = 0; pass < maxpass; pass++) {
11788             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11789             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11790 
11791             tcg_res[pass] = tcg_temp_new_i64();
11792 
11793             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11794             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11795             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11796             if (accum) {
11797                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11798                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11799             }
11800         }
11801     } else {
11802         for (pass = 0; pass < maxpass; pass++) {
11803             TCGv_i64 tcg_op = tcg_temp_new_i64();
11804             NeonGenOne64OpFn *genfn;
11805             static NeonGenOne64OpFn * const fns[2][2] = {
11806                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11807                 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
11808             };
11809 
11810             genfn = fns[size][u];
11811 
11812             tcg_res[pass] = tcg_temp_new_i64();
11813 
11814             read_vec_element(s, tcg_op, rn, pass, MO_64);
11815             genfn(tcg_res[pass], tcg_op);
11816 
11817             if (accum) {
11818                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11819                 if (size == 0) {
11820                     gen_helper_neon_addl_u16(tcg_res[pass],
11821                                              tcg_res[pass], tcg_op);
11822                 } else {
11823                     gen_helper_neon_addl_u32(tcg_res[pass],
11824                                              tcg_res[pass], tcg_op);
11825                 }
11826             }
11827         }
11828     }
11829     if (!is_q) {
11830         tcg_res[1] = tcg_constant_i64(0);
11831     }
11832     for (pass = 0; pass < 2; pass++) {
11833         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11834     }
11835 }
11836 
11837 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11838 {
11839     /* Implement SHLL and SHLL2 */
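          /*
           * These widen each element of the low (SHLL) or high (SHLL2) half
           * of rn and shift it left by the element size; e.g.
           * SHLL v0.8h, v1.8b, #8, where the immediate must equal the
           * source element width.
           */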
11840     int pass;
11841     int part = is_q ? 2 : 0;
11842     TCGv_i64 tcg_res[2];
11843 
11844     for (pass = 0; pass < 2; pass++) {
11845         static NeonGenWidenFn * const widenfns[3] = {
11846             gen_helper_neon_widen_u8,
11847             gen_helper_neon_widen_u16,
11848             tcg_gen_extu_i32_i64,
11849         };
11850         NeonGenWidenFn *widenfn = widenfns[size];
11851         TCGv_i32 tcg_op = tcg_temp_new_i32();
11852 
11853         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11854         tcg_res[pass] = tcg_temp_new_i64();
11855         widenfn(tcg_res[pass], tcg_op);
11856         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11857     }
11858 
11859     for (pass = 0; pass < 2; pass++) {
11860         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11861     }
11862 }
11863 
11864 /* AdvSIMD two reg misc
11865  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11866  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11867  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11868  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11869  */
11870 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11871 {
11872     int size = extract32(insn, 22, 2);
11873     int opcode = extract32(insn, 12, 5);
11874     bool u = extract32(insn, 29, 1);
11875     bool is_q = extract32(insn, 30, 1);
11876     int rn = extract32(insn, 5, 5);
11877     int rd = extract32(insn, 0, 5);
11878     bool need_fpstatus = false;
11879     int rmode = -1;
11880     TCGv_i32 tcg_rmode;
11881     TCGv_ptr tcg_fpstatus;
11882 
11883     switch (opcode) {
11884     case 0x0: /* REV64, REV32 */
11885     case 0x1: /* REV16 */
11886         handle_rev(s, opcode, u, is_q, size, rn, rd);
11887         return;
11888     case 0x5: /* CNT, NOT, RBIT */
11889         if (u && size == 0) {
11890             /* NOT */
11891             break;
11892         } else if (u && size == 1) {
11893             /* RBIT */
11894             break;
11895         } else if (!u && size == 0) {
11896             /* CNT */
11897             break;
11898         }
11899         unallocated_encoding(s);
11900         return;
11901     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11902     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11903         if (size == 3) {
11904             unallocated_encoding(s);
11905             return;
11906         }
11907         if (!fp_access_check(s)) {
11908             return;
11909         }
11910 
11911         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11912         return;
11913     case 0x4: /* CLS, CLZ */
11914         if (size == 3) {
11915             unallocated_encoding(s);
11916             return;
11917         }
11918         break;
11919     case 0x2: /* SADDLP, UADDLP */
11920     case 0x6: /* SADALP, UADALP */
11921         if (size == 3) {
11922             unallocated_encoding(s);
11923             return;
11924         }
11925         if (!fp_access_check(s)) {
11926             return;
11927         }
11928         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11929         return;
11930     case 0x13: /* SHLL, SHLL2 */
11931         if (u == 0 || size == 3) {
11932             unallocated_encoding(s);
11933             return;
11934         }
11935         if (!fp_access_check(s)) {
11936             return;
11937         }
11938         handle_shll(s, is_q, size, rn, rd);
11939         return;
11940     case 0xa: /* CMLT */
11941         if (u == 1) {
11942             unallocated_encoding(s);
11943             return;
11944         }
11945         /* fall through */
11946     case 0x8: /* CMGT, CMGE */
11947     case 0x9: /* CMEQ, CMLE */
11948     case 0xb: /* ABS, NEG */
11949         if (size == 3 && !is_q) {
11950             unallocated_encoding(s);
11951             return;
11952         }
11953         break;
11954     case 0x3: /* SUQADD, USQADD */
11955         if (size == 3 && !is_q) {
11956             unallocated_encoding(s);
11957             return;
11958         }
11959         if (!fp_access_check(s)) {
11960             return;
11961         }
11962         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
11963         return;
11964     case 0x7: /* SQABS, SQNEG */
11965         if (size == 3 && !is_q) {
11966             unallocated_encoding(s);
11967             return;
11968         }
11969         break;
11970     case 0xc ... 0xf:
11971     case 0x16 ... 0x1f:
11972     {
11973         /* Floating point: U, size[1] and opcode indicate operation;
11974          * size[0] indicates single or double precision.
11975          */
11976         int is_double = extract32(size, 0, 1);
11977         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11978         size = is_double ? 3 : 2;
11979         switch (opcode) {
11980         case 0x2f: /* FABS */
11981         case 0x6f: /* FNEG */
11982             if (size == 3 && !is_q) {
11983                 unallocated_encoding(s);
11984                 return;
11985             }
11986             break;
11987         case 0x1d: /* SCVTF */
11988         case 0x5d: /* UCVTF */
11989         {
11990             bool is_signed = (opcode == 0x1d);
11991             int elements = is_double ? 2 : is_q ? 4 : 2;
11992             if (is_double && !is_q) {
11993                 unallocated_encoding(s);
11994                 return;
11995             }
11996             if (!fp_access_check(s)) {
11997                 return;
11998             }
11999             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12000             return;
12001         }
12002         case 0x2c: /* FCMGT (zero) */
12003         case 0x2d: /* FCMEQ (zero) */
12004         case 0x2e: /* FCMLT (zero) */
12005         case 0x6c: /* FCMGE (zero) */
12006         case 0x6d: /* FCMLE (zero) */
12007             if (size == 3 && !is_q) {
12008                 unallocated_encoding(s);
12009                 return;
12010             }
12011             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12012             return;
12013         case 0x7f: /* FSQRT */
12014             if (size == 3 && !is_q) {
12015                 unallocated_encoding(s);
12016                 return;
12017             }
12018             break;
12019         case 0x1a: /* FCVTNS */
12020         case 0x1b: /* FCVTMS */
12021         case 0x3a: /* FCVTPS */
12022         case 0x3b: /* FCVTZS */
12023         case 0x5a: /* FCVTNU */
12024         case 0x5b: /* FCVTMU */
12025         case 0x7a: /* FCVTPU */
12026         case 0x7b: /* FCVTZU */
12027             need_fpstatus = true;
12028             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
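                  /*
                   * opcode<5> and opcode<0> select the FPRounding value:
                   * FCVT{N,M,P,Z}x map to TIEEVEN, NEGINF, POSINF and ZERO
                   * respectively (cf. the explicit cases in the FP16
                   * decoder below).
                   */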
12029             if (size == 3 && !is_q) {
12030                 unallocated_encoding(s);
12031                 return;
12032             }
12033             break;
12034         case 0x5c: /* FCVTAU */
12035         case 0x1c: /* FCVTAS */
12036             need_fpstatus = true;
12037             rmode = FPROUNDING_TIEAWAY;
12038             if (size == 3 && !is_q) {
12039                 unallocated_encoding(s);
12040                 return;
12041             }
12042             break;
12043         case 0x3c: /* URECPE */
12044             if (size == 3) {
12045                 unallocated_encoding(s);
12046                 return;
12047             }
12048             /* fall through */
12049         case 0x3d: /* FRECPE */
12050         case 0x7d: /* FRSQRTE */
12051             if (size == 3 && !is_q) {
12052                 unallocated_encoding(s);
12053                 return;
12054             }
12055             if (!fp_access_check(s)) {
12056                 return;
12057             }
12058             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12059             return;
12060         case 0x56: /* FCVTXN, FCVTXN2 */
12061             if (size == 2) {
12062                 unallocated_encoding(s);
12063                 return;
12064             }
12065             /* fall through */
12066         case 0x16: /* FCVTN, FCVTN2 */
12067             /* handle_2misc_narrow does a 2*size -> size operation, but these
12068              * instructions encode the source size rather than dest size.
12069              */
12070             if (!fp_access_check(s)) {
12071                 return;
12072             }
12073             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12074             return;
12075         case 0x36: /* BFCVTN, BFCVTN2 */
12076             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12077                 unallocated_encoding(s);
12078                 return;
12079             }
12080             if (!fp_access_check(s)) {
12081                 return;
12082             }
12083             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12084             return;
12085         case 0x17: /* FCVTL, FCVTL2 */
12086             if (!fp_access_check(s)) {
12087                 return;
12088             }
12089             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12090             return;
12091         case 0x18: /* FRINTN */
12092         case 0x19: /* FRINTM */
12093         case 0x38: /* FRINTP */
12094         case 0x39: /* FRINTZ */
12095             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
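                  /* FRINT{N,M,P,Z} likewise select TIEEVEN, NEGINF, POSINF, ZERO */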
12096             /* fall through */
12097         case 0x59: /* FRINTX */
12098         case 0x79: /* FRINTI */
12099             need_fpstatus = true;
12100             if (size == 3 && !is_q) {
12101                 unallocated_encoding(s);
12102                 return;
12103             }
12104             break;
12105         case 0x58: /* FRINTA */
12106             rmode = FPROUNDING_TIEAWAY;
12107             need_fpstatus = true;
12108             if (size == 3 && !is_q) {
12109                 unallocated_encoding(s);
12110                 return;
12111             }
12112             break;
12113         case 0x7c: /* URSQRTE */
12114             if (size == 3) {
12115                 unallocated_encoding(s);
12116                 return;
12117             }
12118             break;
12119         case 0x1e: /* FRINT32Z */
12120         case 0x1f: /* FRINT64Z */
12121             rmode = FPROUNDING_ZERO;
12122             /* fall through */
12123         case 0x5e: /* FRINT32X */
12124         case 0x5f: /* FRINT64X */
12125             need_fpstatus = true;
12126             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12127                 unallocated_encoding(s);
12128                 return;
12129             }
12130             break;
12131         default:
12132             unallocated_encoding(s);
12133             return;
12134         }
12135         break;
12136     }
12137     default:
12138         unallocated_encoding(s);
12139         return;
12140     }
12141 
12142     if (!fp_access_check(s)) {
12143         return;
12144     }
12145 
12146     if (need_fpstatus || rmode >= 0) {
12147         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12148     } else {
12149         tcg_fpstatus = NULL;
12150     }
12151     if (rmode >= 0) {
12152         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12153     } else {
12154         tcg_rmode = NULL;
12155     }
12156 
12157     switch (opcode) {
12158     case 0x5:
12159         if (u && size == 0) { /* NOT */
12160             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12161             return;
12162         }
12163         break;
12164     case 0x8: /* CMGT, CMGE */
12165         if (u) {
12166             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12167         } else {
12168             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12169         }
12170         return;
12171     case 0x9: /* CMEQ, CMLE */
12172         if (u) {
12173             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12174         } else {
12175             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12176         }
12177         return;
12178     case 0xa: /* CMLT */
12179         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12180         return;
12181     case 0xb: /* ABS, NEG */
12182         if (u) {
12183             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12184         } else {
12185             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12186         }
12187         return;
12188     }
12189 
12190     if (size == 3) {
12191         /* All 64-bit element operations can be shared with scalar 2misc */
12192         int pass;
12193 
12194         /* Coverity claims (size == 3 && !is_q) has been eliminated
12195          * from all paths leading to here.
12196          */
12197         tcg_debug_assert(is_q);
12198         for (pass = 0; pass < 2; pass++) {
12199             TCGv_i64 tcg_op = tcg_temp_new_i64();
12200             TCGv_i64 tcg_res = tcg_temp_new_i64();
12201 
12202             read_vec_element(s, tcg_op, rn, pass, MO_64);
12203 
12204             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12205                             tcg_rmode, tcg_fpstatus);
12206 
12207             write_vec_element(s, tcg_res, rd, pass, MO_64);
12208         }
12209     } else {
12210         int pass;
12211 
12212         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12213             TCGv_i32 tcg_op = tcg_temp_new_i32();
12214             TCGv_i32 tcg_res = tcg_temp_new_i32();
12215 
12216             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12217 
12218             if (size == 2) {
12219                 /* Special cases for 32 bit elements */
12220                 switch (opcode) {
12221                 case 0x4: /* CLS */
12222                     if (u) {
12223                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12224                     } else {
12225                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12226                     }
12227                     break;
12228                 case 0x7: /* SQABS, SQNEG */
12229                     if (u) {
12230                         gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
12231                     } else {
12232                         gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
12233                     }
12234                     break;
12235                 case 0x2f: /* FABS */
12236                     gen_helper_vfp_abss(tcg_res, tcg_op);
12237                     break;
12238                 case 0x6f: /* FNEG */
12239                     gen_helper_vfp_negs(tcg_res, tcg_op);
12240                     break;
12241                 case 0x7f: /* FSQRT */
12242                     gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
12243                     break;
12244                 case 0x1a: /* FCVTNS */
12245                 case 0x1b: /* FCVTMS */
12246                 case 0x1c: /* FCVTAS */
12247                 case 0x3a: /* FCVTPS */
12248                 case 0x3b: /* FCVTZS */
12249                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12250                                          tcg_constant_i32(0), tcg_fpstatus);
12251                     break;
12252                 case 0x5a: /* FCVTNU */
12253                 case 0x5b: /* FCVTMU */
12254                 case 0x5c: /* FCVTAU */
12255                 case 0x7a: /* FCVTPU */
12256                 case 0x7b: /* FCVTZU */
12257                     gen_helper_vfp_touls(tcg_res, tcg_op,
12258                                          tcg_constant_i32(0), tcg_fpstatus);
12259                     break;
12260                 case 0x18: /* FRINTN */
12261                 case 0x19: /* FRINTM */
12262                 case 0x38: /* FRINTP */
12263                 case 0x39: /* FRINTZ */
12264                 case 0x58: /* FRINTA */
12265                 case 0x79: /* FRINTI */
12266                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12267                     break;
12268                 case 0x59: /* FRINTX */
12269                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12270                     break;
12271                 case 0x7c: /* URSQRTE */
12272                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12273                     break;
12274                 case 0x1e: /* FRINT32Z */
12275                 case 0x5e: /* FRINT32X */
12276                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12277                     break;
12278                 case 0x1f: /* FRINT64Z */
12279                 case 0x5f: /* FRINT64X */
12280                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12281                     break;
12282                 default:
12283                     g_assert_not_reached();
12284                 }
12285             } else {
12286                 /* Use helpers for 8 and 16 bit elements */
12287                 switch (opcode) {
12288                 case 0x5: /* CNT, RBIT */
12289                     /* For these two insns size is part of the opcode specifier
12290                      * (handled earlier); they always operate on byte elements.
12291                      */
12292                     if (u) {
12293                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12294                     } else {
12295                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12296                     }
12297                     break;
12298                 case 0x7: /* SQABS, SQNEG */
12299                 {
12300                     NeonGenOneOpEnvFn *genfn;
12301                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12302                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12303                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12304                     };
12305                     genfn = fns[size][u];
12306                     genfn(tcg_res, tcg_env, tcg_op);
12307                     break;
12308                 }
12309                 case 0x4: /* CLS, CLZ */
12310                     if (u) {
12311                         if (size == 0) {
12312                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12313                         } else {
12314                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12315                         }
12316                     } else {
12317                         if (size == 0) {
12318                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12319                         } else {
12320                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12321                         }
12322                     }
12323                     break;
12324                 default:
12325                     g_assert_not_reached();
12326                 }
12327             }
12328 
12329             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12330         }
12331     }
12332     clear_vec_high(s, is_q, rd);
12333 
12334     if (tcg_rmode) {
12335         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12336     }
12337 }
12338 
12339 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12340  *
12341  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12342  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12343  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12344  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12345  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12346  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12347  *
12348  * This actually covers two groups where scalar access is governed by
12349  * bit 28. Several of the instructions (the float-to-integral group)
12350  * exist only in the vector form and are unallocated in the scalar
12351  * decode. Also, in the scalar decode Q is always 1.
12352  */
12353 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12354 {
12355     int fpop, opcode, a, u;
12356     int rn, rd;
12357     bool is_q;
12358     bool is_scalar;
12359     bool only_in_vector = false;
12360 
12361     int pass;
12362     TCGv_i32 tcg_rmode = NULL;
12363     TCGv_ptr tcg_fpstatus = NULL;
12364     bool need_fpst = true;
12365     int rmode = -1;
12366 
12367     if (!dc_isar_feature(aa64_fp16, s)) {
12368         unallocated_encoding(s);
12369         return;
12370     }
12371 
12372     rd = extract32(insn, 0, 5);
12373     rn = extract32(insn, 5, 5);
12374 
12375     a = extract32(insn, 23, 1);
12376     u = extract32(insn, 29, 1);
12377     is_scalar = extract32(insn, 28, 1);
12378     is_q = extract32(insn, 30, 1);
12379 
12380     opcode = extract32(insn, 12, 5);
12381     fpop = deposit32(opcode, 5, 1, a);
12382     fpop = deposit32(fpop, 6, 1, u);
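          /*
           * For example, FNEG has U = 1, a = 1, opcode = 0xf, giving fpop
           * 0x6f; FABS (0x2f) differs only in U.
           */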
12383 
12384     switch (fpop) {
12385     case 0x1d: /* SCVTF */
12386     case 0x5d: /* UCVTF */
12387     {
12388         int elements;
12389 
12390         if (is_scalar) {
12391             elements = 1;
12392         } else {
12393             elements = (is_q ? 8 : 4);
12394         }
12395 
12396         if (!fp_access_check(s)) {
12397             return;
12398         }
12399         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12400         return;
12401     }
12403     case 0x2c: /* FCMGT (zero) */
12404     case 0x2d: /* FCMEQ (zero) */
12405     case 0x2e: /* FCMLT (zero) */
12406     case 0x6c: /* FCMGE (zero) */
12407     case 0x6d: /* FCMLE (zero) */
12408         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12409         return;
12410     case 0x3d: /* FRECPE */
12411     case 0x3f: /* FRECPX */
12412         break;
12413     case 0x18: /* FRINTN */
12414         only_in_vector = true;
12415         rmode = FPROUNDING_TIEEVEN;
12416         break;
12417     case 0x19: /* FRINTM */
12418         only_in_vector = true;
12419         rmode = FPROUNDING_NEGINF;
12420         break;
12421     case 0x38: /* FRINTP */
12422         only_in_vector = true;
12423         rmode = FPROUNDING_POSINF;
12424         break;
12425     case 0x39: /* FRINTZ */
12426         only_in_vector = true;
12427         rmode = FPROUNDING_ZERO;
12428         break;
12429     case 0x58: /* FRINTA */
12430         only_in_vector = true;
12431         rmode = FPROUNDING_TIEAWAY;
12432         break;
12433     case 0x59: /* FRINTX */
12434     case 0x79: /* FRINTI */
12435         only_in_vector = true;
12436         /* current rounding mode */
12437         break;
12438     case 0x1a: /* FCVTNS */
12439         rmode = FPROUNDING_TIEEVEN;
12440         break;
12441     case 0x1b: /* FCVTMS */
12442         rmode = FPROUNDING_NEGINF;
12443         break;
12444     case 0x1c: /* FCVTAS */
12445         rmode = FPROUNDING_TIEAWAY;
12446         break;
12447     case 0x3a: /* FCVTPS */
12448         rmode = FPROUNDING_POSINF;
12449         break;
12450     case 0x3b: /* FCVTZS */
12451         rmode = FPROUNDING_ZERO;
12452         break;
12453     case 0x5a: /* FCVTNU */
12454         rmode = FPROUNDING_TIEEVEN;
12455         break;
12456     case 0x5b: /* FCVTMU */
12457         rmode = FPROUNDING_NEGINF;
12458         break;
12459     case 0x5c: /* FCVTAU */
12460         rmode = FPROUNDING_TIEAWAY;
12461         break;
12462     case 0x7a: /* FCVTPU */
12463         rmode = FPROUNDING_POSINF;
12464         break;
12465     case 0x7b: /* FCVTZU */
12466         rmode = FPROUNDING_ZERO;
12467         break;
12468     case 0x2f: /* FABS */
12469     case 0x6f: /* FNEG */
12470         need_fpst = false;
12471         break;
12472     case 0x7d: /* FRSQRTE */
12473     case 0x7f: /* FSQRT (vector) */
12474         break;
12475     default:
12476         unallocated_encoding(s);
12477         return;
12478     }
12479 
12480 
12481     /* Check additional constraints for the scalar encoding */
12482     if (is_scalar) {
12483         if (!is_q) {
12484             unallocated_encoding(s);
12485             return;
12486         }
12487         /* FRINTxx is only in the vector form */
12488         if (only_in_vector) {
12489             unallocated_encoding(s);
12490             return;
12491         }
12492     }
12493 
12494     if (!fp_access_check(s)) {
12495         return;
12496     }
12497 
12498     if (rmode >= 0 || need_fpst) {
12499         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12500     }
12501 
12502     if (rmode >= 0) {
12503         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12504     }
12505 
12506     if (is_scalar) {
12507         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12508         TCGv_i32 tcg_res = tcg_temp_new_i32();
12509 
12510         switch (fpop) {
12511         case 0x1a: /* FCVTNS */
12512         case 0x1b: /* FCVTMS */
12513         case 0x1c: /* FCVTAS */
12514         case 0x3a: /* FCVTPS */
12515         case 0x3b: /* FCVTZS */
12516             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12517             break;
12518         case 0x3d: /* FRECPE */
12519             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12520             break;
12521         case 0x3f: /* FRECPX */
12522             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12523             break;
12524         case 0x5a: /* FCVTNU */
12525         case 0x5b: /* FCVTMU */
12526         case 0x5c: /* FCVTAU */
12527         case 0x7a: /* FCVTPU */
12528         case 0x7b: /* FCVTZU */
12529             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12530             break;
12531         case 0x6f: /* FNEG */
12532             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12533             break;
12534         case 0x7d: /* FRSQRTE */
12535             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12536             break;
12537         default:
12538             g_assert_not_reached();
12539         }
12540 
12541         /* limit any sign extension going on */
12542         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12543         write_fp_sreg(s, rd, tcg_res);
12544     } else {
12545         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12546             TCGv_i32 tcg_op = tcg_temp_new_i32();
12547             TCGv_i32 tcg_res = tcg_temp_new_i32();
12548 
12549             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12550 
12551             switch (fpop) {
12552             case 0x1a: /* FCVTNS */
12553             case 0x1b: /* FCVTMS */
12554             case 0x1c: /* FCVTAS */
12555             case 0x3a: /* FCVTPS */
12556             case 0x3b: /* FCVTZS */
12557                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12558                 break;
12559             case 0x3d: /* FRECPE */
12560                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12561                 break;
12562             case 0x5a: /* FCVTNU */
12563             case 0x5b: /* FCVTMU */
12564             case 0x5c: /* FCVTAU */
12565             case 0x7a: /* FCVTPU */
12566             case 0x7b: /* FCVTZU */
12567                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12568                 break;
12569             case 0x18: /* FRINTN */
12570             case 0x19: /* FRINTM */
12571             case 0x38: /* FRINTP */
12572             case 0x39: /* FRINTZ */
12573             case 0x58: /* FRINTA */
12574             case 0x79: /* FRINTI */
12575                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12576                 break;
12577             case 0x59: /* FRINTX */
12578                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12579                 break;
12580             case 0x2f: /* FABS */
12581                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12582                 break;
12583             case 0x6f: /* FNEG */
12584                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12585                 break;
12586             case 0x7d: /* FRSQRTE */
12587                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12588                 break;
12589             case 0x7f: /* FSQRT */
12590                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12591                 break;
12592             default:
12593                 g_assert_not_reached();
12594             }
12595 
12596             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12597         }
12598 
12599         clear_vec_high(s, is_q, rd);
12600     }
12601 
12602     if (tcg_rmode) {
12603         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12604     }
12605 }
12606 
12607 /* AdvSIMD scalar x indexed element
12608  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12609  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12610  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12611  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12612  * AdvSIMD vector x indexed element
12613  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12614  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12615  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12616  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12617  */
12618 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12619 {
12620     /* This encoding has two kinds of instruction:
12621      *  normal, where we perform elt x idxelt => elt for each
12622      *     element in the vector
12623      *  long, where we perform elt x idxelt and generate a result of
12624      *     double the width of the input element
12625      * The long ops have a 'part' specifier (i.e. they come in INSN, INSN2 pairs).
12626      */
12627     bool is_scalar = extract32(insn, 28, 1);
12628     bool is_q = extract32(insn, 30, 1);
12629     bool u = extract32(insn, 29, 1);
12630     int size = extract32(insn, 22, 2);
12631     int l = extract32(insn, 21, 1);
12632     int m = extract32(insn, 20, 1);
12633     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12634     int rm = extract32(insn, 16, 4);
12635     int opcode = extract32(insn, 12, 4);
12636     int h = extract32(insn, 11, 1);
12637     int rn = extract32(insn, 5, 5);
12638     int rd = extract32(insn, 0, 5);
12639     bool is_long = false;
12640     int is_fp = 0;
12641     bool is_fp16 = false;
12642     int index;
12643     TCGv_ptr fpst;
12644 
12645     switch (16 * u + opcode) {
12646     case 0x08: /* MUL */
12647     case 0x10: /* MLA */
12648     case 0x14: /* MLS */
12649         if (is_scalar) {
12650             unallocated_encoding(s);
12651             return;
12652         }
12653         break;
12654     case 0x02: /* SMLAL, SMLAL2 */
12655     case 0x12: /* UMLAL, UMLAL2 */
12656     case 0x06: /* SMLSL, SMLSL2 */
12657     case 0x16: /* UMLSL, UMLSL2 */
12658     case 0x0a: /* SMULL, SMULL2 */
12659     case 0x1a: /* UMULL, UMULL2 */
12660         if (is_scalar) {
12661             unallocated_encoding(s);
12662             return;
12663         }
12664         is_long = true;
12665         break;
12666     case 0x03: /* SQDMLAL, SQDMLAL2 */
12667     case 0x07: /* SQDMLSL, SQDMLSL2 */
12668     case 0x0b: /* SQDMULL, SQDMULL2 */
12669         is_long = true;
12670         break;
12671     case 0x0c: /* SQDMULH */
12672     case 0x0d: /* SQRDMULH */
12673         break;
12674     case 0x01: /* FMLA */
12675     case 0x05: /* FMLS */
12676     case 0x09: /* FMUL */
12677     case 0x19: /* FMULX */
12678         is_fp = 1;
12679         break;
12680     case 0x1d: /* SQRDMLAH */
12681     case 0x1f: /* SQRDMLSH */
12682         if (!dc_isar_feature(aa64_rdm, s)) {
12683             unallocated_encoding(s);
12684             return;
12685         }
12686         break;
12687     case 0x0e: /* SDOT */
12688     case 0x1e: /* UDOT */
12689         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12690             unallocated_encoding(s);
12691             return;
12692         }
12693         break;
12694     case 0x0f:
12695         switch (size) {
12696         case 0: /* SUDOT */
12697         case 2: /* USDOT */
12698             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12699                 unallocated_encoding(s);
12700                 return;
12701             }
12702             size = MO_32;
12703             break;
12704         case 1: /* BFDOT */
12705             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12706                 unallocated_encoding(s);
12707                 return;
12708             }
12709             size = MO_32;
12710             break;
12711         case 3: /* BFMLAL{B,T} */
12712             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12713                 unallocated_encoding(s);
12714                 return;
12715             }
12716             /* Don't set is_fp: the FP size checks below are wrong for this insn */
12717             size = MO_16;
12718             break;
12719         default:
12720             unallocated_encoding(s);
12721             return;
12722         }
12723         break;
12724     case 0x11: /* FCMLA #0 */
12725     case 0x13: /* FCMLA #90 */
12726     case 0x15: /* FCMLA #180 */
12727     case 0x17: /* FCMLA #270 */
12728         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12729             unallocated_encoding(s);
12730             return;
12731         }
12732         is_fp = 2;
12733         break;
12734     case 0x00: /* FMLAL */
12735     case 0x04: /* FMLSL */
12736     case 0x18: /* FMLAL2 */
12737     case 0x1c: /* FMLSL2 */
12738         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12739             unallocated_encoding(s);
12740             return;
12741         }
12742         size = MO_16;
12743         /* Logically is_fp, but the helper takes tcg_env rather than fp_status. */
12744         break;
12745     default:
12746         unallocated_encoding(s);
12747         return;
12748     }
12749 
12750     switch (is_fp) {
12751     case 1: /* normal fp */
12752         /* convert insn encoded size to MemOp size */
12753         switch (size) {
12754         case 0: /* half-precision */
12755             size = MO_16;
12756             is_fp16 = true;
12757             break;
12758         case MO_32: /* single precision */
12759         case MO_64: /* double precision */
12760             break;
12761         default:
12762             unallocated_encoding(s);
12763             return;
12764         }
12765         break;
12766 
12767     case 2: /* complex fp */
12768         /* Each indexable element is a complex pair.  */
12769         size += 1;
12770         switch (size) {
12771         case MO_32:
12772             if (h && !is_q) {
12773                 unallocated_encoding(s);
12774                 return;
12775             }
12776             is_fp16 = true;
12777             break;
12778         case MO_64:
12779             break;
12780         default:
12781             unallocated_encoding(s);
12782             return;
12783         }
12784         break;
12785 
12786     default: /* integer */
12787         switch (size) {
12788         case MO_8:
12789         case MO_64:
12790             unallocated_encoding(s);
12791             return;
12792         }
12793         break;
12794     }
12795     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12796         unallocated_encoding(s);
12797         return;
12798     }
12799 
12800     /* Given MemOp size, adjust register and indexing.  */
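          /*
           * The element index comes from H:L:M for 16-bit elements and from
           * H:L for 32-bit ones (M instead extends Rm to 5 bits); for 64-bit
           * elements only H is used and L must be zero.
           */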
12801     switch (size) {
12802     case MO_16:
12803         index = h << 2 | l << 1 | m;
12804         break;
12805     case MO_32:
12806         index = h << 1 | l;
12807         rm |= m << 4;
12808         break;
12809     case MO_64:
12810         if (l || !is_q) {
12811             unallocated_encoding(s);
12812             return;
12813         }
12814         index = h;
12815         rm |= m << 4;
12816         break;
12817     default:
12818         g_assert_not_reached();
12819     }
12820 
12821     if (!fp_access_check(s)) {
12822         return;
12823     }
12824 
12825     if (is_fp) {
12826         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12827     } else {
12828         fpst = NULL;
12829     }
12830 
12831     switch (16 * u + opcode) {
12832     case 0x0e: /* SDOT */
12833     case 0x1e: /* UDOT */
12834         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12835                          u ? gen_helper_gvec_udot_idx_b
12836                          : gen_helper_gvec_sdot_idx_b);
12837         return;
12838     case 0x0f:
12839         switch (extract32(insn, 22, 2)) {
12840         case 0: /* SUDOT */
12841             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12842                              gen_helper_gvec_sudot_idx_b);
12843             return;
12844         case 1: /* BFDOT */
12845             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12846                              gen_helper_gvec_bfdot_idx);
12847             return;
12848         case 2: /* USDOT */
12849             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12850                              gen_helper_gvec_usdot_idx_b);
12851             return;
12852         case 3: /* BFMLAL{B,T} */
12853             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12854                               gen_helper_gvec_bfmlal_idx);
12855             return;
12856         }
12857         g_assert_not_reached();
12858     case 0x11: /* FCMLA #0 */
12859     case 0x13: /* FCMLA #90 */
12860     case 0x15: /* FCMLA #180 */
12861     case 0x17: /* FCMLA #270 */
12862         {
12863             int rot = extract32(insn, 13, 2);
12864             int data = (index << 2) | rot;
12865             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
12866                                vec_full_reg_offset(s, rn),
12867                                vec_full_reg_offset(s, rm),
12868                                vec_full_reg_offset(s, rd), fpst,
12869                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12870                                size == MO_64
12871                                ? gen_helper_gvec_fcmlas_idx
12872                                : gen_helper_gvec_fcmlah_idx);
12873         }
12874         return;
12875 
12876     case 0x00: /* FMLAL */
12877     case 0x04: /* FMLSL */
12878     case 0x18: /* FMLAL2 */
12879     case 0x1c: /* FMLSL2 */
12880         {
12881             int is_s = extract32(opcode, 2, 1);
12882             int is_2 = u;
12883             int data = (index << 2) | (is_2 << 1) | is_s;
12884             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12885                                vec_full_reg_offset(s, rn),
12886                                vec_full_reg_offset(s, rm), tcg_env,
12887                                is_q ? 16 : 8, vec_full_reg_size(s),
12888                                data, gen_helper_gvec_fmlal_idx_a64);
12889         }
12890         return;
12891 
12892     case 0x08: /* MUL */
12893         if (!is_long && !is_scalar) {
12894             static gen_helper_gvec_3 * const fns[3] = {
12895                 gen_helper_gvec_mul_idx_h,
12896                 gen_helper_gvec_mul_idx_s,
12897                 gen_helper_gvec_mul_idx_d,
12898             };
12899             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
12900                                vec_full_reg_offset(s, rn),
12901                                vec_full_reg_offset(s, rm),
12902                                is_q ? 16 : 8, vec_full_reg_size(s),
12903                                index, fns[size - 1]);
12904             return;
12905         }
12906         break;
12907 
12908     case 0x10: /* MLA */
12909         if (!is_long && !is_scalar) {
12910             static gen_helper_gvec_4 * const fns[3] = {
12911                 gen_helper_gvec_mla_idx_h,
12912                 gen_helper_gvec_mla_idx_s,
12913                 gen_helper_gvec_mla_idx_d,
12914             };
12915             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
12916                                vec_full_reg_offset(s, rn),
12917                                vec_full_reg_offset(s, rm),
12918                                vec_full_reg_offset(s, rd),
12919                                is_q ? 16 : 8, vec_full_reg_size(s),
12920                                index, fns[size - 1]);
12921             return;
12922         }
12923         break;
12924 
12925     case 0x14: /* MLS */
12926         if (!is_long && !is_scalar) {
12927             static gen_helper_gvec_4 * const fns[3] = {
12928                 gen_helper_gvec_mls_idx_h,
12929                 gen_helper_gvec_mls_idx_s,
12930                 gen_helper_gvec_mls_idx_d,
12931             };
12932             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
12933                                vec_full_reg_offset(s, rn),
12934                                vec_full_reg_offset(s, rm),
12935                                vec_full_reg_offset(s, rd),
12936                                is_q ? 16 : 8, vec_full_reg_size(s),
12937                                index, fns[size - 1]);
12938             return;
12939         }
12940         break;
12941     }
12942 
12943     if (size == 3) {
12944         TCGv_i64 tcg_idx = tcg_temp_new_i64();
12945         int pass;
12946 
12947         assert(is_fp && is_q && !is_long);
12948 
12949         read_vec_element(s, tcg_idx, rm, index, MO_64);
12950 
12951         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12952             TCGv_i64 tcg_op = tcg_temp_new_i64();
12953             TCGv_i64 tcg_res = tcg_temp_new_i64();
12954 
12955             read_vec_element(s, tcg_op, rn, pass, MO_64);
12956 
12957             switch (16 * u + opcode) {
12958             case 0x05: /* FMLS */
12959                 /* As usual for ARM, separate negation for fused multiply-add */
12960                 gen_helper_vfp_negd(tcg_op, tcg_op);
12961                 /* fall through */
12962             case 0x01: /* FMLA */
12963                 read_vec_element(s, tcg_res, rd, pass, MO_64);
12964                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12965                 break;
12966             case 0x09: /* FMUL */
12967                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12968                 break;
12969             case 0x19: /* FMULX */
12970                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12971                 break;
12972             default:
12973                 g_assert_not_reached();
12974             }
12975 
12976             write_vec_element(s, tcg_res, rd, pass, MO_64);
12977         }
12978 
12979         clear_vec_high(s, !is_scalar, rd);
12980     } else if (!is_long) {
12981         /* 32 bit floating point, or 16 or 32 bit integer.
12982          * For the 16 bit scalar case we use the usual Neon helpers and
12983          * rely on the fact that 0 op 0 == 0 with no side effects.
12984          */
12985         TCGv_i32 tcg_idx = tcg_temp_new_i32();
12986         int pass, maxpasses;
12987 
12988         if (is_scalar) {
12989             maxpasses = 1;
12990         } else {
12991             maxpasses = is_q ? 4 : 2;
12992         }
12993 
12994         read_vec_element_i32(s, tcg_idx, rm, index, size);
12995 
12996         if (size == 1 && !is_scalar) {
12997             /* The simplest way to handle the 16x16 indexed ops is to duplicate
12998              * the index into both halves of the 32 bit tcg_idx and then use
12999              * the usual Neon helpers.
13000              */
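                  /* e.g. an index value of 0x00001234 becomes 0x12341234 */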
13001             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13002         }
13003 
13004         for (pass = 0; pass < maxpasses; pass++) {
13005             TCGv_i32 tcg_op = tcg_temp_new_i32();
13006             TCGv_i32 tcg_res = tcg_temp_new_i32();
13007 
13008             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13009 
13010             switch (16 * u + opcode) {
13011             case 0x08: /* MUL */
13012             case 0x10: /* MLA */
13013             case 0x14: /* MLS */
13014             {
13015                 static NeonGenTwoOpFn * const fns[2][2] = {
13016                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13017                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13018                 };
13019                 NeonGenTwoOpFn *genfn;
13020                 bool is_sub = opcode == 0x4;
13021 
13022                 if (size == 1) {
13023                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13024                 } else {
13025                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13026                 }
13027                 if (opcode == 0x8) {
13028                     break;
13029                 }
13030                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13031                 genfn = fns[size - 1][is_sub];
13032                 genfn(tcg_res, tcg_op, tcg_res);
13033                 break;
13034             }
13035             case 0x05: /* FMLS */
13036             case 0x01: /* FMLA */
13037                 read_vec_element_i32(s, tcg_res, rd, pass,
13038                                      is_scalar ? size : MO_32);
13039                 switch (size) {
13040                 case 1:
13041                     if (opcode == 0x5) {
13042                         /* As usual for ARM, separate negation for fused
13043                          * multiply-add */
13044                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13045                     }
13046                     if (is_scalar) {
13047                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13048                                                    tcg_res, fpst);
13049                     } else {
13050                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13051                                                     tcg_res, fpst);
13052                     }
13053                     break;
13054                 case 2:
13055                     if (opcode == 0x5) {
13056                         /* As usual for ARM, separate negation for
13057                          * fused multiply-add */
13058                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13059                     }
13060                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13061                                            tcg_res, fpst);
13062                     break;
13063                 default:
13064                     g_assert_not_reached();
13065                 }
13066                 break;
13067             case 0x09: /* FMUL */
13068                 switch (size) {
13069                 case 1:
13070                     if (is_scalar) {
13071                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13072                                                 tcg_idx, fpst);
13073                     } else {
13074                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13075                                                  tcg_idx, fpst);
13076                     }
13077                     break;
13078                 case 2:
13079                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13080                     break;
13081                 default:
13082                     g_assert_not_reached();
13083                 }
13084                 break;
13085             case 0x19: /* FMULX */
13086                 switch (size) {
13087                 case 1:
13088                     if (is_scalar) {
13089                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13090                                                  tcg_idx, fpst);
13091                     } else {
13092                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13093                                                   tcg_idx, fpst);
13094                     }
13095                     break;
13096                 case 2:
13097                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13098                     break;
13099                 default:
13100                     g_assert_not_reached();
13101                 }
13102                 break;
13103             case 0x0c: /* SQDMULH */
13104                 if (size == 1) {
13105                     gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
13106                                                tcg_op, tcg_idx);
13107                 } else {
13108                     gen_helper_neon_qdmulh_s32(tcg_res, tcg_env,
13109                                                tcg_op, tcg_idx);
13110                 }
13111                 break;
13112             case 0x0d: /* SQRDMULH */
13113                 if (size == 1) {
13114                     gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env,
13115                                                 tcg_op, tcg_idx);
13116                 } else {
13117                     gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env,
13118                                                 tcg_op, tcg_idx);
13119                 }
13120                 break;
13121             case 0x1d: /* SQRDMLAH */
13122                 read_vec_element_i32(s, tcg_res, rd, pass,
13123                                      is_scalar ? size : MO_32);
13124                 if (size == 1) {
13125                     gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env,
13126                                                 tcg_op, tcg_idx, tcg_res);
13127                 } else {
13128                     gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env,
13129                                                 tcg_op, tcg_idx, tcg_res);
13130                 }
13131                 break;
13132             case 0x1f: /* SQRDMLSH */
13133                 read_vec_element_i32(s, tcg_res, rd, pass,
13134                                      is_scalar ? size : MO_32);
13135                 if (size == 1) {
13136                     gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env,
13137                                                 tcg_op, tcg_idx, tcg_res);
13138                 } else {
13139                     gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env,
13140                                                 tcg_op, tcg_idx, tcg_res);
13141                 }
13142                 break;
13143             default:
13144                 g_assert_not_reached();
13145             }
13146 
13147             if (is_scalar) {
13148                 write_fp_sreg(s, rd, tcg_res);
13149             } else {
13150                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13151             }
13152         }
13153 
13154         clear_vec_high(s, is_q, rd);
13155     } else {
13156         /* long ops: 16x16->32 or 32x32->64 */
13157         TCGv_i64 tcg_res[2];
13158         int pass;
13159         bool satop = extract32(opcode, 0, 1);
13160         MemOp memop = MO_32;
13161 
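              /* The SQD* forms are always signed; otherwise U selects unsigned. */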
13162         if (satop || !u) {
13163             memop |= MO_SIGN;
13164         }
13165 
13166         if (size == 2) {
13167             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13168 
13169             read_vec_element(s, tcg_idx, rm, index, memop);
13170 
13171             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13172                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13173                 TCGv_i64 tcg_passres;
13174                 int passelt;
13175 
13176                 if (is_scalar) {
13177                     passelt = 0;
13178                 } else {
13179                     passelt = pass + (is_q * 2);
13180                 }
13181 
13182                 read_vec_element(s, tcg_op, rn, passelt, memop);
13183 
13184                 tcg_res[pass] = tcg_temp_new_i64();
13185 
13186                 if (opcode == 0xa || opcode == 0xb) {
13187                     /* Non-accumulating ops */
13188                     tcg_passres = tcg_res[pass];
13189                 } else {
13190                     tcg_passres = tcg_temp_new_i64();
13191                 }
13192 
13193                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13194 
13195                 if (satop) {
13196                     /* saturating, doubling */
13197                     gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
13198                                                       tcg_passres, tcg_passres);
13199                 }
13200 
13201                 if (opcode == 0xa || opcode == 0xb) {
13202                     continue;
13203                 }
13204 
13205                 /* Accumulating op: handle accumulate step */
13206                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13207 
13208                 switch (opcode) {
13209                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13210                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13211                     break;
13212                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13213                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13214                     break;
13215                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13216                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13217                     /* fall through */
13218                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13219                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
13220                                                       tcg_res[pass],
13221                                                       tcg_passres);
13222                     break;
13223                 default:
13224                     g_assert_not_reached();
13225                 }
13226             }
13227 
13228             clear_vec_high(s, !is_scalar, rd);
13229         } else {
13230             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13231 
13232             assert(size == 1);
13233             read_vec_element_i32(s, tcg_idx, rm, index, size);
13234 
13235             if (!is_scalar) {
13236                 /* The simplest way to handle the 16x16 indexed ops is to
13237                  * duplicate the index into both halves of the 32 bit tcg_idx
13238                  * and then use the usual Neon helpers.
13239                  */
13240                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13241             }
13242 
13243             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13244                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13245                 TCGv_i64 tcg_passres;
13246 
13247                 if (is_scalar) {
13248                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13249                 } else {
13250                     read_vec_element_i32(s, tcg_op, rn,
13251                                          pass + (is_q * 2), MO_32);
13252                 }
13253 
13254                 tcg_res[pass] = tcg_temp_new_i64();
13255 
13256                 if (opcode == 0xa || opcode == 0xb) {
13257                     /* Non-accumulating ops */
13258                     tcg_passres = tcg_res[pass];
13259                 } else {
13260                     tcg_passres = tcg_temp_new_i64();
13261                 }
13262 
13263                 if (memop & MO_SIGN) {
13264                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13265                 } else {
13266                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13267                 }
13268                 if (satop) {
13269                     gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
13270                                                       tcg_passres, tcg_passres);
13271                 }
13272 
13273                 if (opcode == 0xa || opcode == 0xb) {
13274                     continue;
13275                 }
13276 
13277                 /* Accumulating op: handle accumulate step */
13278                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13279 
13280                 switch (opcode) {
13281                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13282                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13283                                              tcg_passres);
13284                     break;
13285                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13286                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13287                                              tcg_passres);
13288                     break;
13289                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13290                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13291                     /* fall through */
13292                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13293                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
13294                                                       tcg_res[pass],
13295                                                       tcg_passres);
13296                     break;
13297                 default:
13298                     g_assert_not_reached();
13299                 }
13300             }
13301 
13302             if (is_scalar) {
13303                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13304             }
13305         }
13306 
13307         if (is_scalar) {
13308             tcg_res[1] = tcg_constant_i64(0);
13309         }
13310 
13311         for (pass = 0; pass < 2; pass++) {
13312             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13313         }
13314     }
13315 }
13316 
13317 /* Crypto AES
13318  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13319  * +-----------------+------+-----------+--------+-----+------+------+
13320  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13321  * +-----------------+------+-----------+--------+-----+------+------+
13322  */
13323 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13324 {
13325     int size = extract32(insn, 22, 2);
13326     int opcode = extract32(insn, 12, 5);
13327     int rn = extract32(insn, 5, 5);
13328     int rd = extract32(insn, 0, 5);
13329     gen_helper_gvec_2 *genfn2 = NULL;
13330     gen_helper_gvec_3 *genfn3 = NULL;
13331 
13332     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13333         unallocated_encoding(s);
13334         return;
13335     }
13336 
13337     switch (opcode) {
13338     case 0x4: /* AESE */
13339         genfn3 = gen_helper_crypto_aese;
13340         break;
13341     case 0x6: /* AESMC */
13342         genfn2 = gen_helper_crypto_aesmc;
13343         break;
13344     case 0x5: /* AESD */
13345         genfn3 = gen_helper_crypto_aesd;
13346         break;
13347     case 0x7: /* AESIMC */
13348         genfn2 = gen_helper_crypto_aesimc;
13349         break;
13350     default:
13351         unallocated_encoding(s);
13352         return;
13353     }
13354 
13355     if (!fp_access_check(s)) {
13356         return;
13357     }
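          /*
           * AESE/AESD take the state in Rd as both source and destination,
           * with Rn supplying the round key; AESMC/AESIMC are two-operand.
           */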
13358     if (genfn2) {
13359         gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2);
13360     } else {
13361         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3);
13362     }
13363 }
13364 
13365 /* Crypto three-reg SHA
13366  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13367  * +-----------------+------+---+------+---+--------+-----+------+------+
13368  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13369  * +-----------------+------+---+------+---+--------+-----+------+------+
13370  */
13371 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13372 {
13373     int size = extract32(insn, 22, 2);
13374     int opcode = extract32(insn, 12, 3);
13375     int rm = extract32(insn, 16, 5);
13376     int rn = extract32(insn, 5, 5);
13377     int rd = extract32(insn, 0, 5);
13378     gen_helper_gvec_3 *genfn;
13379     bool feature;
13380 
13381     if (size != 0) {
13382         unallocated_encoding(s);
13383         return;
13384     }
13385 
13386     switch (opcode) {
13387     case 0: /* SHA1C */
13388         genfn = gen_helper_crypto_sha1c;
13389         feature = dc_isar_feature(aa64_sha1, s);
13390         break;
13391     case 1: /* SHA1P */
13392         genfn = gen_helper_crypto_sha1p;
13393         feature = dc_isar_feature(aa64_sha1, s);
13394         break;
13395     case 2: /* SHA1M */
13396         genfn = gen_helper_crypto_sha1m;
13397         feature = dc_isar_feature(aa64_sha1, s);
13398         break;
13399     case 3: /* SHA1SU0 */
13400         genfn = gen_helper_crypto_sha1su0;
13401         feature = dc_isar_feature(aa64_sha1, s);
13402         break;
13403     case 4: /* SHA256H */
13404         genfn = gen_helper_crypto_sha256h;
13405         feature = dc_isar_feature(aa64_sha256, s);
13406         break;
13407     case 5: /* SHA256H2 */
13408         genfn = gen_helper_crypto_sha256h2;
13409         feature = dc_isar_feature(aa64_sha256, s);
13410         break;
13411     case 6: /* SHA256SU1 */
13412         genfn = gen_helper_crypto_sha256su1;
13413         feature = dc_isar_feature(aa64_sha256, s);
13414         break;
13415     default:
13416         unallocated_encoding(s);
13417         return;
13418     }
13419 
13420     if (!feature) {
13421         unallocated_encoding(s);
13422         return;
13423     }
13424 
13425     if (!fp_access_check(s)) {
13426         return;
13427     }
13428     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13429 }
13430 
13431 /* Crypto two-reg SHA
13432  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13433  * +-----------------+------+-----------+--------+-----+------+------+
13434  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13435  * +-----------------+------+-----------+--------+-----+------+------+
13436  */
13437 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13438 {
13439     int size = extract32(insn, 22, 2);
13440     int opcode = extract32(insn, 12, 5);
13441     int rn = extract32(insn, 5, 5);
13442     int rd = extract32(insn, 0, 5);
13443     gen_helper_gvec_2 *genfn;
13444     bool feature;
13445 
13446     if (size != 0) {
13447         unallocated_encoding(s);
13448         return;
13449     }
13450 
13451     switch (opcode) {
13452     case 0: /* SHA1H */
13453         feature = dc_isar_feature(aa64_sha1, s);
13454         genfn = gen_helper_crypto_sha1h;
13455         break;
13456     case 1: /* SHA1SU1 */
13457         feature = dc_isar_feature(aa64_sha1, s);
13458         genfn = gen_helper_crypto_sha1su1;
13459         break;
13460     case 2: /* SHA256SU0 */
13461         feature = dc_isar_feature(aa64_sha256, s);
13462         genfn = gen_helper_crypto_sha256su0;
13463         break;
13464     default:
13465         unallocated_encoding(s);
13466         return;
13467     }
13468 
13469     if (!feature) {
13470         unallocated_encoding(s);
13471         return;
13472     }
13473 
13474     if (!fp_access_check(s)) {
13475         return;
13476     }
13477     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13478 }
13479 
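      /*
       * RAX1 (SHA3): d = n ^ rol64(m, 1) on each 64-bit lane.  The gvec
       * expansion below provides 64-bit scalar, host-vector and
       * out-of-line helper implementations.
       */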
13480 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
13481 {
13482     tcg_gen_rotli_i64(d, m, 1);
13483     tcg_gen_xor_i64(d, d, n);
13484 }
13485 
13486 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
13487 {
13488     tcg_gen_rotli_vec(vece, d, m, 1);
13489     tcg_gen_xor_vec(vece, d, d, n);
13490 }
13491 
13492 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13493                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13494 {
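          /* tcg picks whichever expansion suits the host: vector ops (which
           * need rotli_vec, per opt_opc), 64-bit scalar ops, or the helper. */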
13495     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13496     static const GVecGen3 op = {
13497         .fni8 = gen_rax1_i64,
13498         .fniv = gen_rax1_vec,
13499         .opt_opc = vecop_list,
13500         .fno = gen_helper_crypto_rax1,
13501         .vece = MO_64,
13502     };
13503     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13504 }
13505 
13506 /* Crypto three-reg SHA512
13507  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13508  * +-----------------------+------+---+---+-----+--------+------+------+
13509  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13510  * +-----------------------+------+---+---+-----+--------+------+------+
13511  */
13512 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13513 {
13514     int opcode = extract32(insn, 10, 2);
13515     int o = extract32(insn, 14, 1);
13516     int rm = extract32(insn, 16, 5);
13517     int rn = extract32(insn, 5, 5);
13518     int rd = extract32(insn, 0, 5);
13519     bool feature;
13520     gen_helper_gvec_3 *oolfn = NULL;
13521     GVecGen3Fn *gvecfn = NULL;
13522 
13523     if (o == 0) {
13524         switch (opcode) {
13525         case 0: /* SHA512H */
13526             feature = dc_isar_feature(aa64_sha512, s);
13527             oolfn = gen_helper_crypto_sha512h;
13528             break;
13529         case 1: /* SHA512H2 */
13530             feature = dc_isar_feature(aa64_sha512, s);
13531             oolfn = gen_helper_crypto_sha512h2;
13532             break;
13533         case 2: /* SHA512SU1 */
13534             feature = dc_isar_feature(aa64_sha512, s);
13535             oolfn = gen_helper_crypto_sha512su1;
13536             break;
13537         case 3: /* RAX1 */
13538             feature = dc_isar_feature(aa64_sha3, s);
13539             gvecfn = gen_gvec_rax1;
13540             break;
13541         default:
13542             g_assert_not_reached();
13543         }
13544     } else {
13545         switch (opcode) {
13546         case 0: /* SM3PARTW1 */
13547             feature = dc_isar_feature(aa64_sm3, s);
13548             oolfn = gen_helper_crypto_sm3partw1;
13549             break;
13550         case 1: /* SM3PARTW2 */
13551             feature = dc_isar_feature(aa64_sm3, s);
13552             oolfn = gen_helper_crypto_sm3partw2;
13553             break;
13554         case 2: /* SM4EKEY */
13555             feature = dc_isar_feature(aa64_sm4, s);
13556             oolfn = gen_helper_crypto_sm4ekey;
13557             break;
13558         default:
13559             unallocated_encoding(s);
13560             return;
13561         }
13562     }
13563 
13564     if (!feature) {
13565         unallocated_encoding(s);
13566         return;
13567     }
13568 
13569     if (!fp_access_check(s)) {
13570         return;
13571     }
13572 
13573     if (oolfn) {
13574         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13575     } else {
13576         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13577     }
13578 }
13579 
13580 /* Crypto two-reg SHA512
13581  *  31                                     12  11  10  9    5 4    0
13582  * +-----------------------------------------+--------+------+------+
13583  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13584  * +-----------------------------------------+--------+------+------+
13585  */
13586 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13587 {
13588     int opcode = extract32(insn, 10, 2);
13589     int rn = extract32(insn, 5, 5);
13590     int rd = extract32(insn, 0, 5);
13591     bool feature;
13592 
13593     switch (opcode) {
13594     case 0: /* SHA512SU0 */
13595         feature = dc_isar_feature(aa64_sha512, s);
13596         break;
13597     case 1: /* SM4E */
13598         feature = dc_isar_feature(aa64_sm4, s);
13599         break;
13600     default:
13601         unallocated_encoding(s);
13602         return;
13603     }
13604 
13605     if (!feature) {
13606         unallocated_encoding(s);
13607         return;
13608     }
13609 
13610     if (!fp_access_check(s)) {
13611         return;
13612     }
13613 
13614     switch (opcode) {
13615     case 0: /* SHA512SU0 */
13616         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13617         break;
13618     case 1: /* SM4E */
13619         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13620         break;
13621     default:
13622         g_assert_not_reached();
13623     }
13624 }
13625 
13626 /* Crypto four-register
13627  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13628  * +-------------------+-----+------+---+------+------+------+
13629  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13630  * +-------------------+-----+------+---+------+------+------+
13631  */
13632 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13633 {
13634     int op0 = extract32(insn, 21, 2);
13635     int rm = extract32(insn, 16, 5);
13636     int ra = extract32(insn, 10, 5);
13637     int rn = extract32(insn, 5, 5);
13638     int rd = extract32(insn, 0, 5);
13639     bool feature;
13640 
13641     switch (op0) {
13642     case 0: /* EOR3 */
13643     case 1: /* BCAX */
13644         feature = dc_isar_feature(aa64_sha3, s);
13645         break;
13646     case 2: /* SM3SS1 */
13647         feature = dc_isar_feature(aa64_sm3, s);
13648         break;
13649     default:
13650         unallocated_encoding(s);
13651         return;
13652     }
13653 
13654     if (!feature) {
13655         unallocated_encoding(s);
13656         return;
13657     }
13658 
13659     if (!fp_access_check(s)) {
13660         return;
13661     }
13662 
13663     if (op0 < 2) {
13664         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13665         int pass;
13666 
13667         tcg_op1 = tcg_temp_new_i64();
13668         tcg_op2 = tcg_temp_new_i64();
13669         tcg_op3 = tcg_temp_new_i64();
13670         tcg_res[0] = tcg_temp_new_i64();
13671         tcg_res[1] = tcg_temp_new_i64();
13672 
13673         for (pass = 0; pass < 2; pass++) {
13674             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13675             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13676             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13677 
13678             if (op0 == 0) {
13679                 /* EOR3 */
13680                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13681             } else {
13682                 /* BCAX */
13683                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13684             }
13685             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13686         }
13687         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13688         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13689     } else {
13690         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13691 
13692         tcg_op1 = tcg_temp_new_i32();
13693         tcg_op2 = tcg_temp_new_i32();
13694         tcg_op3 = tcg_temp_new_i32();
13695         tcg_res = tcg_temp_new_i32();
13696         tcg_zero = tcg_constant_i32(0);
13697 
13698         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13699         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13700         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13701 
13702         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13703         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13704         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13705         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13706 
13707         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13708         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13709         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13710         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13711     }
13712 }
13713 
13714 /* Crypto XAR
13715  *  31                   21 20  16 15    10 9    5 4    0
13716  * +-----------------------+------+--------+------+------+
13717  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13718  * +-----------------------+------+--------+------+------+
13719  */
13720 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13721 {
13722     int rm = extract32(insn, 16, 5);
13723     int imm6 = extract32(insn, 10, 6);
13724     int rn = extract32(insn, 5, 5);
13725     int rd = extract32(insn, 0, 5);
13726 
13727     if (!dc_isar_feature(aa64_sha3, s)) {
13728         unallocated_encoding(s);
13729         return;
13730     }
13731 
13732     if (!fp_access_check(s)) {
13733         return;
13734     }
13735 
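          /* XAR: Vd = ROR(Vn ^ Vm, imm6) on each 64-bit lane. */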
13736     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13737                  vec_full_reg_offset(s, rn),
13738                  vec_full_reg_offset(s, rm), imm6, 16,
13739                  vec_full_reg_size(s));
13740 }
13741 
13742 /* Crypto three-reg imm2
13743  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13744  * +-----------------------+------+-----+------+--------+------+------+
13745  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13746  * +-----------------------+------+-----+------+--------+------+------+
13747  */
13748 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13749 {
13750     static gen_helper_gvec_3 * const fns[4] = {
13751         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13752         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13753     };
13754     int opcode = extract32(insn, 10, 2);
13755     int imm2 = extract32(insn, 12, 2);
13756     int rm = extract32(insn, 16, 5);
13757     int rn = extract32(insn, 5, 5);
13758     int rd = extract32(insn, 0, 5);
13759 
13760     if (!dc_isar_feature(aa64_sm3, s)) {
13761         unallocated_encoding(s);
13762         return;
13763     }
13764 
13765     if (!fp_access_check(s)) {
13766         return;
13767     }
13768 
13769     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13770 }
13771 
13772 /* C3.6 Data processing - SIMD, inc Crypto
13773  *
13774  * As the decode gets a little complex we are using a table based
13775  * approach for this part of the decode.
13776  */
13777 static const AArch64DecodeTable data_proc_simd[] = {
13778     /* pattern  ,  mask     ,  fn                        */
13779     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13780     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13781     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13782     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13783     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13784     { 0x0e000400, 0x9fe08400, disas_simd_copy },
13785     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13786     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13787     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13788     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13789     { 0x0e000000, 0xbf208c00, disas_simd_tb },
13790     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13791     { 0x2e000000, 0xbf208400, disas_simd_ext },
13792     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13793     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13794     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13795     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13796     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13797     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13798     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13799     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13800     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13801     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13802     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13803     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13804     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13805     { 0xce000000, 0xff808000, disas_crypto_four_reg },
13806     { 0xce800000, 0xffe00000, disas_crypto_xar },
13807     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13808     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13809     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13810     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13811     { 0x00000000, 0x00000000, NULL }
13812 };
13813 
13814 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13815 {
13816     /* Note that this is called with all non-FP cases from
13817      * table C3-6, so it must UNDEF for entries not specifically
13818      * allocated to instructions in that table.
13819      */
13820     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13821     if (fn) {
13822         fn(s, insn);
13823     } else {
13824         unallocated_encoding(s);
13825     }
13826 }
13827 
13828 /* C3.6 Data processing - SIMD and floating point */
13829 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13830 {
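          /* Bit 28 set with bit 30 clear selects the scalar FP and
           * FP<->integer conversion groups; everything else here is SIMD. */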
13831     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13832         disas_data_proc_fp(s, insn);
13833     } else {
13834         /* SIMD, including crypto */
13835         disas_data_proc_simd(s, insn);
13836     }
13837 }
13838 
13839 static bool trans_OK(DisasContext *s, arg_OK *a)
13840 {
13841     return true;
13842 }
13843 
13844 static bool trans_FAIL(DisasContext *s, arg_OK *a)
13845 {
13846     s->is_nonstreaming = true;
13847     return true;
13848 }
13849 
13850 /**
13851  * is_guarded_page:
13852  * @env: The cpu environment
13853  * @s: The DisasContext
13854  *
13855  * Return true if the page is guarded.
13856  */
13857 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13858 {
13859     uint64_t addr = s->base.pc_first;
13860 #ifdef CONFIG_USER_ONLY
13861     return page_get_flags(addr) & PAGE_BTI;
13862 #else
13863     CPUTLBEntryFull *full;
13864     void *host;
13865     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13866     int flags;
13867 
13868     /*
13869      * We test this immediately after reading an insn, which means
13870      * that the TLB entry must be present and valid, and thus this
13871      * access will never raise an exception.
13872      */
13873     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
13874                               false, &host, &full, 0);
13875     assert(!(flags & TLB_INVALID_MASK));
13876 
13877     return full->extra.arm.guarded;
13878 #endif
13879 }
13880 
13881 /**
13882  * btype_destination_ok:
13883  * @insn: The instruction at the branch destination
13884  * @bt: SCTLR_ELx.BT
13885  * @btype: PSTATE.BTYPE, which is known to be non-zero
13886  *
13887  * On a guarded page, only a limited set of insns may be present
13888  * at the branch target:
13889  *   - branch target identifiers,
13890  *   - PACIASP, PACIBSP,
13891  *   - BRK insn,
13892  *   - HLT insn.
13893  * Anything else causes a Branch Target Exception.
13894  *
13895  * Return true if the branch is compatible, false to raise BTITRAP.
13896  */
13897 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
13898 {
13899     if ((insn & 0xfffff01fu) == 0xd503201fu) {
13900         /* HINT space */
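              /* The base encoding 0xd503201f is NOP; bits [11:5] are CRm:op2. */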
13901         switch (extract32(insn, 5, 7)) {
13902         case 0b011001: /* PACIASP */
13903         case 0b011011: /* PACIBSP */
13904             /*
13905              * If SCTLR_ELx.BT, then PACI*SP are not compatible
13906              * with btype == 3.  Otherwise all btype are ok.
13907              */
13908             return !bt || btype != 3;
13909         case 0b100000: /* BTI */
13910             /* Not compatible with any btype.  */
13911             return false;
13912         case 0b100010: /* BTI c */
13913             /* Not compatible with btype == 3 */
13914             return btype != 3;
13915         case 0b100100: /* BTI j */
13916             /* Not compatible with btype == 2 */
13917             return btype != 2;
13918         case 0b100110: /* BTI jc */
13919             /* Compatible with any btype.  */
13920             return true;
13921         }
13922     } else {
13923         switch (insn & 0xffe0001fu) {
13924         case 0xd4200000u: /* BRK */
13925         case 0xd4400000u: /* HLT */
13926             /* Give priority to the breakpoint exception.  */
13927             return true;
13928         }
13929     }
13930     return false;
13931 }
13932 
13933 /* C3.1 A64 instruction index by encoding */
13934 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
13935 {
13936     switch (extract32(insn, 25, 4)) {
13937     case 0x5:
13938     case 0xd:      /* Data processing - register */
13939         disas_data_proc_reg(s, insn);
13940         break;
13941     case 0x7:
13942     case 0xf:      /* Data processing - SIMD and floating point */
13943         disas_data_proc_simd_fp(s, insn);
13944         break;
13945     default:
13946         unallocated_encoding(s);
13947         break;
13948     }
13949 }
13950 
13951 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
13952                                           CPUState *cpu)
13953 {
13954     DisasContext *dc = container_of(dcbase, DisasContext, base);
13955     CPUARMState *env = cpu_env(cpu);
13956     ARMCPU *arm_cpu = env_archcpu(env);
13957     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
13958     int bound, core_mmu_idx;
13959 
13960     dc->isar = &arm_cpu->isar;
13961     dc->condjmp = 0;
13962     dc->pc_save = dc->base.pc_first;
13963     dc->aarch64 = true;
13964     dc->thumb = false;
13965     dc->sctlr_b = 0;
13966     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
13967     dc->condexec_mask = 0;
13968     dc->condexec_cond = 0;
13969     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
13970     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
13971     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
13972     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
13973     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
13974     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
13975 #if !defined(CONFIG_USER_ONLY)
13976     dc->user = (dc->current_el == 0);
13977 #endif
13978     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
13979     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
13980     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
13981     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
13982     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
13983     dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
13984     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
13985     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
13986     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
13987     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
13988     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
13989     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
13990     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
13991     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
13992     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
13993     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
13994     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
13995     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
13996     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
13997     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
13998     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
13999     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
14000     dc->vec_len = 0;
14001     dc->vec_stride = 0;
14002     dc->cp_regs = arm_cpu->cp_regs;
14003     dc->features = env->features;
14004     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14005     dc->gm_blocksize = arm_cpu->gm_blocksize;
14006 
14007 #ifdef CONFIG_USER_ONLY
14008     /* In sve_probe_page, we assume TBI is enabled. */
14009     tcg_debug_assert(dc->tbid & 1);
14010 #endif
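
    /*
     * A note on the assertion above (an interpretive comment; this reading
     * of the bit layout follows how tbid is consumed by
     * gen_top_byte_ignore()): dc->tbid is a two-bit field whose bit 0
     * covers the lower address half (bit 55 clear), which is where
     * user-only code runs, so only bit 0 needs to be set here.
     */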
14011 
14012     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
14013 
    /*
     * Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate a software step exception for the completed step
     *   end the TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
14029     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14030     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14031     dc->is_ldex = false;
14032 
14033     /* Bound the number of insns to execute to those left on the page.  */
14034     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
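    /*
     * Worked example (illustrative, assuming 4KiB pages so that
     * TARGET_PAGE_MASK == ~0xfff): if pc_first ends in ...0ff8, then
     * pc_first | TARGET_PAGE_MASK == ~0x7, and -(~0x7) / 4 == 8 / 4 == 2,
     * i.e. exactly the two insn slots left before the page boundary.
     */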
14035 
14036     /* If architectural single step active, limit to 1.  */
14037     if (dc->ss_active) {
14038         bound = 1;
14039     }
14040     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14041 }
14042 
14043 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14044 {
14045 }
14046 
14047 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14048 {
14049     DisasContext *dc = container_of(dcbase, DisasContext, base);
14050     target_ulong pc_arg = dc->base.pc_next;
14051 
14052     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14053         pc_arg &= ~TARGET_PAGE_MASK;
14054     }
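    /*
     * Illustrative example (assuming 4KiB pages): with CF_PCREL only the
     * page offset is recorded here, so pc_next == 0xaaaade4dbee4 would
     * yield pc_arg == 0xee4; the page-aligned part is recovered from the
     * runtime PC when insn state is restored.
     */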
14055     tcg_gen_insn_start(pc_arg, 0, 0);
14056     dc->insn_start = tcg_last_op();
14057 }
14058 
14059 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14060 {
14061     DisasContext *s = container_of(dcbase, DisasContext, base);
14062     CPUARMState *env = cpu_env(cpu);
14063     uint64_t pc = s->base.pc_next;
14064     uint32_t insn;
14065 
    /* Single-step exceptions have the highest priority. */
14067     if (s->ss_active && !s->pstate_ss) {
        /*
         * Single-step state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (e.g. by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
14078         assert(s->base.num_insns == 1);
14079         gen_swstep_exception(s, 0, 0);
14080         s->base.is_jmp = DISAS_NORETURN;
14081         s->base.pc_next = pc + 4;
14082         return;
14083     }
14084 
14085     if (pc & 3) {
14086         /*
14087          * PC alignment fault.  This has priority over the instruction abort
14088          * that we would receive from a translation fault via arm_ldl_code.
14089          * This should only be possible after an indirect branch, at the
14090          * start of the TB.
14091          */
14092         assert(s->base.num_insns == 1);
14093         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
14094         s->base.is_jmp = DISAS_NORETURN;
14095         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14096         return;
14097     }
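
    /*
     * For instance (illustrative): an indirect branch to 0x1002 reaches
     * the block above with pc & 3 == 2; pc_next is bumped to
     * QEMU_ALIGN_UP(0x1002, 4) == 0x1004 only to keep the translator's
     * bookkeeping consistent, since the generated exception means no
     * code at that address is ever reached.
     */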
14098 
14099     s->pc_curr = pc;
14100     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14101     s->insn = insn;
14102     s->base.pc_next = pc + 4;
14103 
14104     s->fp_access_checked = false;
14105     s->sve_access_checked = false;
14106 
14107     if (s->pstate_il) {
14108         /*
14109          * Illegal execution state. This has priority over BTI
14110          * exceptions, but comes after instruction abort exceptions.
14111          */
14112         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14113         return;
14114     }
14115 
14116     if (dc_isar_feature(aa64_bti, s)) {
14117         if (s->base.num_insns == 1) {
14118             /*
14119              * At the first insn of the TB, compute s->guarded_page.
14120              * We delayed computing this until successfully reading
14121              * the first insn of the TB, above.  This (mostly) ensures
14122              * that the softmmu tlb entry has been populated, and the
14123              * page table GP bit is available.
14124              *
14125              * Note that we need to compute this even if btype == 0,
14126              * because this value is used for BR instructions later
14127              * where ENV is not available.
14128              */
14129             s->guarded_page = is_guarded_page(env, s);
14130 
            /* Only the first insn of the TB may have btype set to non-zero.  */
14132             tcg_debug_assert(s->btype >= 0);
14133 
            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above almost
             * everything else.  This allows us to handle the trap now,
             * instead of waiting until the insn is otherwise decoded.
             */
14140             if (s->btype != 0
14141                 && s->guarded_page
14142                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14143                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14144                 return;
14145             }
14146         } else {
14147             /* Not the first insn: btype must be 0.  */
14148             tcg_debug_assert(s->btype == 0);
14149         }
14150     }
14151 
14152     s->is_nonstreaming = false;
14153     if (s->sme_trap_nonstreaming) {
14154         disas_sme_fa64(s, insn);
14155     }
14156 
14157     if (!disas_a64(s, insn) &&
14158         !disas_sme(s, insn) &&
14159         !disas_sve(s, insn)) {
14160         disas_a64_legacy(s, insn);
14161     }
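
    /*
     * A note on the chain above: the decodetree-generated disas_a64(),
     * disas_sme() and disas_sve() return true only if some pattern
     * matched (and its trans_ function accepted the insn), so the
     * hand-written disas_a64_legacy() table decoder is reached only for
     * encodings not yet converted to decodetree.
     */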
14162 
14163     /*
14164      * After execution of most insns, btype is reset to 0.
14165      * Note that we set btype == -1 when the insn sets btype.
14166      */
14167     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14168         reset_btype(s);
14169     }
14170 }
14171 
14172 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14173 {
14174     DisasContext *dc = container_of(dcbase, DisasContext, base);
14175 
14176     if (unlikely(dc->ss_active)) {
        /*
         * Note that this means single-stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code,
         * as gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when single-stepping).
         */
14182         switch (dc->base.is_jmp) {
14183         default:
14184             gen_a64_update_pc(dc, 4);
14185             /* fall through */
14186         case DISAS_EXIT:
14187         case DISAS_JUMP:
14188             gen_step_complete_exception(dc);
14189             break;
14190         case DISAS_NORETURN:
14191             break;
14192         }
14193     } else {
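        /*
         * A rough map of the cases below (summary only, no behavioural
         * change): DISAS_NEXT/DISAS_TOO_MANY chain straight to the next
         * TB via gen_goto_tb(); the DISAS_UPDATE_* variants write back
         * the PC first and then either exit to the main loop or try a
         * TB-hash lookup; WFI/WFE/YIELD advance the PC past the 4-byte
         * insn before calling their helpers.
         */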
14194         switch (dc->base.is_jmp) {
14195         case DISAS_NEXT:
14196         case DISAS_TOO_MANY:
14197             gen_goto_tb(dc, 1, 4);
14198             break;
14199         default:
14200         case DISAS_UPDATE_EXIT:
14201             gen_a64_update_pc(dc, 4);
14202             /* fall through */
14203         case DISAS_EXIT:
14204             tcg_gen_exit_tb(NULL, 0);
14205             break;
14206         case DISAS_UPDATE_NOCHAIN:
14207             gen_a64_update_pc(dc, 4);
14208             /* fall through */
14209         case DISAS_JUMP:
14210             tcg_gen_lookup_and_goto_ptr();
14211             break;
14212         case DISAS_NORETURN:
14213         case DISAS_SWI:
14214             break;
14215         case DISAS_WFE:
14216             gen_a64_update_pc(dc, 4);
14217             gen_helper_wfe(tcg_env);
14218             break;
14219         case DISAS_YIELD:
14220             gen_a64_update_pc(dc, 4);
14221             gen_helper_yield(tcg_env);
14222             break;
14223         case DISAS_WFI:
14224             /*
14225              * This is a special case because we don't want to just halt
14226              * the CPU if trying to debug across a WFI.
14227              */
14228             gen_a64_update_pc(dc, 4);
14229             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
14230             /*
14231              * The helper doesn't necessarily throw an exception, but we
14232              * must go back to the main loop to check for interrupts anyway.
14233              */
14234             tcg_gen_exit_tb(NULL, 0);
14235             break;
14236         }
14237     }
14238 }
14239 
14240 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14241                                  CPUState *cpu, FILE *logfile)
14242 {
14243     DisasContext *dc = container_of(dcbase, DisasContext, base);
14244 
14245     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14246     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14247 }
14248 
14249 const TranslatorOps aarch64_translator_ops = {
14250     .init_disas_context = aarch64_tr_init_disas_context,
14251     .tb_start           = aarch64_tr_tb_start,
14252     .insn_start         = aarch64_tr_insn_start,
14253     .translate_insn     = aarch64_tr_translate_insn,
14254     .tb_stop            = aarch64_tr_tb_stop,
14255     .disas_log          = aarch64_tr_disas_log,
14256 };
14257