xref: /openbmc/qemu/target/arm/tcg/translate-a64.c (revision 7e10ce2706e2dbed6a59825dc0286b3810395afa)
1 /*
2  *  AArch64 translation
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "exec/exec-all.h"
22 #include "translate.h"
23 #include "translate-a64.h"
24 #include "qemu/log.h"
25 #include "disas/disas.h"
26 #include "arm_ldst.h"
27 #include "semihosting/semihost.h"
28 #include "cpregs.h"
29 
30 static TCGv_i64 cpu_X[32];
31 static TCGv_i64 cpu_pc;
32 
33 /* Load/store exclusive handling */
34 static TCGv_i64 cpu_exclusive_high;
35 
/*
 * Names attached to the TCG globals for the general purpose registers,
 * indexed by register number.  Index 30 is named "lr" and index 31 "sp".
 * Neither the strings nor the pointers are ever written, so the table
 * itself is const too.
 */
static const char * const regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};
42 
/* Shift types; the enumerators take the consecutive values 0..3. */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,     /* 0 */
    A64_SHIFT_TYPE_LSR,         /* 1 */
    A64_SHIFT_TYPE_ASR,         /* 2 */
    A64_SHIFT_TYPE_ROR          /* 3 */
};
49 
50 /*
51  * Helpers for extracting complex instruction fields
52  */
53 
54 /*
55  * For load/store with an unsigned 12 bit immediate scaled by the element
56  * size. The input has the immediate field in bits [14:3] and the element
57  * size in [2:0].
58  */
59 static int uimm_scaled(DisasContext *s, int x)
60 {
61     unsigned imm = x >> 3;
62     unsigned scale = extract32(x, 0, 3);
63     return imm << scale;
64 }
65 
66 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
67 static int scale_by_log2_tag_granule(DisasContext *s, int x)
68 {
69     return x << LOG2_TAG_GRANULE;
70 }
71 
72 /*
73  * Include the generated decoders.
74  */
75 
76 #include "decode-sme-fa64.c.inc"
77 #include "decode-a64.c.inc"
78 
79 /* Table based decoder typedefs - used when the relevant bits for decode
80  * are too awkwardly scattered across the instruction (eg SIMD).
81  */
82 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
83 
84 typedef struct AArch64DecodeTable {
85     uint32_t pattern;
86     uint32_t mask;
87     AArch64DecodeFn *disas_fn;
88 } AArch64DecodeTable;
89 
90 /* initialize TCG globals.  */
91 void a64_translate_init(void)
92 {
93     int i;
94 
95     cpu_pc = tcg_global_mem_new_i64(tcg_env,
96                                     offsetof(CPUARMState, pc),
97                                     "pc");
98     for (i = 0; i < 32; i++) {
99         cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
100                                           offsetof(CPUARMState, xregs[i]),
101                                           regnames[i]);
102     }
103 
104     cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
105         offsetof(CPUARMState, exclusive_high), "exclusive_high");
106 }
107 
108 /*
109  * Return the core mmu_idx to use for A64 load/store insns which
110  * have a "unprivileged load/store" variant. Those insns access
111  * EL0 if executed from an EL which has control over EL0 (usually
112  * EL1) but behave like normal loads and stores if executed from
113  * elsewhere (eg EL3).
114  *
115  * @unpriv : true for the unprivileged encoding; false for the
116  *           normal encoding (in which case we will return the same
117  *           thing as get_mem_index().
118  */
119 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
120 {
121     /*
122      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
123      * which is the usual mmu_idx for this cpu state.
124      */
125     ARMMMUIdx useridx = s->mmu_idx;
126 
127     if (unpriv && s->unpriv) {
128         /*
129          * We have pre-computed the condition for AccType_UNPRIV.
130          * Therefore we should never get here with a mmu_idx for
131          * which we do not know the corresponding user mmu_idx.
132          */
133         switch (useridx) {
134         case ARMMMUIdx_E10_1:
135         case ARMMMUIdx_E10_1_PAN:
136             useridx = ARMMMUIdx_E10_0;
137             break;
138         case ARMMMUIdx_E20_2:
139         case ARMMMUIdx_E20_2_PAN:
140             useridx = ARMMMUIdx_E20_0;
141             break;
142         default:
143             g_assert_not_reached();
144         }
145     }
146     return arm_to_core_mmu_idx(useridx);
147 }
148 
149 static void set_btype_raw(int val)
150 {
151     tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
152                    offsetof(CPUARMState, btype));
153 }
154 
155 static void set_btype(DisasContext *s, int val)
156 {
157     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
158     tcg_debug_assert(val >= 1 && val <= 3);
159     set_btype_raw(val);
160     s->btype = -1;
161 }
162 
163 static void reset_btype(DisasContext *s)
164 {
165     if (s->btype != 0) {
166         set_btype_raw(0);
167         s->btype = 0;
168     }
169 }
170 
171 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
172 {
173     assert(s->pc_save != -1);
174     if (tb_cflags(s->base.tb) & CF_PCREL) {
175         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
176     } else {
177         tcg_gen_movi_i64(dest, s->pc_curr + diff);
178     }
179 }
180 
181 void gen_a64_update_pc(DisasContext *s, target_long diff)
182 {
183     gen_pc_plus_diff(s, cpu_pc, diff);
184     s->pc_save = s->pc_curr + diff;
185 }
186 
187 /*
188  * Handle Top Byte Ignore (TBI) bits.
189  *
190  * If address tagging is enabled via the TCR TBI bits:
191  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
192  *    then the address is zero-extended, clearing bits [63:56]
193  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
194  *    and TBI1 controls addresses with bit 55 == 1.
195  *    If the appropriate TBI bit is set for the address then
196  *    the address is sign-extended from bit 55 into bits [63:56]
197  *
198  * Here We have concatenated TBI{1,0} into tbi.
199  */
200 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
201                                 TCGv_i64 src, int tbi)
202 {
203     if (tbi == 0) {
204         /* Load unmodified address */
205         tcg_gen_mov_i64(dst, src);
206     } else if (!regime_has_2_ranges(s->mmu_idx)) {
207         /* Force tag byte to all zero */
208         tcg_gen_extract_i64(dst, src, 0, 56);
209     } else {
210         /* Sign-extend from bit 55.  */
211         tcg_gen_sextract_i64(dst, src, 0, 56);
212 
213         switch (tbi) {
214         case 1:
215             /* tbi0 but !tbi1: only use the extension if positive */
216             tcg_gen_and_i64(dst, dst, src);
217             break;
218         case 2:
219             /* !tbi0 but tbi1: only use the extension if negative */
220             tcg_gen_or_i64(dst, dst, src);
221             break;
222         case 3:
223             /* tbi0 and tbi1: always use the extension */
224             break;
225         default:
226             g_assert_not_reached();
227         }
228     }
229 }
230 
231 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
232 {
233     /*
234      * If address tagging is enabled for instructions via the TCR TBI bits,
235      * then loading an address into the PC will clear out any tag.
236      */
237     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
238     s->pc_save = -1;
239 }
240 
241 /*
242  * Handle MTE and/or TBI.
243  *
244  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
245  * for the tag to be present in the FAR_ELx register.  But for user-only
246  * mode we do not have a TLB with which to implement this, so we must
247  * remove the top byte now.
248  *
249  * Always return a fresh temporary that we can increment independently
250  * of the write-back address.
251  */
252 
253 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
254 {
255     TCGv_i64 clean = tcg_temp_new_i64();
256 #ifdef CONFIG_USER_ONLY
257     gen_top_byte_ignore(s, clean, addr, s->tbid);
258 #else
259     tcg_gen_mov_i64(clean, addr);
260 #endif
261     return clean;
262 }
263 
264 /* Insert a zero tag into src, with the result at dst. */
265 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
266 {
267     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
268 }
269 
270 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
271                              MMUAccessType acc, int log2_size)
272 {
273     gen_helper_probe_access(tcg_env, ptr,
274                             tcg_constant_i32(acc),
275                             tcg_constant_i32(get_mem_index(s)),
276                             tcg_constant_i32(1 << log2_size));
277 }
278 
279 /*
280  * For MTE, check a single logical or atomic access.  This probes a single
281  * address, the exact one specified.  The size and alignment of the access
282  * is not relevant to MTE, per se, but watchpoints do require the size,
283  * and we want to recognize those before making any other changes to state.
284  */
285 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
286                                       bool is_write, bool tag_checked,
287                                       MemOp memop, bool is_unpriv,
288                                       int core_idx)
289 {
290     if (tag_checked && s->mte_active[is_unpriv]) {
291         TCGv_i64 ret;
292         int desc = 0;
293 
294         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
295         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
296         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
297         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
298         desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
299         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
300 
301         ret = tcg_temp_new_i64();
302         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
303 
304         return ret;
305     }
306     return clean_data_tbi(s, addr);
307 }
308 
309 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
310                         bool tag_checked, MemOp memop)
311 {
312     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
313                                  false, get_mem_index(s));
314 }
315 
316 /*
317  * For MTE, check multiple logical sequential accesses.
318  */
319 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
320                         bool tag_checked, int total_size, MemOp single_mop)
321 {
322     if (tag_checked && s->mte_active[0]) {
323         TCGv_i64 ret;
324         int desc = 0;
325 
326         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
327         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
328         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
329         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
330         desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
331         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
332 
333         ret = tcg_temp_new_i64();
334         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
335 
336         return ret;
337     }
338     return clean_data_tbi(s, addr);
339 }
340 
341 /*
342  * Generate the special alignment check that applies to AccType_ATOMIC
343  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
344  * naturally aligned, but it must not cross a 16-byte boundary.
345  * See AArch64.CheckAlignment().
346  */
347 static void check_lse2_align(DisasContext *s, int rn, int imm,
348                              bool is_write, MemOp mop)
349 {
350     TCGv_i32 tmp;
351     TCGv_i64 addr;
352     TCGLabel *over_label;
353     MMUAccessType type;
354     int mmu_idx;
355 
356     tmp = tcg_temp_new_i32();
357     tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
358     tcg_gen_addi_i32(tmp, tmp, imm & 15);
359     tcg_gen_andi_i32(tmp, tmp, 15);
360     tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
361 
362     over_label = gen_new_label();
363     tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
364 
365     addr = tcg_temp_new_i64();
366     tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
367 
368     type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD,
369     mmu_idx = get_mem_index(s);
370     gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
371                                 tcg_constant_i32(mmu_idx));
372 
373     gen_set_label(over_label);
374 
375 }
376 
377 /* Handle the alignment check for AccType_ATOMIC instructions. */
378 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
379 {
380     MemOp size = mop & MO_SIZE;
381 
382     if (size == MO_8) {
383         return mop;
384     }
385 
386     /*
387      * If size == MO_128, this is a LDXP, and the operation is single-copy
388      * atomic for each doubleword, not the entire quadword; it still must
389      * be quadword aligned.
390      */
391     if (size == MO_128) {
392         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
393                                    MO_ATOM_IFALIGN_PAIR);
394     }
395     if (dc_isar_feature(aa64_lse2, s)) {
396         check_lse2_align(s, rn, 0, true, mop);
397     } else {
398         mop |= MO_ALIGN;
399     }
400     return finalize_memop(s, mop);
401 }
402 
403 /* Handle the alignment check for AccType_ORDERED instructions. */
404 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
405                                  bool is_write, MemOp mop)
406 {
407     MemOp size = mop & MO_SIZE;
408 
409     if (size == MO_8) {
410         return mop;
411     }
412     if (size == MO_128) {
413         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
414                                    MO_ATOM_IFALIGN_PAIR);
415     }
416     if (!dc_isar_feature(aa64_lse2, s)) {
417         mop |= MO_ALIGN;
418     } else if (!s->naa) {
419         check_lse2_align(s, rn, imm, is_write, mop);
420     }
421     return finalize_memop(s, mop);
422 }
423 
424 typedef struct DisasCompare64 {
425     TCGCond cond;
426     TCGv_i64 value;
427 } DisasCompare64;
428 
429 static void a64_test_cc(DisasCompare64 *c64, int cc)
430 {
431     DisasCompare c32;
432 
433     arm_test_cc(&c32, cc);
434 
435     /*
436      * Sign-extend the 32-bit value so that the GE/LT comparisons work
437      * properly.  The NE/EQ comparisons are also fine with this choice.
438       */
439     c64->cond = c32.cond;
440     c64->value = tcg_temp_new_i64();
441     tcg_gen_ext_i32_i64(c64->value, c32.value);
442 }
443 
444 static void gen_rebuild_hflags(DisasContext *s)
445 {
446     gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
447 }
448 
449 static void gen_exception_internal(int excp)
450 {
451     assert(excp_is_internal(excp));
452     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
453 }
454 
455 static void gen_exception_internal_insn(DisasContext *s, int excp)
456 {
457     gen_a64_update_pc(s, 0);
458     gen_exception_internal(excp);
459     s->base.is_jmp = DISAS_NORETURN;
460 }
461 
462 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
463 {
464     gen_a64_update_pc(s, 0);
465     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
466     s->base.is_jmp = DISAS_NORETURN;
467 }
468 
469 static void gen_step_complete_exception(DisasContext *s)
470 {
471     /* We just completed step of an insn. Move from Active-not-pending
472      * to Active-pending, and then also take the swstep exception.
473      * This corresponds to making the (IMPDEF) choice to prioritize
474      * swstep exceptions over asynchronous exceptions taken to an exception
475      * level where debug is disabled. This choice has the advantage that
476      * we do not need to maintain internal state corresponding to the
477      * ISV/EX syndrome bits between completion of the step and generation
478      * of the exception, and our syndrome information is always correct.
479      */
480     gen_ss_advance(s);
481     gen_swstep_exception(s, 1, s->is_ldex);
482     s->base.is_jmp = DISAS_NORETURN;
483 }
484 
485 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
486 {
487     if (s->ss_active) {
488         return false;
489     }
490     return translator_use_goto_tb(&s->base, dest);
491 }
492 
493 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
494 {
495     if (use_goto_tb(s, s->pc_curr + diff)) {
496         /*
497          * For pcrel, the pc must always be up-to-date on entry to
498          * the linked TB, so that it can use simple additions for all
499          * further adjustments.  For !pcrel, the linked TB is compiled
500          * to know its full virtual address, so we can delay the
501          * update to pc to the unlinked path.  A long chain of links
502          * can thus avoid many updates to the PC.
503          */
504         if (tb_cflags(s->base.tb) & CF_PCREL) {
505             gen_a64_update_pc(s, diff);
506             tcg_gen_goto_tb(n);
507         } else {
508             tcg_gen_goto_tb(n);
509             gen_a64_update_pc(s, diff);
510         }
511         tcg_gen_exit_tb(s->base.tb, n);
512         s->base.is_jmp = DISAS_NORETURN;
513     } else {
514         gen_a64_update_pc(s, diff);
515         if (s->ss_active) {
516             gen_step_complete_exception(s);
517         } else {
518             tcg_gen_lookup_and_goto_ptr();
519             s->base.is_jmp = DISAS_NORETURN;
520         }
521     }
522 }
523 
524 /*
525  * Register access functions
526  *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
531  *
532  * B1.2.1 Register mappings
533  *
534  * In instruction register encoding 31 can refer to ZR (zero register) or
535  * the SP (stack pointer) depending on context. In QEMU's case we map SP
536  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
537  * This is the point of the _sp forms.
538  */
539 TCGv_i64 cpu_reg(DisasContext *s, int reg)
540 {
541     if (reg == 31) {
542         TCGv_i64 t = tcg_temp_new_i64();
543         tcg_gen_movi_i64(t, 0);
544         return t;
545     } else {
546         return cpu_X[reg];
547     }
548 }
549 
550 /* register access for when 31 == SP */
551 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
552 {
553     return cpu_X[reg];
554 }
555 
556 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
557  * representing the register contents. This TCGv is an auto-freed
558  * temporary so it need not be explicitly freed, and may be modified.
559  */
560 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
561 {
562     TCGv_i64 v = tcg_temp_new_i64();
563     if (reg != 31) {
564         if (sf) {
565             tcg_gen_mov_i64(v, cpu_X[reg]);
566         } else {
567             tcg_gen_ext32u_i64(v, cpu_X[reg]);
568         }
569     } else {
570         tcg_gen_movi_i64(v, 0);
571     }
572     return v;
573 }
574 
575 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
576 {
577     TCGv_i64 v = tcg_temp_new_i64();
578     if (sf) {
579         tcg_gen_mov_i64(v, cpu_X[reg]);
580     } else {
581         tcg_gen_ext32u_i64(v, cpu_X[reg]);
582     }
583     return v;
584 }
585 
586 /* Return the offset into CPUARMState of a slice (from
587  * the least significant end) of FP register Qn (ie
588  * Dn, Sn, Hn or Bn).
589  * (Note that this is not the same mapping as for A32; see cpu.h)
590  */
591 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
592 {
593     return vec_reg_offset(s, regno, 0, size);
594 }
595 
596 /* Offset of the high half of the 128 bit vector Qn */
597 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
598 {
599     return vec_reg_offset(s, regno, 1, MO_64);
600 }
601 
602 /* Convenience accessors for reading and writing single and double
603  * FP registers. Writing clears the upper parts of the associated
604  * 128 bit vector register, as required by the architecture.
 * Like the GP register accessors, the read functions return a fresh
 * temporary, which need not be explicitly freed.
607  */
608 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
609 {
610     TCGv_i64 v = tcg_temp_new_i64();
611 
612     tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
613     return v;
614 }
615 
616 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
617 {
618     TCGv_i32 v = tcg_temp_new_i32();
619 
620     tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
621     return v;
622 }
623 
624 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
625 {
626     TCGv_i32 v = tcg_temp_new_i32();
627 
628     tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
629     return v;
630 }
631 
632 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
633  * If SVE is not enabled, then there are only 128 bits in the vector.
634  */
635 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
636 {
637     unsigned ofs = fp_reg_offset(s, rd, MO_64);
638     unsigned vsz = vec_full_reg_size(s);
639 
640     /* Nop move, with side effect of clearing the tail. */
641     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
642 }
643 
644 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
645 {
646     unsigned ofs = fp_reg_offset(s, reg, MO_64);
647 
648     tcg_gen_st_i64(v, tcg_env, ofs);
649     clear_vec_high(s, false, reg);
650 }
651 
652 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
653 {
654     TCGv_i64 tmp = tcg_temp_new_i64();
655 
656     tcg_gen_extu_i32_i64(tmp, v);
657     write_fp_dreg(s, reg, tmp);
658 }
659 
660 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
661 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
662                          GVecGen2Fn *gvec_fn, int vece)
663 {
664     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
665             is_q ? 16 : 8, vec_full_reg_size(s));
666 }
667 
668 /* Expand a 2-operand + immediate AdvSIMD vector operation using
669  * an expander function.
670  */
671 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
672                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
673 {
674     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
675             imm, is_q ? 16 : 8, vec_full_reg_size(s));
676 }
677 
678 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
679 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
680                          GVecGen3Fn *gvec_fn, int vece)
681 {
682     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
683             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
684 }
685 
686 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
687 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
688                          int rx, GVecGen4Fn *gvec_fn, int vece)
689 {
690     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
691             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
692             is_q ? 16 : 8, vec_full_reg_size(s));
693 }
694 
695 /* Expand a 2-operand operation using an out-of-line helper.  */
696 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
697                              int rn, int data, gen_helper_gvec_2 *fn)
698 {
699     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
700                        vec_full_reg_offset(s, rn),
701                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
702 }
703 
704 /* Expand a 3-operand operation using an out-of-line helper.  */
705 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
706                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
707 {
708     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
709                        vec_full_reg_offset(s, rn),
710                        vec_full_reg_offset(s, rm),
711                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
712 }
713 
714 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
715  * an out-of-line helper.
716  */
717 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
718                               int rm, bool is_fp16, int data,
719                               gen_helper_gvec_3_ptr *fn)
720 {
721     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
722     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
723                        vec_full_reg_offset(s, rn),
724                        vec_full_reg_offset(s, rm), fpst,
725                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
726 }
727 
728 /* Expand a 3-operand + qc + operation using an out-of-line helper.  */
729 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
730                             int rm, gen_helper_gvec_3_ptr *fn)
731 {
732     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
733 
734     tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
735     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
736                        vec_full_reg_offset(s, rn),
737                        vec_full_reg_offset(s, rm), qc_ptr,
738                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
739 }
740 
741 /* Expand a 4-operand operation using an out-of-line helper.  */
742 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
743                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
744 {
745     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
746                        vec_full_reg_offset(s, rn),
747                        vec_full_reg_offset(s, rm),
748                        vec_full_reg_offset(s, ra),
749                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
750 }
751 
752 /*
753  * Expand a 4-operand + fpstatus pointer + simd data value operation using
754  * an out-of-line helper.
755  */
756 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
757                               int rm, int ra, bool is_fp16, int data,
758                               gen_helper_gvec_4_ptr *fn)
759 {
760     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
761     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
762                        vec_full_reg_offset(s, rn),
763                        vec_full_reg_offset(s, rm),
764                        vec_full_reg_offset(s, ra), fpst,
765                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
766 }
767 
768 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
769  * than the 32 bit equivalent.
770  */
771 static inline void gen_set_NZ64(TCGv_i64 result)
772 {
773     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
774     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
775 }
776 
777 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
778 static inline void gen_logic_CC(int sf, TCGv_i64 result)
779 {
780     if (sf) {
781         gen_set_NZ64(result);
782     } else {
783         tcg_gen_extrl_i64_i32(cpu_ZF, result);
784         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
785     }
786     tcg_gen_movi_i32(cpu_CF, 0);
787     tcg_gen_movi_i32(cpu_VF, 0);
788 }
789 
790 /* dest = T0 + T1; compute C, N, V and Z flags */
791 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
792 {
793     TCGv_i64 result, flag, tmp;
794     result = tcg_temp_new_i64();
795     flag = tcg_temp_new_i64();
796     tmp = tcg_temp_new_i64();
797 
798     tcg_gen_movi_i64(tmp, 0);
799     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
800 
801     tcg_gen_extrl_i64_i32(cpu_CF, flag);
802 
803     gen_set_NZ64(result);
804 
805     tcg_gen_xor_i64(flag, result, t0);
806     tcg_gen_xor_i64(tmp, t0, t1);
807     tcg_gen_andc_i64(flag, flag, tmp);
808     tcg_gen_extrh_i64_i32(cpu_VF, flag);
809 
810     tcg_gen_mov_i64(dest, result);
811 }
812 
813 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
814 {
815     TCGv_i32 t0_32 = tcg_temp_new_i32();
816     TCGv_i32 t1_32 = tcg_temp_new_i32();
817     TCGv_i32 tmp = tcg_temp_new_i32();
818 
819     tcg_gen_movi_i32(tmp, 0);
820     tcg_gen_extrl_i64_i32(t0_32, t0);
821     tcg_gen_extrl_i64_i32(t1_32, t1);
822     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
823     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
824     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
825     tcg_gen_xor_i32(tmp, t0_32, t1_32);
826     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
827     tcg_gen_extu_i32_i64(dest, cpu_NF);
828 }
829 
830 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
831 {
832     if (sf) {
833         gen_add64_CC(dest, t0, t1);
834     } else {
835         gen_add32_CC(dest, t0, t1);
836     }
837 }
838 
839 /* dest = T0 - T1; compute C, N, V and Z flags */
840 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
841 {
842     /* 64 bit arithmetic */
843     TCGv_i64 result, flag, tmp;
844 
845     result = tcg_temp_new_i64();
846     flag = tcg_temp_new_i64();
847     tcg_gen_sub_i64(result, t0, t1);
848 
849     gen_set_NZ64(result);
850 
851     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
852     tcg_gen_extrl_i64_i32(cpu_CF, flag);
853 
854     tcg_gen_xor_i64(flag, result, t0);
855     tmp = tcg_temp_new_i64();
856     tcg_gen_xor_i64(tmp, t0, t1);
857     tcg_gen_and_i64(flag, flag, tmp);
858     tcg_gen_extrh_i64_i32(cpu_VF, flag);
859     tcg_gen_mov_i64(dest, result);
860 }
861 
862 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
863 {
864     /* 32 bit arithmetic */
865     TCGv_i32 t0_32 = tcg_temp_new_i32();
866     TCGv_i32 t1_32 = tcg_temp_new_i32();
867     TCGv_i32 tmp;
868 
869     tcg_gen_extrl_i64_i32(t0_32, t0);
870     tcg_gen_extrl_i64_i32(t1_32, t1);
871     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
872     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
873     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
874     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
875     tmp = tcg_temp_new_i32();
876     tcg_gen_xor_i32(tmp, t0_32, t1_32);
877     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
878     tcg_gen_extu_i32_i64(dest, cpu_NF);
879 }
880 
881 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
882 {
883     if (sf) {
884         gen_sub64_CC(dest, t0, t1);
885     } else {
886         gen_sub32_CC(dest, t0, t1);
887     }
888 }
889 
890 /* dest = T0 + T1 + CF; do not compute flags. */
891 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
892 {
893     TCGv_i64 flag = tcg_temp_new_i64();
894     tcg_gen_extu_i32_i64(flag, cpu_CF);
895     tcg_gen_add_i64(dest, t0, t1);
896     tcg_gen_add_i64(dest, dest, flag);
897 
898     if (!sf) {
899         tcg_gen_ext32u_i64(dest, dest);
900     }
901 }
902 
903 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
904 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
905 {
906     if (sf) {
907         TCGv_i64 result = tcg_temp_new_i64();
908         TCGv_i64 cf_64 = tcg_temp_new_i64();
909         TCGv_i64 vf_64 = tcg_temp_new_i64();
910         TCGv_i64 tmp = tcg_temp_new_i64();
911         TCGv_i64 zero = tcg_constant_i64(0);
912 
913         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
914         tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
915         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
916         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
917         gen_set_NZ64(result);
918 
919         tcg_gen_xor_i64(vf_64, result, t0);
920         tcg_gen_xor_i64(tmp, t0, t1);
921         tcg_gen_andc_i64(vf_64, vf_64, tmp);
922         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
923 
924         tcg_gen_mov_i64(dest, result);
925     } else {
926         TCGv_i32 t0_32 = tcg_temp_new_i32();
927         TCGv_i32 t1_32 = tcg_temp_new_i32();
928         TCGv_i32 tmp = tcg_temp_new_i32();
929         TCGv_i32 zero = tcg_constant_i32(0);
930 
931         tcg_gen_extrl_i64_i32(t0_32, t0);
932         tcg_gen_extrl_i64_i32(t1_32, t1);
933         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
934         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
935 
936         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
937         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
938         tcg_gen_xor_i32(tmp, t0_32, t1_32);
939         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
940         tcg_gen_extu_i32_i64(dest, cpu_NF);
941     }
942 }
943 
944 /*
945  * Load/Store generators
946  */
947 
948 /*
949  * Store from GPR register to memory.
950  */
951 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
952                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
953                              bool iss_valid,
954                              unsigned int iss_srt,
955                              bool iss_sf, bool iss_ar)
956 {
957     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
958 
959     if (iss_valid) {
960         uint32_t syn;
961 
962         syn = syn_data_abort_with_iss(0,
963                                       (memop & MO_SIZE),
964                                       false,
965                                       iss_srt,
966                                       iss_sf,
967                                       iss_ar,
968                                       0, 0, 0, 0, 0, false);
969         disas_set_insn_syndrome(s, syn);
970     }
971 }
972 
973 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
974                       TCGv_i64 tcg_addr, MemOp memop,
975                       bool iss_valid,
976                       unsigned int iss_srt,
977                       bool iss_sf, bool iss_ar)
978 {
979     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
980                      iss_valid, iss_srt, iss_sf, iss_ar);
981 }
982 
983 /*
984  * Load from memory to GPR register
985  */
986 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
987                              MemOp memop, bool extend, int memidx,
988                              bool iss_valid, unsigned int iss_srt,
989                              bool iss_sf, bool iss_ar)
990 {
991     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
992 
993     if (extend && (memop & MO_SIGN)) {
994         g_assert((memop & MO_SIZE) <= MO_32);
995         tcg_gen_ext32u_i64(dest, dest);
996     }
997 
998     if (iss_valid) {
999         uint32_t syn;
1000 
1001         syn = syn_data_abort_with_iss(0,
1002                                       (memop & MO_SIZE),
1003                                       (memop & MO_SIGN) != 0,
1004                                       iss_srt,
1005                                       iss_sf,
1006                                       iss_ar,
1007                                       0, 0, 0, 0, 0, false);
1008         disas_set_insn_syndrome(s, syn);
1009     }
1010 }
1011 
1012 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1013                       MemOp memop, bool extend,
1014                       bool iss_valid, unsigned int iss_srt,
1015                       bool iss_sf, bool iss_ar)
1016 {
1017     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1018                      iss_valid, iss_srt, iss_sf, iss_ar);
1019 }
1020 
1021 /*
1022  * Store from FP register to memory
1023  */
1024 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1025 {
1026     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1027     TCGv_i64 tmplo = tcg_temp_new_i64();
1028 
1029     tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1030 
1031     if ((mop & MO_SIZE) < MO_128) {
1032         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1033     } else {
1034         TCGv_i64 tmphi = tcg_temp_new_i64();
1035         TCGv_i128 t16 = tcg_temp_new_i128();
1036 
1037         tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1038         tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1039 
1040         tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1041     }
1042 }
1043 
1044 /*
1045  * Load from memory to FP register
1046  */
1047 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1048 {
1049     /* This always zero-extends and writes to a full 128 bit wide vector */
1050     TCGv_i64 tmplo = tcg_temp_new_i64();
1051     TCGv_i64 tmphi = NULL;
1052 
1053     if ((mop & MO_SIZE) < MO_128) {
1054         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1055     } else {
1056         TCGv_i128 t16 = tcg_temp_new_i128();
1057 
1058         tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1059 
1060         tmphi = tcg_temp_new_i64();
1061         tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1062     }
1063 
1064     tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1065 
1066     if (tmphi) {
1067         tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1068     }
1069     clear_vec_high(s, tmphi != NULL, destidx);
1070 }
1071 
1072 /*
1073  * Vector load/store helpers.
1074  *
1075  * The principal difference between this and a FP load is that we don't
1076  * zero extend as we are filling a partial chunk of the vector register.
1077  * These functions don't support 128 bit loads/stores, which would be
1078  * normal load/store operations.
1079  *
1080  * The _i32 versions are useful when operating on 32 bit quantities
1081  * (eg for floating point single or using Neon helper functions).
1082  */
1083 
1084 /* Get value of an element within a vector register */
1085 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1086                              int element, MemOp memop)
1087 {
1088     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1089     switch ((unsigned)memop) {
1090     case MO_8:
1091         tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1092         break;
1093     case MO_16:
1094         tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1095         break;
1096     case MO_32:
1097         tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1098         break;
1099     case MO_8|MO_SIGN:
1100         tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1101         break;
1102     case MO_16|MO_SIGN:
1103         tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1104         break;
1105     case MO_32|MO_SIGN:
1106         tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1107         break;
1108     case MO_64:
1109     case MO_64|MO_SIGN:
1110         tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1111         break;
1112     default:
1113         g_assert_not_reached();
1114     }
1115 }
1116 
1117 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1118                                  int element, MemOp memop)
1119 {
1120     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1121     switch (memop) {
1122     case MO_8:
1123         tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1124         break;
1125     case MO_16:
1126         tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1127         break;
1128     case MO_8|MO_SIGN:
1129         tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1130         break;
1131     case MO_16|MO_SIGN:
1132         tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1133         break;
1134     case MO_32:
1135     case MO_32|MO_SIGN:
1136         tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1137         break;
1138     default:
1139         g_assert_not_reached();
1140     }
1141 }
1142 
1143 /* Set value of an element within a vector register */
1144 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1145                               int element, MemOp memop)
1146 {
1147     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1148     switch (memop) {
1149     case MO_8:
1150         tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1151         break;
1152     case MO_16:
1153         tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1154         break;
1155     case MO_32:
1156         tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1157         break;
1158     case MO_64:
1159         tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1160         break;
1161     default:
1162         g_assert_not_reached();
1163     }
1164 }
1165 
1166 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1167                                   int destidx, int element, MemOp memop)
1168 {
1169     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1170     switch (memop) {
1171     case MO_8:
1172         tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1173         break;
1174     case MO_16:
1175         tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1176         break;
1177     case MO_32:
1178         tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1179         break;
1180     default:
1181         g_assert_not_reached();
1182     }
1183 }
1184 
1185 /* Store from vector register to memory */
1186 static void do_vec_st(DisasContext *s, int srcidx, int element,
1187                       TCGv_i64 tcg_addr, MemOp mop)
1188 {
1189     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1190 
1191     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1192     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1193 }
1194 
1195 /* Load from memory to vector register */
1196 static void do_vec_ld(DisasContext *s, int destidx, int element,
1197                       TCGv_i64 tcg_addr, MemOp mop)
1198 {
1199     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1200 
1201     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1202     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1203 }
1204 
1205 /* Check that FP/Neon access is enabled. If it is, return
1206  * true. If not, emit code to generate an appropriate exception,
1207  * and return false; the caller should not emit any code for
1208  * the instruction. Note that this check must happen after all
1209  * unallocated-encoding checks (otherwise the syndrome information
1210  * for the resulting exception will be incorrect).
1211  */
1212 static bool fp_access_check_only(DisasContext *s)
1213 {
1214     if (s->fp_excp_el) {
1215         assert(!s->fp_access_checked);
1216         s->fp_access_checked = true;
1217 
1218         gen_exception_insn_el(s, 0, EXCP_UDEF,
1219                               syn_fp_access_trap(1, 0xe, false, 0),
1220                               s->fp_excp_el);
1221         return false;
1222     }
1223     s->fp_access_checked = true;
1224     return true;
1225 }
1226 
1227 static bool fp_access_check(DisasContext *s)
1228 {
1229     if (!fp_access_check_only(s)) {
1230         return false;
1231     }
1232     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1233         gen_exception_insn(s, 0, EXCP_UDEF,
1234                            syn_smetrap(SME_ET_Streaming, false));
1235         return false;
1236     }
1237     return true;
1238 }
1239 
1240 /*
1241  * Check that SVE access is enabled.  If it is, return true.
1242  * If not, emit code to generate an appropriate exception and return false.
1243  * This function corresponds to CheckSVEEnabled().
1244  */
1245 bool sve_access_check(DisasContext *s)
1246 {
1247     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1248         assert(dc_isar_feature(aa64_sme, s));
1249         if (!sme_sm_enabled_check(s)) {
1250             goto fail_exit;
1251         }
1252     } else if (s->sve_excp_el) {
1253         gen_exception_insn_el(s, 0, EXCP_UDEF,
1254                               syn_sve_access_trap(), s->sve_excp_el);
1255         goto fail_exit;
1256     }
1257     s->sve_access_checked = true;
1258     return fp_access_check(s);
1259 
1260  fail_exit:
1261     /* Assert that we only raise one exception per instruction. */
1262     assert(!s->sve_access_checked);
1263     s->sve_access_checked = true;
1264     return false;
1265 }
1266 
1267 /*
1268  * Check that SME access is enabled, raise an exception if not.
1269  * Note that this function corresponds to CheckSMEAccess and is
1270  * only used directly for cpregs.
1271  */
1272 static bool sme_access_check(DisasContext *s)
1273 {
1274     if (s->sme_excp_el) {
1275         gen_exception_insn_el(s, 0, EXCP_UDEF,
1276                               syn_smetrap(SME_ET_AccessTrap, false),
1277                               s->sme_excp_el);
1278         return false;
1279     }
1280     return true;
1281 }
1282 
1283 /* This function corresponds to CheckSMEEnabled. */
1284 bool sme_enabled_check(DisasContext *s)
1285 {
1286     /*
1287      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1288      * to be zero when fp_excp_el has priority.  This is because we need
1289      * sme_excp_el by itself for cpregs access checks.
1290      */
1291     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1292         s->fp_access_checked = true;
1293         return sme_access_check(s);
1294     }
1295     return fp_access_check_only(s);
1296 }
1297 
1298 /* Common subroutine for CheckSMEAnd*Enabled. */
1299 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1300 {
1301     if (!sme_enabled_check(s)) {
1302         return false;
1303     }
1304     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1305         gen_exception_insn(s, 0, EXCP_UDEF,
1306                            syn_smetrap(SME_ET_NotStreaming, false));
1307         return false;
1308     }
1309     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1310         gen_exception_insn(s, 0, EXCP_UDEF,
1311                            syn_smetrap(SME_ET_InactiveZA, false));
1312         return false;
1313     }
1314     return true;
1315 }
1316 
1317 /*
1318  * This utility function is for doing register extension with an
1319  * optional shift. You will likely want to pass a temporary for the
1320  * destination register. See DecodeRegExtend() in the ARM ARM.
1321  */
1322 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1323                               int option, unsigned int shift)
1324 {
1325     int extsize = extract32(option, 0, 2);
1326     bool is_signed = extract32(option, 2, 1);
1327 
1328     tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1329     tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1330 }
1331 
1332 static inline void gen_check_sp_alignment(DisasContext *s)
1333 {
1334     /* The AArch64 architecture mandates that (if enabled via PSTATE
1335      * or SCTLR bits) there is a check that SP is 16-aligned on every
1336      * SP-relative load or store (with an exception generated if it is not).
1337      * In line with general QEMU practice regarding misaligned accesses,
1338      * we omit these checks for the sake of guest program performance.
1339      * This function is provided as a hook so we can more easily add these
1340      * checks in future (possibly as a "favour catching guest program bugs
1341      * over speed" user selectable option).
1342      */
1343 }
1344 
1345 /*
1346  * This provides a simple table based table lookup decoder. It is
1347  * intended to be used when the relevant bits for decode are too
1348  * awkwardly placed and switch/if based logic would be confusing and
1349  * deeply nested. Since it's a linear search through the table, tables
1350  * should be kept small.
1351  *
1352  * It returns the first handler where insn & mask == pattern, or
1353  * NULL if there is no match.
1354  * The table is terminated by an empty mask (i.e. 0)
1355  */
1356 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1357                                                uint32_t insn)
1358 {
1359     const AArch64DecodeTable *tptr = table;
1360 
1361     while (tptr->mask) {
1362         if ((insn & tptr->mask) == tptr->pattern) {
1363             return tptr->disas_fn;
1364         }
1365         tptr++;
1366     }
1367     return NULL;
1368 }
1369 
1370 /*
1371  * The instruction disassembly implemented here matches
1372  * the instruction encoding classifications in chapter C4
1373  * of the ARM Architecture Reference Manual (DDI0487B_a);
1374  * classification names and decode diagrams here should generally
1375  * match up with those in the manual.
1376  */
1377 
1378 static bool trans_B(DisasContext *s, arg_i *a)
1379 {
1380     reset_btype(s);
1381     gen_goto_tb(s, 0, a->imm);
1382     return true;
1383 }
1384 
1385 static bool trans_BL(DisasContext *s, arg_i *a)
1386 {
1387     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1388     reset_btype(s);
1389     gen_goto_tb(s, 0, a->imm);
1390     return true;
1391 }
1392 
1393 
1394 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1395 {
1396     DisasLabel match;
1397     TCGv_i64 tcg_cmp;
1398 
1399     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1400     reset_btype(s);
1401 
1402     match = gen_disas_label(s);
1403     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1404                         tcg_cmp, 0, match.label);
1405     gen_goto_tb(s, 0, 4);
1406     set_disas_label(s, match);
1407     gen_goto_tb(s, 1, a->imm);
1408     return true;
1409 }
1410 
1411 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1412 {
1413     DisasLabel match;
1414     TCGv_i64 tcg_cmp;
1415 
1416     tcg_cmp = tcg_temp_new_i64();
1417     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1418 
1419     reset_btype(s);
1420 
1421     match = gen_disas_label(s);
1422     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1423                         tcg_cmp, 0, match.label);
1424     gen_goto_tb(s, 0, 4);
1425     set_disas_label(s, match);
1426     gen_goto_tb(s, 1, a->imm);
1427     return true;
1428 }
1429 
1430 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1431 {
1432     /* BC.cond is only present with FEAT_HBC */
1433     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1434         return false;
1435     }
1436     reset_btype(s);
1437     if (a->cond < 0x0e) {
1438         /* genuinely conditional branches */
1439         DisasLabel match = gen_disas_label(s);
1440         arm_gen_test_cc(a->cond, match.label);
1441         gen_goto_tb(s, 0, 4);
1442         set_disas_label(s, match);
1443         gen_goto_tb(s, 1, a->imm);
1444     } else {
1445         /* 0xe and 0xf are both "always" conditions */
1446         gen_goto_tb(s, 0, a->imm);
1447     }
1448     return true;
1449 }
1450 
1451 static void set_btype_for_br(DisasContext *s, int rn)
1452 {
1453     if (dc_isar_feature(aa64_bti, s)) {
1454         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1455         set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
1456     }
1457 }
1458 
1459 static void set_btype_for_blr(DisasContext *s)
1460 {
1461     if (dc_isar_feature(aa64_bti, s)) {
1462         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1463         set_btype(s, 2);
1464     }
1465 }
1466 
1467 static bool trans_BR(DisasContext *s, arg_r *a)
1468 {
1469     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1470     set_btype_for_br(s, a->rn);
1471     s->base.is_jmp = DISAS_JUMP;
1472     return true;
1473 }
1474 
1475 static bool trans_BLR(DisasContext *s, arg_r *a)
1476 {
1477     TCGv_i64 dst = cpu_reg(s, a->rn);
1478     TCGv_i64 lr = cpu_reg(s, 30);
1479     if (dst == lr) {
1480         TCGv_i64 tmp = tcg_temp_new_i64();
1481         tcg_gen_mov_i64(tmp, dst);
1482         dst = tmp;
1483     }
1484     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1485     gen_a64_set_pc(s, dst);
1486     set_btype_for_blr(s);
1487     s->base.is_jmp = DISAS_JUMP;
1488     return true;
1489 }
1490 
1491 static bool trans_RET(DisasContext *s, arg_r *a)
1492 {
1493     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1494     s->base.is_jmp = DISAS_JUMP;
1495     return true;
1496 }
1497 
1498 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1499                                    TCGv_i64 modifier, bool use_key_a)
1500 {
1501     TCGv_i64 truedst;
1502     /*
1503      * Return the branch target for a BRAA/RETA/etc, which is either
1504      * just the destination dst, or that value with the pauth check
1505      * done and the code removed from the high bits.
1506      */
1507     if (!s->pauth_active) {
1508         return dst;
1509     }
1510 
1511     truedst = tcg_temp_new_i64();
1512     if (use_key_a) {
1513         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1514     } else {
1515         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1516     }
1517     return truedst;
1518 }
1519 
1520 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1521 {
1522     TCGv_i64 dst;
1523 
1524     if (!dc_isar_feature(aa64_pauth, s)) {
1525         return false;
1526     }
1527 
1528     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1529     gen_a64_set_pc(s, dst);
1530     set_btype_for_br(s, a->rn);
1531     s->base.is_jmp = DISAS_JUMP;
1532     return true;
1533 }
1534 
1535 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1536 {
1537     TCGv_i64 dst, lr;
1538 
1539     if (!dc_isar_feature(aa64_pauth, s)) {
1540         return false;
1541     }
1542 
1543     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1544     lr = cpu_reg(s, 30);
1545     if (dst == lr) {
1546         TCGv_i64 tmp = tcg_temp_new_i64();
1547         tcg_gen_mov_i64(tmp, dst);
1548         dst = tmp;
1549     }
1550     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1551     gen_a64_set_pc(s, dst);
1552     set_btype_for_blr(s);
1553     s->base.is_jmp = DISAS_JUMP;
1554     return true;
1555 }
1556 
1557 static bool trans_RETA(DisasContext *s, arg_reta *a)
1558 {
1559     TCGv_i64 dst;
1560 
1561     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1562     gen_a64_set_pc(s, dst);
1563     s->base.is_jmp = DISAS_JUMP;
1564     return true;
1565 }
1566 
1567 static bool trans_BRA(DisasContext *s, arg_bra *a)
1568 {
1569     TCGv_i64 dst;
1570 
1571     if (!dc_isar_feature(aa64_pauth, s)) {
1572         return false;
1573     }
1574     dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m);
1575     gen_a64_set_pc(s, dst);
1576     set_btype_for_br(s, a->rn);
1577     s->base.is_jmp = DISAS_JUMP;
1578     return true;
1579 }
1580 
1581 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1582 {
1583     TCGv_i64 dst, lr;
1584 
1585     if (!dc_isar_feature(aa64_pauth, s)) {
1586         return false;
1587     }
1588     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1589     lr = cpu_reg(s, 30);
1590     if (dst == lr) {
1591         TCGv_i64 tmp = tcg_temp_new_i64();
1592         tcg_gen_mov_i64(tmp, dst);
1593         dst = tmp;
1594     }
1595     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1596     gen_a64_set_pc(s, dst);
1597     set_btype_for_blr(s);
1598     s->base.is_jmp = DISAS_JUMP;
1599     return true;
1600 }
1601 
1602 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1603 {
1604     TCGv_i64 dst;
1605 
1606     if (s->current_el == 0) {
1607         return false;
1608     }
1609     if (s->trap_eret) {
1610         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1611         return true;
1612     }
1613     dst = tcg_temp_new_i64();
1614     tcg_gen_ld_i64(dst, tcg_env,
1615                    offsetof(CPUARMState, elr_el[s->current_el]));
1616 
1617     translator_io_start(&s->base);
1618 
1619     gen_helper_exception_return(tcg_env, dst);
1620     /* Must exit loop to check un-masked IRQs */
1621     s->base.is_jmp = DISAS_EXIT;
1622     return true;
1623 }
1624 
1625 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1626 {
1627     TCGv_i64 dst;
1628 
1629     if (!dc_isar_feature(aa64_pauth, s)) {
1630         return false;
1631     }
1632     if (s->current_el == 0) {
1633         return false;
1634     }
1635     /* The FGT trap takes precedence over an auth trap. */
1636     if (s->trap_eret) {
1637         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1638         return true;
1639     }
1640     dst = tcg_temp_new_i64();
1641     tcg_gen_ld_i64(dst, tcg_env,
1642                    offsetof(CPUARMState, elr_el[s->current_el]));
1643 
1644     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1645 
1646     translator_io_start(&s->base);
1647 
1648     gen_helper_exception_return(tcg_env, dst);
1649     /* Must exit loop to check un-masked IRQs */
1650     s->base.is_jmp = DISAS_EXIT;
1651     return true;
1652 }
1653 
1654 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1655 {
1656     return true;
1657 }
1658 
1659 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1660 {
1661     /*
1662      * When running in MTTCG we don't generate jumps to the yield and
1663      * WFE helpers as it won't affect the scheduling of other vCPUs.
1664      * If we wanted to more completely model WFE/SEV so we don't busy
1665      * spin unnecessarily we would need to do something more involved.
1666      */
1667     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1668         s->base.is_jmp = DISAS_YIELD;
1669     }
1670     return true;
1671 }
1672 
1673 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1674 {
1675     s->base.is_jmp = DISAS_WFI;
1676     return true;
1677 }
1678 
1679 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1680 {
1681     /*
1682      * When running in MTTCG we don't generate jumps to the yield and
1683      * WFE helpers as it won't affect the scheduling of other vCPUs.
1684      * If we wanted to more completely model WFE/SEV so we don't busy
1685      * spin unnecessarily we would need to do something more involved.
1686      */
1687     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1688         s->base.is_jmp = DISAS_WFE;
1689     }
1690     return true;
1691 }
1692 
1693 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1694 {
1695     if (s->pauth_active) {
1696         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1697     }
1698     return true;
1699 }
1700 
1701 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1702 {
1703     if (s->pauth_active) {
1704         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1705     }
1706     return true;
1707 }
1708 
1709 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1710 {
1711     if (s->pauth_active) {
1712         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1713     }
1714     return true;
1715 }
1716 
1717 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1718 {
1719     if (s->pauth_active) {
1720         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1721     }
1722     return true;
1723 }
1724 
1725 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1726 {
1727     if (s->pauth_active) {
1728         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1729     }
1730     return true;
1731 }
1732 
1733 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1734 {
1735     /* Without RAS, we must implement this as NOP. */
1736     if (dc_isar_feature(aa64_ras, s)) {
1737         /*
1738          * QEMU does not have a source of physical SErrors,
1739          * so we are only concerned with virtual SErrors.
1740          * The pseudocode in the ARM for this case is
1741          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1742          *      AArch64.vESBOperation();
1743          * Most of the condition can be evaluated at translation time.
1744          * Test for EL2 present, and defer test for SEL2 to runtime.
1745          */
1746         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1747             gen_helper_vesb(tcg_env);
1748         }
1749     }
1750     return true;
1751 }
1752 
1753 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1754 {
1755     if (s->pauth_active) {
1756         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1757     }
1758     return true;
1759 }
1760 
1761 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1762 {
1763     if (s->pauth_active) {
1764         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1765     }
1766     return true;
1767 }
1768 
1769 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1770 {
1771     if (s->pauth_active) {
1772         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1773     }
1774     return true;
1775 }
1776 
1777 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1778 {
1779     if (s->pauth_active) {
1780         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1781     }
1782     return true;
1783 }
1784 
1785 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1786 {
1787     if (s->pauth_active) {
1788         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1789     }
1790     return true;
1791 }
1792 
1793 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1794 {
1795     if (s->pauth_active) {
1796         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1797     }
1798     return true;
1799 }
1800 
1801 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1802 {
1803     if (s->pauth_active) {
1804         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1805     }
1806     return true;
1807 }
1808 
1809 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1810 {
1811     if (s->pauth_active) {
1812         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1813     }
1814     return true;
1815 }
1816 
1817 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1818 {
1819     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1820     return true;
1821 }
1822 
1823 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1824 {
1825     /* We handle DSB and DMB the same way */
1826     TCGBar bar;
1827 
1828     switch (a->types) {
1829     case 1: /* MBReqTypes_Reads */
1830         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1831         break;
1832     case 2: /* MBReqTypes_Writes */
1833         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1834         break;
1835     default: /* MBReqTypes_All */
1836         bar = TCG_BAR_SC | TCG_MO_ALL;
1837         break;
1838     }
1839     tcg_gen_mb(bar);
1840     return true;
1841 }
1842 
1843 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1844 {
1845     /*
1846      * We need to break the TB after this insn to execute
1847      * self-modifying code correctly and also to take
1848      * any pending interrupts immediately.
1849      */
1850     reset_btype(s);
1851     gen_goto_tb(s, 0, 4);
1852     return true;
1853 }
1854 
1855 static bool trans_SB(DisasContext *s, arg_SB *a)
1856 {
1857     if (!dc_isar_feature(aa64_sb, s)) {
1858         return false;
1859     }
1860     /*
1861      * TODO: There is no speculation barrier opcode for TCG;
1862      * MB and end the TB instead.
1863      */
1864     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1865     gen_goto_tb(s, 0, 4);
1866     return true;
1867 }
1868 
1869 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
1870 {
1871     if (!dc_isar_feature(aa64_condm_4, s)) {
1872         return false;
1873     }
1874     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1875     return true;
1876 }
1877 
1878 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
1879 {
1880     TCGv_i32 z;
1881 
1882     if (!dc_isar_feature(aa64_condm_5, s)) {
1883         return false;
1884     }
1885 
1886     z = tcg_temp_new_i32();
1887 
1888     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1889 
1890     /*
1891      * (!C & !Z) << 31
1892      * (!(C | Z)) << 31
1893      * ~((C | Z) << 31)
1894      * ~-(C | Z)
1895      * (C | Z) - 1
1896      */
1897     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1898     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1899 
1900     /* !(Z & C) */
1901     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1902     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1903 
1904     /* (!C & Z) << 31 -> -(Z & ~C) */
1905     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1906     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1907 
1908     /* C | Z */
1909     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1910 
1911     return true;
1912 }
1913 
1914 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
1915 {
1916     if (!dc_isar_feature(aa64_condm_5, s)) {
1917         return false;
1918     }
1919 
1920     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1921     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1922 
1923     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1924     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1925 
1926     tcg_gen_movi_i32(cpu_NF, 0);
1927     tcg_gen_movi_i32(cpu_VF, 0);
1928 
1929     return true;
1930 }
1931 
1932 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
1933 {
1934     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1935         return false;
1936     }
1937     if (a->imm & 1) {
1938         set_pstate_bits(PSTATE_UAO);
1939     } else {
1940         clear_pstate_bits(PSTATE_UAO);
1941     }
1942     gen_rebuild_hflags(s);
1943     s->base.is_jmp = DISAS_TOO_MANY;
1944     return true;
1945 }
1946 
1947 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
1948 {
1949     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1950         return false;
1951     }
1952     if (a->imm & 1) {
1953         set_pstate_bits(PSTATE_PAN);
1954     } else {
1955         clear_pstate_bits(PSTATE_PAN);
1956     }
1957     gen_rebuild_hflags(s);
1958     s->base.is_jmp = DISAS_TOO_MANY;
1959     return true;
1960 }
1961 
1962 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
1963 {
1964     if (s->current_el == 0) {
1965         return false;
1966     }
1967     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
1968     s->base.is_jmp = DISAS_TOO_MANY;
1969     return true;
1970 }
1971 
1972 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
1973 {
1974     if (!dc_isar_feature(aa64_ssbs, s)) {
1975         return false;
1976     }
1977     if (a->imm & 1) {
1978         set_pstate_bits(PSTATE_SSBS);
1979     } else {
1980         clear_pstate_bits(PSTATE_SSBS);
1981     }
1982     /* Don't need to rebuild hflags since SSBS is a nop */
1983     s->base.is_jmp = DISAS_TOO_MANY;
1984     return true;
1985 }
1986 
1987 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
1988 {
1989     if (!dc_isar_feature(aa64_dit, s)) {
1990         return false;
1991     }
1992     if (a->imm & 1) {
1993         set_pstate_bits(PSTATE_DIT);
1994     } else {
1995         clear_pstate_bits(PSTATE_DIT);
1996     }
1997     /* There's no need to rebuild hflags because DIT is a nop */
1998     s->base.is_jmp = DISAS_TOO_MANY;
1999     return true;
2000 }
2001 
2002 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2003 {
2004     if (dc_isar_feature(aa64_mte, s)) {
2005         /* Full MTE is enabled -- set the TCO bit as directed. */
2006         if (a->imm & 1) {
2007             set_pstate_bits(PSTATE_TCO);
2008         } else {
2009             clear_pstate_bits(PSTATE_TCO);
2010         }
2011         gen_rebuild_hflags(s);
2012         /* Many factors, including TCO, go into MTE_ACTIVE. */
2013         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2014         return true;
2015     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2016         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2017         return true;
2018     } else {
2019         /* Insn not present */
2020         return false;
2021     }
2022 }
2023 
2024 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2025 {
2026     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2027     s->base.is_jmp = DISAS_TOO_MANY;
2028     return true;
2029 }
2030 
2031 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2032 {
2033     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2034     /* Exit the cpu loop to re-evaluate pending IRQs. */
2035     s->base.is_jmp = DISAS_UPDATE_EXIT;
2036     return true;
2037 }
2038 
2039 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2040 {
2041     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2042         return false;
2043     }
2044 
2045     if (a->imm == 0) {
2046         clear_pstate_bits(PSTATE_ALLINT);
2047     } else if (s->current_el > 1) {
2048         set_pstate_bits(PSTATE_ALLINT);
2049     } else {
2050         gen_helper_msr_set_allint_el1(tcg_env);
2051     }
2052 
2053     /* Exit the cpu loop to re-evaluate pending IRQs. */
2054     s->base.is_jmp = DISAS_UPDATE_EXIT;
2055     return true;
2056 }
2057 
2058 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2059 {
2060     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2061         return false;
2062     }
2063     if (sme_access_check(s)) {
2064         int old = s->pstate_sm | (s->pstate_za << 1);
2065         int new = a->imm * 3;
2066 
2067         if ((old ^ new) & a->mask) {
2068             /* At least one bit changes. */
2069             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2070                                 tcg_constant_i32(a->mask));
2071             s->base.is_jmp = DISAS_TOO_MANY;
2072         }
2073     }
2074     return true;
2075 }
2076 
2077 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2078 {
2079     TCGv_i32 tmp = tcg_temp_new_i32();
2080     TCGv_i32 nzcv = tcg_temp_new_i32();
2081 
2082     /* build bit 31, N */
2083     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2084     /* build bit 30, Z */
2085     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2086     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2087     /* build bit 29, C */
2088     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2089     /* build bit 28, V */
2090     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2091     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2092     /* generate result */
2093     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2094 }
2095 
2096 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2097 {
2098     TCGv_i32 nzcv = tcg_temp_new_i32();
2099 
2100     /* take NZCV from R[t] */
2101     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2102 
2103     /* bit 31, N */
2104     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2105     /* bit 30, Z */
2106     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2107     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2108     /* bit 29, C */
2109     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2110     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2111     /* bit 28, V */
2112     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2113     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2114 }
2115 
2116 static void gen_sysreg_undef(DisasContext *s, bool isread,
2117                              uint8_t op0, uint8_t op1, uint8_t op2,
2118                              uint8_t crn, uint8_t crm, uint8_t rt)
2119 {
2120     /*
2121      * Generate code to emit an UNDEF with correct syndrome
2122      * information for a failed system register access.
2123      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2124      * but if FEAT_IDST is implemented then read accesses to registers
2125      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2126      * syndrome.
2127      */
2128     uint32_t syndrome;
2129 
2130     if (isread && dc_isar_feature(aa64_ids, s) &&
2131         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2132         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2133     } else {
2134         syndrome = syn_uncategorized();
2135     }
2136     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2137 }
2138 
/* MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * @s: translation context
 * @isread: true for a register read (result goes to Rt), false for a write
 * @op0, @op1, @op2, @crn, @crm: system register encoding fields
 * @rt: general purpose register number used as destination/source
 *
 * Overall flow:
 *  1. emit the TIDCP trap check for crn 11 / 15;
 *  2. UNDEF if the register is unknown;
 *  3. work out FEAT_NV2 redirection and static access permission;
 *  4. emit runtime access checks (accessfn / fine-grained traps) and
 *     the FP/SVE/SME access checks;
 *  5. handle NV trap-to-EL2, NV2 register redirection and NV2
 *     redirection to memory;
 *  6. handle the "special" register types;
 *  7. emit the ordinary read or write and decide whether the TB ends.
 */
static void handle_sys(DisasContext *s, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                      crn, crm, op0, op1, op2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    bool need_exit_tb = false;
    bool nv_trap_to_el2 = false;
    bool nv_redirect_reg = false;
    bool skip_fp_access_checks = false;
    bool nv2_mem_redirect = false;
    /* Non-NULL once a helper has handed back the reginfo pointer. */
    TCGv_ptr tcg_ri = NULL;
    TCGv_i64 tcg_rt;
    uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);

    if (crn == 11 || crn == 15) {
        /*
         * Check for TIDCP trap, which must take precedence over
         * the UNDEF for "no such register" etc.
         */
        switch (s->current_el) {
        case 0:
            if (dc_isar_feature(aa64_tidcp1, s)) {
                gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
            }
            break;
        case 1:
            gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
            break;
        }
    }

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    if (s->nv2 && ri->nv2_redirect_offset) {
        /*
         * Some registers always redirect to memory; some only do so if
         * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
         * pairs which share an offset; see the table in R_CSRPQ).
         */
        if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
            nv2_mem_redirect = s->nv1;
        } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
            nv2_mem_redirect = !s->nv1;
        } else {
            nv2_mem_redirect = true;
        }
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        /*
         * FEAT_NV/NV2 handling does not do the usual FP access checks
         * for registers only accessible at EL2 (though it *does* do them
         * for registers accessible at EL1).
         */
        skip_fp_access_checks = true;
        if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
            /*
             * This is one of the few EL2 registers which should redirect
             * to the equivalent EL1 register. We do that after running
             * the EL2 register's accessfn.
             */
            nv_redirect_reg = true;
            assert(!nv2_mem_redirect);
        } else if (nv2_mem_redirect) {
            /*
             * NV2 redirect-to-memory takes precedence over trap to EL2 or
             * UNDEF to EL1.
             */
        } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
            /*
             * This register / instruction exists and is an EL2 register, so
             * we must trap to EL2 if accessed in nested virtualization EL1
             * instead of UNDEFing. We'll do that after the usual access checks.
             * (This makes a difference only for a couple of registers like
             * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
             * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
             * an accessfn which does nothing when called from EL1, because
             * the trap-to-EL3 controls which would apply to that register
             * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
             */
            nv_trap_to_el2 = true;
        } else {
            /* Not accessible and no FEAT_NV special case applies: UNDEF. */
            gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
            return;
        }
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         * The helper also hands back the reginfo pointer in tcg_ri,
         * which is reused by the read/write helpers further down.
         */
        gen_a64_update_pc(s, 0);
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    if (!skip_fp_access_checks) {
        /* A failed check means there is nothing more to emit here. */
        if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
            return;
        } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
            return;
        } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
            return;
        }
    }

    if (nv_trap_to_el2) {
        /* Deferred FEAT_NV trap: report the sysreg trap syndrome to EL2. */
        gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
        return;
    }

    if (nv_redirect_reg) {
        /*
         * FEAT_NV2 redirection of an EL2 register to an EL1 register.
         * Conveniently in all cases the encoding of the EL1 register is
         * identical to the EL2 register except that opc1 is 0.
         * Get the reginfo for the EL1 register to use for the actual access.
         * We don't use the EL1 register's access function, and
         * fine-grained-traps on EL1 also do not apply here.
         */
        key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                 crn, crm, op0, 0, op2);
        ri = get_arm_cp_reginfo(s->cp_regs, key);
        assert(ri);
        assert(cp_access_ok(s->current_el, ri, isread));
        /*
         * We might not have done an update_pc earlier, so check we don't
         * need it. We could support this in future if necessary.
         */
        assert(!(ri->type & ARM_CP_RAISES_EXC));
    }

    if (nv2_mem_redirect) {
        /*
         * This system register is being redirected into an EL2 memory access.
         * This means it is not an IO operation, doesn't change hflags,
         * and need not end the TB, because it has no side effects.
         *
         * The access is 64-bit single copy atomic, guaranteed aligned because
         * of the definition of VNCR_EL2. Its endianness depends on
         * SCTLR_EL2.EE, not on the data endianness of EL1.
         * It is done under either the EL2 translation regime or the EL2&0
         * translation regime, depending on HCR_EL2.E2H. It behaves as if
         * PSTATE.PAN is 0.
         */
        TCGv_i64 ptr = tcg_temp_new_i64();
        MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
        ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
        int memidx = arm_to_core_mmu_idx(armmemidx);
        uint32_t syn;

        mop |= (s->nv2_mem_be ? MO_BE : MO_LE);

        /* Address = VNCR_EL2 value + this register's offset (flags masked). */
        tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
        tcg_gen_addi_i64(ptr, ptr,
                         (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
        tcg_rt = cpu_reg(s, rt);

        /* Record the syndrome to report if the memory access faults. */
        syn = syn_data_abort_vncr(0, !isread, 0);
        disas_set_insn_syndrome(s, syn);
        if (isread) {
            tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
        } else {
            tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
        }
        return;
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        /* Not a special register: fall out to the generic path below. */
        break;
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
    {
        /*
         * Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         * For nested virt we should report EL2.
         */
        int el = s->nv ? 2 : s->current_el;
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, el << 2);
        return;
    }
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        if (s->mte_active[0]) {
            int desc = 0;

            desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
            desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
            desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);

            /* The MTE check helper produces the address given to dc_zva. */
            tcg_rt = tcg_temp_new_i64();
            gen_helper_mte_check_zva(tcg_rt, tcg_env,
                                     tcg_constant_i32(desc), cpu_reg(s, rt));
        } else {
            tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
        }
        gen_helper_dc_zva(tcg_env, tcg_rt);
        return;
    case ARM_CP_DC_GVA:
        {
            TCGv_i64 clean_addr, tag;

            /*
             * DC_GVA, like DC_ZVA, requires that we supply the original
             * pointer for an invalid page.  Probe that address first.
             */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);

            if (s->ata[0]) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
            }
        }
        return;
    case ARM_CP_DC_GZVA:
        {
            TCGv_i64 clean_addr, tag;

            /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_helper_dc_zva(tcg_env, clean_addr);

            if (s->ata[0]) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
            }
        }
        return;
    default:
        g_assert_not_reached();
    }

    if (ri->type & ARM_CP_IO) {
        /* I/O operations must end the TB here (whether read or write) */
        need_exit_tb = translator_io_start(&s->base);
    }

    tcg_rt = cpu_reg(s, rt);

    /*
     * Generic access: constant registers use their reset value, registers
     * with a readfn/writefn go through a helper, and everything else is a
     * direct load/store of the CPU state field at ri->fieldoffset.
     */
    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
        } else {
            tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
        } else {
            tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
        }
    }

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        need_exit_tb = true;
    }
    if (need_exit_tb) {
        s->base.is_jmp = DISAS_UPDATE_EXIT;
    }
}
2468 
2469 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2470 {
2471     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2472     return true;
2473 }
2474 
2475 static bool trans_SVC(DisasContext *s, arg_i *a)
2476 {
2477     /*
2478      * For SVC, HVC and SMC we advance the single-step state
2479      * machine before taking the exception. This is architecturally
2480      * mandated, to ensure that single-stepping a system call
2481      * instruction works properly.
2482      */
2483     uint32_t syndrome = syn_aa64_svc(a->imm);
2484     if (s->fgt_svc) {
2485         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2486         return true;
2487     }
2488     gen_ss_advance(s);
2489     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2490     return true;
2491 }
2492 
2493 static bool trans_HVC(DisasContext *s, arg_i *a)
2494 {
2495     int target_el = s->current_el == 3 ? 3 : 2;
2496 
2497     if (s->current_el == 0) {
2498         unallocated_encoding(s);
2499         return true;
2500     }
2501     /*
2502      * The pre HVC helper handles cases when HVC gets trapped
2503      * as an undefined insn by runtime configuration.
2504      */
2505     gen_a64_update_pc(s, 0);
2506     gen_helper_pre_hvc(tcg_env);
2507     /* Architecture requires ss advance before we do the actual work */
2508     gen_ss_advance(s);
2509     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2510     return true;
2511 }
2512 
2513 static bool trans_SMC(DisasContext *s, arg_i *a)
2514 {
2515     if (s->current_el == 0) {
2516         unallocated_encoding(s);
2517         return true;
2518     }
2519     gen_a64_update_pc(s, 0);
2520     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2521     /* Architecture requires ss advance before we do the actual work */
2522     gen_ss_advance(s);
2523     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2524     return true;
2525 }
2526 
2527 static bool trans_BRK(DisasContext *s, arg_i *a)
2528 {
2529     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2530     return true;
2531 }
2532 
2533 static bool trans_HLT(DisasContext *s, arg_i *a)
2534 {
2535     /*
2536      * HLT. This has two purposes.
2537      * Architecturally, it is an external halting debug instruction.
2538      * Since QEMU doesn't implement external debug, we treat this as
2539      * it is required for halting debug disabled: it will UNDEF.
2540      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2541      */
2542     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2543         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2544     } else {
2545         unallocated_encoding(s);
2546     }
2547     return true;
2548 }
2549 
/*
 * Load/Store exclusive instructions are implemented by remembering
 * the value/address loaded, and seeing if these are the same
 * when the store is performed. This is not actually the architecturally
 * mandated semantics, but it works for typical guest code sequences
 * and avoids having to monitor regular stores.
 *
 * The store exclusive uses the atomic cmpxchg primitives to avoid
 * races in multi-threaded linux-user and when MTTCG softmmu is
 * enabled.
 */

/*
 * Emit code for a load-exclusive.
 *
 * @rt, @rt2: destination registers (@rt2 is only written when @is_pair)
 * @rn: base address register (register 31 is treated as SP)
 * @size: log2 of the element size in bytes, at most 3
 * @is_pair: true for the pair form, which requires @size >= 2
 *
 * Besides loading the destination register(s), this records the loaded
 * data in cpu_exclusive_val (and cpu_exclusive_high for the 128-bit pair
 * form) and the checked address in cpu_exclusive_addr, and sets
 * s->is_ldex.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
                               int size, bool is_pair)
{
    int idx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr;
    /* A pair is accessed as one operation of twice the element size. */
    MemOp memop = check_atomic_align(s, rn, size + is_pair);

    s->is_ldex = true;
    dirty_addr = cpu_reg_sp(s, rn);
    clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);

    g_assert(size <= 3);
    if (is_pair) {
        g_assert(size >= 2);
        if (size == 2) {
            /*
             * Pair of 32-bit values: one load into cpu_exclusive_val,
             * then split into halves; which half belongs to which
             * register depends on the data endianness.
             */
            tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
            if (s->be_data == MO_LE) {
                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
            } else {
                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
            }
        } else {
            /* Pair of 64-bit values: a single 128-bit load. */
            TCGv_i128 t16 = tcg_temp_new_i128();

            tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);

            /*
             * Split the 128-bit value so that cpu_exclusive_val holds
             * the data for rt and cpu_exclusive_high the data for rt2.
             */
            if (s->be_data == MO_LE) {
                tcg_gen_extr_i128_i64(cpu_exclusive_val,
                                      cpu_exclusive_high, t16);
            } else {
                tcg_gen_extr_i128_i64(cpu_exclusive_high,
                                      cpu_exclusive_val, t16);
            }
            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
        }
    } else {
        tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
    }
    /* Remember the address for the matching store-exclusive. */
    tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
}
2605 
/*
 * Emit code for a store-exclusive.
 *
 * @rd: status register; receives 0 if the store happened, 1 otherwise
 * @rt, @rt2: source registers (@rt2 is only used when @is_pair)
 * @rn: base address register (register 31 is treated as SP)
 * @size: log2 of the element size in bytes
 * @is_pair: non-zero for the pair form
 *
 * The store is done with an atomic compare-and-exchange against the
 * value remembered by gen_load_exclusive(), at the remembered address.
 * cpu_exclusive_addr is reset to -1 on every path.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                int rn, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 tmp, clean_addr;
    MemOp memop;

    /*
     * FIXME: We are out of spec here.  We have recorded only the address
     * from load_exclusive, not the entire range, and we assume that the
     * size of the access on both sides match.  The architecture allows the
     * store to be smaller than the load, so long as the stored bytes are
     * within the range recorded by the load.
     */

    /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
    /* A different address from the one recorded: fail without storing. */
    tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);

    /*
     * The write, and any associated faults, only happen if the virtual
     * and physical addresses pass the exclusive monitor check.  These
     * faults are exceedingly unlikely, because normally the guest uses
     * the exact same address register for the load_exclusive, and we
     * would have recognized these faults there.
     *
     * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
     * unaligned 4-byte write within the range of an aligned 8-byte load.
     * With LSE2, the store would need to cross a 16-byte boundary when the
     * load did not, which would mean the store is outside the range
     * recorded for the monitor, which would have failed a corrected monitor
     * check above.  For now, we assume no size change and retain the
     * MO_ALIGN to let tcg know what we checked in the load_exclusive.
     *
     * It is possible to trigger an MTE fault, by performing the load with
     * a virtual address with a valid tag and performing the store with the
     * same virtual address and a different invalid tag.
     */
    /* A pair is accessed as one operation of twice the element size. */
    memop = size + is_pair;
    if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
        memop |= MO_ALIGN;
    }
    memop = finalize_memop(s, memop);
    /* Called for the check only; the address it returns is not used. */
    gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);

    /*
     * In every branch below tmp ends up 0 if the cmpxchg found the
     * remembered value in memory (store performed) and 1 otherwise.
     */
    tmp = tcg_temp_new_i64();
    if (is_pair) {
        if (size == 2) {
            /* Pair of 32-bit values, combined into one 64-bit store. */
            if (s->be_data == MO_LE) {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
            } else {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
            }
            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
                                       cpu_exclusive_val, tmp,
                                       get_mem_index(s), memop);
            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
        } else {
            /* Pair of 64-bit values, combined into one 128-bit store. */
            TCGv_i128 t16 = tcg_temp_new_i128();
            TCGv_i128 c16 = tcg_temp_new_i128();
            TCGv_i64 a, b;

            /* t16 is the new data, c16 the expected (remembered) data. */
            if (s->be_data == MO_LE) {
                tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
                tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
                                        cpu_exclusive_high);
            } else {
                tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
                tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
                                        cpu_exclusive_val);
            }

            tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
                                        get_mem_index(s), memop);

            /* Split the old memory value back into its two halves. */
            a = tcg_temp_new_i64();
            b = tcg_temp_new_i64();
            if (s->be_data == MO_LE) {
                tcg_gen_extr_i128_i64(a, b, t16);
            } else {
                tcg_gen_extr_i128_i64(b, a, t16);
            }

            /* Any difference in either half makes tmp non-zero. */
            tcg_gen_xor_i64(a, a, cpu_exclusive_val);
            tcg_gen_xor_i64(b, b, cpu_exclusive_high);
            tcg_gen_or_i64(tmp, a, b);

            tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
        }
    } else {
        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
                                   cpu_reg(s, rt), get_mem_index(s), memop);
        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
    }
    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
    tcg_gen_br(done_label);

    /* Address mismatch: report failure without touching memory. */
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Forget the recorded exclusive address on every path. */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
2721 
2722 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2723                                  int rn, int size)
2724 {
2725     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2726     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2727     int memidx = get_mem_index(s);
2728     TCGv_i64 clean_addr;
2729     MemOp memop;
2730 
2731     if (rn == 31) {
2732         gen_check_sp_alignment(s);
2733     }
2734     memop = check_atomic_align(s, rn, size);
2735     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2736     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2737                                memidx, memop);
2738 }
2739 
2740 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2741                                       int rn, int size)
2742 {
2743     TCGv_i64 s1 = cpu_reg(s, rs);
2744     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2745     TCGv_i64 t1 = cpu_reg(s, rt);
2746     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2747     TCGv_i64 clean_addr;
2748     int memidx = get_mem_index(s);
2749     MemOp memop;
2750 
2751     if (rn == 31) {
2752         gen_check_sp_alignment(s);
2753     }
2754 
2755     /* This is a single atomic access, despite the "pair". */
2756     memop = check_atomic_align(s, rn, size + 1);
2757     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2758 
2759     if (size == 2) {
2760         TCGv_i64 cmp = tcg_temp_new_i64();
2761         TCGv_i64 val = tcg_temp_new_i64();
2762 
2763         if (s->be_data == MO_LE) {
2764             tcg_gen_concat32_i64(val, t1, t2);
2765             tcg_gen_concat32_i64(cmp, s1, s2);
2766         } else {
2767             tcg_gen_concat32_i64(val, t2, t1);
2768             tcg_gen_concat32_i64(cmp, s2, s1);
2769         }
2770 
2771         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2772 
2773         if (s->be_data == MO_LE) {
2774             tcg_gen_extr32_i64(s1, s2, cmp);
2775         } else {
2776             tcg_gen_extr32_i64(s2, s1, cmp);
2777         }
2778     } else {
2779         TCGv_i128 cmp = tcg_temp_new_i128();
2780         TCGv_i128 val = tcg_temp_new_i128();
2781 
2782         if (s->be_data == MO_LE) {
2783             tcg_gen_concat_i64_i128(val, t1, t2);
2784             tcg_gen_concat_i64_i128(cmp, s1, s2);
2785         } else {
2786             tcg_gen_concat_i64_i128(val, t2, t1);
2787             tcg_gen_concat_i64_i128(cmp, s2, s1);
2788         }
2789 
2790         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2791 
2792         if (s->be_data == MO_LE) {
2793             tcg_gen_extr_i128_i64(s1, s2, cmp);
2794         } else {
2795             tcg_gen_extr_i128_i64(s2, s1, cmp);
2796         }
2797     }
2798 }
2799 
2800 /*
2801  * Compute the ISS.SF bit for syndrome information if an exception
2802  * is taken on a load or store. This indicates whether the instruction
2803  * is accessing a 32-bit or 64-bit register. This logic is derived
2804  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2805  */
2806 static bool ldst_iss_sf(int size, bool sign, bool ext)
2807 {
2808 
2809     if (sign) {
2810         /*
2811          * Signed loads are 64 bit results if we are not going to
2812          * do a zero-extend from 32 to 64 after the load.
2813          * (For a store, sign and ext are always false.)
2814          */
2815         return !ext;
2816     } else {
2817         /* Unsigned loads/stores work at the specified size */
2818         return size == MO_64;
2819     }
2820 }
2821 
2822 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2823 {
2824     if (a->rn == 31) {
2825         gen_check_sp_alignment(s);
2826     }
2827     if (a->lasr) {
2828         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2829     }
2830     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2831     return true;
2832 }
2833 
2834 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2835 {
2836     if (a->rn == 31) {
2837         gen_check_sp_alignment(s);
2838     }
2839     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2840     if (a->lasr) {
2841         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2842     }
2843     return true;
2844 }
2845 
2846 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2847 {
2848     TCGv_i64 clean_addr;
2849     MemOp memop;
2850     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2851 
2852     /*
2853      * StoreLORelease is the same as Store-Release for QEMU, but
2854      * needs the feature-test.
2855      */
2856     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2857         return false;
2858     }
2859     /* Generate ISS for non-exclusive accesses including LASR.  */
2860     if (a->rn == 31) {
2861         gen_check_sp_alignment(s);
2862     }
2863     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2864     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
2865     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2866                                 true, a->rn != 31, memop);
2867     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
2868               iss_sf, a->lasr);
2869     return true;
2870 }
2871 
2872 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
2873 {
2874     TCGv_i64 clean_addr;
2875     MemOp memop;
2876     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2877 
2878     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2879     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2880         return false;
2881     }
2882     /* Generate ISS for non-exclusive accesses including LASR.  */
2883     if (a->rn == 31) {
2884         gen_check_sp_alignment(s);
2885     }
2886     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
2887     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2888                                 false, a->rn != 31, memop);
2889     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
2890               a->rt, iss_sf, a->lasr);
2891     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2892     return true;
2893 }
2894 
2895 static bool trans_STXP(DisasContext *s, arg_stxr *a)
2896 {
2897     if (a->rn == 31) {
2898         gen_check_sp_alignment(s);
2899     }
2900     if (a->lasr) {
2901         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2902     }
2903     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
2904     return true;
2905 }
2906 
2907 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
2908 {
2909     if (a->rn == 31) {
2910         gen_check_sp_alignment(s);
2911     }
2912     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
2913     if (a->lasr) {
2914         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2915     }
2916     return true;
2917 }
2918 
2919 static bool trans_CASP(DisasContext *s, arg_CASP *a)
2920 {
2921     if (!dc_isar_feature(aa64_atomics, s)) {
2922         return false;
2923     }
2924     if (((a->rt | a->rs) & 1) != 0) {
2925         return false;
2926     }
2927 
2928     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
2929     return true;
2930 }
2931 
2932 static bool trans_CAS(DisasContext *s, arg_CAS *a)
2933 {
2934     if (!dc_isar_feature(aa64_atomics, s)) {
2935         return false;
2936     }
2937     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
2938     return true;
2939 }
2940 
2941 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
2942 {
2943     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
2944     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
2945     TCGv_i64 clean_addr = tcg_temp_new_i64();
2946     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
2947 
2948     gen_pc_plus_diff(s, clean_addr, a->imm);
2949     do_gpr_ld(s, tcg_rt, clean_addr, memop,
2950               false, true, a->rt, iss_sf, false);
2951     return true;
2952 }
2953 
2954 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
2955 {
2956     /* Load register (literal), vector version */
2957     TCGv_i64 clean_addr;
2958     MemOp memop;
2959 
2960     if (!fp_access_check(s)) {
2961         return true;
2962     }
2963     memop = finalize_memop_asimd(s, a->sz);
2964     clean_addr = tcg_temp_new_i64();
2965     gen_pc_plus_diff(s, clean_addr, a->imm);
2966     do_fp_ld(s, a->rt, clean_addr, memop);
2967     return true;
2968 }
2969 
2970 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
2971                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
2972                                  uint64_t offset, bool is_store, MemOp mop)
2973 {
2974     if (a->rn == 31) {
2975         gen_check_sp_alignment(s);
2976     }
2977 
2978     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
2979     if (!a->p) {
2980         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
2981     }
2982 
2983     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
2984                                  (a->w || a->rn != 31), 2 << a->sz, mop);
2985 }
2986 
2987 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
2988                                   TCGv_i64 dirty_addr, uint64_t offset)
2989 {
2990     if (a->w) {
2991         if (a->p) {
2992             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2993         }
2994         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
2995     }
2996 }
2997 
2998 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
2999 {
3000     uint64_t offset = a->imm << a->sz;
3001     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3002     MemOp mop = finalize_memop(s, a->sz);
3003 
3004     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3005     tcg_rt = cpu_reg(s, a->rt);
3006     tcg_rt2 = cpu_reg(s, a->rt2);
3007     /*
3008      * We built mop above for the single logical access -- rebuild it
3009      * now for the paired operation.
3010      *
3011      * With LSE2, non-sign-extending pairs are treated atomically if
3012      * aligned, and if unaligned one of the pair will be completely
3013      * within a 16-byte block and that element will be atomic.
3014      * Otherwise each element is separately atomic.
3015      * In all cases, issue one operation with the correct atomicity.
3016      */
3017     mop = a->sz + 1;
3018     if (s->align_mem) {
3019         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3020     }
3021     mop = finalize_memop_pair(s, mop);
3022     if (a->sz == 2) {
3023         TCGv_i64 tmp = tcg_temp_new_i64();
3024 
3025         if (s->be_data == MO_LE) {
3026             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3027         } else {
3028             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3029         }
3030         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3031     } else {
3032         TCGv_i128 tmp = tcg_temp_new_i128();
3033 
3034         if (s->be_data == MO_LE) {
3035             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3036         } else {
3037             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3038         }
3039         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3040     }
3041     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3042     return true;
3043 }
3044 
3045 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3046 {
3047     uint64_t offset = a->imm << a->sz;
3048     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3049     MemOp mop = finalize_memop(s, a->sz);
3050 
3051     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3052     tcg_rt = cpu_reg(s, a->rt);
3053     tcg_rt2 = cpu_reg(s, a->rt2);
3054 
3055     /*
3056      * We built mop above for the single logical access -- rebuild it
3057      * now for the paired operation.
3058      *
3059      * With LSE2, non-sign-extending pairs are treated atomically if
3060      * aligned, and if unaligned one of the pair will be completely
3061      * within a 16-byte block and that element will be atomic.
3062      * Otherwise each element is separately atomic.
3063      * In all cases, issue one operation with the correct atomicity.
3064      *
3065      * This treats sign-extending loads like zero-extending loads,
3066      * since that reuses the most code below.
3067      */
3068     mop = a->sz + 1;
3069     if (s->align_mem) {
3070         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3071     }
3072     mop = finalize_memop_pair(s, mop);
3073     if (a->sz == 2) {
3074         int o2 = s->be_data == MO_LE ? 32 : 0;
3075         int o1 = o2 ^ 32;
3076 
3077         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3078         if (a->sign) {
3079             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3080             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3081         } else {
3082             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3083             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3084         }
3085     } else {
3086         TCGv_i128 tmp = tcg_temp_new_i128();
3087 
3088         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3089         if (s->be_data == MO_LE) {
3090             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3091         } else {
3092             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3093         }
3094     }
3095     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3096     return true;
3097 }
3098 
3099 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3100 {
3101     uint64_t offset = a->imm << a->sz;
3102     TCGv_i64 clean_addr, dirty_addr;
3103     MemOp mop;
3104 
3105     if (!fp_access_check(s)) {
3106         return true;
3107     }
3108 
3109     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3110     mop = finalize_memop_asimd(s, a->sz);
3111     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3112     do_fp_st(s, a->rt, clean_addr, mop);
3113     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3114     do_fp_st(s, a->rt2, clean_addr, mop);
3115     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3116     return true;
3117 }
3118 
3119 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3120 {
3121     uint64_t offset = a->imm << a->sz;
3122     TCGv_i64 clean_addr, dirty_addr;
3123     MemOp mop;
3124 
3125     if (!fp_access_check(s)) {
3126         return true;
3127     }
3128 
3129     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3130     mop = finalize_memop_asimd(s, a->sz);
3131     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3132     do_fp_ld(s, a->rt, clean_addr, mop);
3133     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3134     do_fp_ld(s, a->rt2, clean_addr, mop);
3135     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3136     return true;
3137 }
3138 
3139 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3140 {
3141     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3142     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3143     MemOp mop;
3144     TCGv_i128 tmp;
3145 
3146     /* STGP only comes in one size. */
3147     tcg_debug_assert(a->sz == MO_64);
3148 
3149     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3150         return false;
3151     }
3152 
3153     if (a->rn == 31) {
3154         gen_check_sp_alignment(s);
3155     }
3156 
3157     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3158     if (!a->p) {
3159         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3160     }
3161 
3162     clean_addr = clean_data_tbi(s, dirty_addr);
3163     tcg_rt = cpu_reg(s, a->rt);
3164     tcg_rt2 = cpu_reg(s, a->rt2);
3165 
3166     /*
3167      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3168      * and one tag operation.  We implement it as one single aligned 16-byte
3169      * memory operation for convenience.  Note that the alignment ensures
3170      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3171      */
3172     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3173 
3174     tmp = tcg_temp_new_i128();
3175     if (s->be_data == MO_LE) {
3176         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3177     } else {
3178         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3179     }
3180     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3181 
3182     /* Perform the tag store, if tag access enabled. */
3183     if (s->ata[0]) {
3184         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3185             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3186         } else {
3187             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3188         }
3189     }
3190 
3191     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3192     return true;
3193 }
3194 
3195 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3196                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3197                                  uint64_t offset, bool is_store, MemOp mop)
3198 {
3199     int memidx;
3200 
3201     if (a->rn == 31) {
3202         gen_check_sp_alignment(s);
3203     }
3204 
3205     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3206     if (!a->p) {
3207         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3208     }
3209     memidx = get_a64_user_mem_index(s, a->unpriv);
3210     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3211                                         a->w || a->rn != 31,
3212                                         mop, a->unpriv, memidx);
3213 }
3214 
3215 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3216                                   TCGv_i64 dirty_addr, uint64_t offset)
3217 {
3218     if (a->w) {
3219         if (a->p) {
3220             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3221         }
3222         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3223     }
3224 }
3225 
3226 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3227 {
3228     bool iss_sf, iss_valid = !a->w;
3229     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3230     int memidx = get_a64_user_mem_index(s, a->unpriv);
3231     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3232 
3233     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3234 
3235     tcg_rt = cpu_reg(s, a->rt);
3236     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3237 
3238     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3239                      iss_valid, a->rt, iss_sf, false);
3240     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3241     return true;
3242 }
3243 
3244 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3245 {
3246     bool iss_sf, iss_valid = !a->w;
3247     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3248     int memidx = get_a64_user_mem_index(s, a->unpriv);
3249     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3250 
3251     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3252 
3253     tcg_rt = cpu_reg(s, a->rt);
3254     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3255 
3256     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3257                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3258     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3259     return true;
3260 }
3261 
3262 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3263 {
3264     TCGv_i64 clean_addr, dirty_addr;
3265     MemOp mop;
3266 
3267     if (!fp_access_check(s)) {
3268         return true;
3269     }
3270     mop = finalize_memop_asimd(s, a->sz);
3271     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3272     do_fp_st(s, a->rt, clean_addr, mop);
3273     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3274     return true;
3275 }
3276 
3277 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3278 {
3279     TCGv_i64 clean_addr, dirty_addr;
3280     MemOp mop;
3281 
3282     if (!fp_access_check(s)) {
3283         return true;
3284     }
3285     mop = finalize_memop_asimd(s, a->sz);
3286     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3287     do_fp_ld(s, a->rt, clean_addr, mop);
3288     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3289     return true;
3290 }
3291 
3292 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3293                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3294                              bool is_store, MemOp memop)
3295 {
3296     TCGv_i64 tcg_rm;
3297 
3298     if (a->rn == 31) {
3299         gen_check_sp_alignment(s);
3300     }
3301     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3302 
3303     tcg_rm = read_cpu_reg(s, a->rm, 1);
3304     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3305 
3306     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3307     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3308 }
3309 
3310 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3311 {
3312     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3313     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3314     MemOp memop;
3315 
3316     if (extract32(a->opt, 1, 1) == 0) {
3317         return false;
3318     }
3319 
3320     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3321     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3322     tcg_rt = cpu_reg(s, a->rt);
3323     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3324               a->ext, true, a->rt, iss_sf, false);
3325     return true;
3326 }
3327 
3328 static bool trans_STR(DisasContext *s, arg_ldst *a)
3329 {
3330     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3331     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3332     MemOp memop;
3333 
3334     if (extract32(a->opt, 1, 1) == 0) {
3335         return false;
3336     }
3337 
3338     memop = finalize_memop(s, a->sz);
3339     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3340     tcg_rt = cpu_reg(s, a->rt);
3341     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3342     return true;
3343 }
3344 
3345 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3346 {
3347     TCGv_i64 clean_addr, dirty_addr;
3348     MemOp memop;
3349 
3350     if (extract32(a->opt, 1, 1) == 0) {
3351         return false;
3352     }
3353 
3354     if (!fp_access_check(s)) {
3355         return true;
3356     }
3357 
3358     memop = finalize_memop_asimd(s, a->sz);
3359     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3360     do_fp_ld(s, a->rt, clean_addr, memop);
3361     return true;
3362 }
3363 
3364 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3365 {
3366     TCGv_i64 clean_addr, dirty_addr;
3367     MemOp memop;
3368 
3369     if (extract32(a->opt, 1, 1) == 0) {
3370         return false;
3371     }
3372 
3373     if (!fp_access_check(s)) {
3374         return true;
3375     }
3376 
3377     memop = finalize_memop_asimd(s, a->sz);
3378     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3379     do_fp_st(s, a->rt, clean_addr, memop);
3380     return true;
3381 }
3382 
3383 
3384 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3385                          int sign, bool invert)
3386 {
3387     MemOp mop = a->sz | sign;
3388     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3389 
3390     if (a->rn == 31) {
3391         gen_check_sp_alignment(s);
3392     }
3393     mop = check_atomic_align(s, a->rn, mop);
3394     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3395                                 a->rn != 31, mop);
3396     tcg_rs = read_cpu_reg(s, a->rs, true);
3397     tcg_rt = cpu_reg(s, a->rt);
3398     if (invert) {
3399         tcg_gen_not_i64(tcg_rs, tcg_rs);
3400     }
3401     /*
3402      * The tcg atomic primitives are all full barriers.  Therefore we
3403      * can ignore the Acquire and Release bits of this instruction.
3404      */
3405     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3406 
3407     if (mop & MO_SIGN) {
3408         switch (a->sz) {
3409         case MO_8:
3410             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3411             break;
3412         case MO_16:
3413             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3414             break;
3415         case MO_32:
3416             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3417             break;
3418         case MO_64:
3419             break;
3420         default:
3421             g_assert_not_reached();
3422         }
3423     }
3424     return true;
3425 }
3426 
3427 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3428 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3429 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3430 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3431 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3432 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3433 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3434 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3435 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3436 
3437 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3438 {
3439     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3440     TCGv_i64 clean_addr;
3441     MemOp mop;
3442 
3443     if (!dc_isar_feature(aa64_atomics, s) ||
3444         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3445         return false;
3446     }
3447     if (a->rn == 31) {
3448         gen_check_sp_alignment(s);
3449     }
3450     mop = check_atomic_align(s, a->rn, a->sz);
3451     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3452                                 a->rn != 31, mop);
3453     /*
3454      * LDAPR* are a special case because they are a simple load, not a
3455      * fetch-and-do-something op.
3456      * The architectural consistency requirements here are weaker than
3457      * full load-acquire (we only need "load-acquire processor consistent"),
3458      * but we choose to implement them as full LDAQ.
3459      */
3460     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3461               true, a->rt, iss_sf, true);
3462     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3463     return true;
3464 }
3465 
3466 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3467 {
3468     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3469     MemOp memop;
3470 
3471     /* Load with pointer authentication */
3472     if (!dc_isar_feature(aa64_pauth, s)) {
3473         return false;
3474     }
3475 
3476     if (a->rn == 31) {
3477         gen_check_sp_alignment(s);
3478     }
3479     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3480 
3481     if (s->pauth_active) {
3482         if (!a->m) {
3483             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3484                                       tcg_constant_i64(0));
3485         } else {
3486             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3487                                       tcg_constant_i64(0));
3488         }
3489     }
3490 
3491     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3492 
3493     memop = finalize_memop(s, MO_64);
3494 
3495     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3496     clean_addr = gen_mte_check1(s, dirty_addr, false,
3497                                 a->w || a->rn != 31, memop);
3498 
3499     tcg_rt = cpu_reg(s, a->rt);
3500     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3501               /* extend */ false, /* iss_valid */ !a->w,
3502               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3503 
3504     if (a->w) {
3505         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3506     }
3507     return true;
3508 }
3509 
3510 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3511 {
3512     TCGv_i64 clean_addr, dirty_addr;
3513     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3514     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3515 
3516     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3517         return false;
3518     }
3519 
3520     if (a->rn == 31) {
3521         gen_check_sp_alignment(s);
3522     }
3523 
3524     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3525     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3526     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3527     clean_addr = clean_data_tbi(s, dirty_addr);
3528 
3529     /*
3530      * Load-AcquirePC semantics; we implement as the slightly more
3531      * restrictive Load-Acquire.
3532      */
3533     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3534               a->rt, iss_sf, true);
3535     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3536     return true;
3537 }
3538 
3539 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3540 {
3541     TCGv_i64 clean_addr, dirty_addr;
3542     MemOp mop = a->sz;
3543     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3544 
3545     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3546         return false;
3547     }
3548 
3549     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3550 
3551     if (a->rn == 31) {
3552         gen_check_sp_alignment(s);
3553     }
3554 
3555     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3556     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3557     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3558     clean_addr = clean_data_tbi(s, dirty_addr);
3559 
3560     /* Store-Release semantics */
3561     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3562     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3563     return true;
3564 }
3565 
3566 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3567 {
3568     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3569     MemOp endian, align, mop;
3570 
3571     int total;    /* total bytes */
3572     int elements; /* elements per vector */
3573     int r;
3574     int size = a->sz;
3575 
3576     if (!a->p && a->rm != 0) {
3577         /* For non-postindexed accesses the Rm field must be 0 */
3578         return false;
3579     }
3580     if (size == 3 && !a->q && a->selem != 1) {
3581         return false;
3582     }
3583     if (!fp_access_check(s)) {
3584         return true;
3585     }
3586 
3587     if (a->rn == 31) {
3588         gen_check_sp_alignment(s);
3589     }
3590 
3591     /* For our purposes, bytes are always little-endian.  */
3592     endian = s->be_data;
3593     if (size == 0) {
3594         endian = MO_LE;
3595     }
3596 
3597     total = a->rpt * a->selem * (a->q ? 16 : 8);
3598     tcg_rn = cpu_reg_sp(s, a->rn);
3599 
3600     /*
3601      * Issue the MTE check vs the logical repeat count, before we
3602      * promote consecutive little-endian elements below.
3603      */
3604     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3605                                 finalize_memop_asimd(s, size));
3606 
3607     /*
3608      * Consecutive little-endian elements from a single register
3609      * can be promoted to a larger little-endian operation.
3610      */
3611     align = MO_ALIGN;
3612     if (a->selem == 1 && endian == MO_LE) {
3613         align = pow2_align(size);
3614         size = 3;
3615     }
3616     if (!s->align_mem) {
3617         align = 0;
3618     }
3619     mop = endian | size | align;
3620 
3621     elements = (a->q ? 16 : 8) >> size;
3622     tcg_ebytes = tcg_constant_i64(1 << size);
3623     for (r = 0; r < a->rpt; r++) {
3624         int e;
3625         for (e = 0; e < elements; e++) {
3626             int xs;
3627             for (xs = 0; xs < a->selem; xs++) {
3628                 int tt = (a->rt + r + xs) % 32;
3629                 do_vec_ld(s, tt, e, clean_addr, mop);
3630                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3631             }
3632         }
3633     }
3634 
3635     /*
3636      * For non-quad operations, setting a slice of the low 64 bits of
3637      * the register clears the high 64 bits (in the ARM ARM pseudocode
3638      * this is implicit in the fact that 'rval' is a 64 bit wide
3639      * variable).  For quad operations, we might still need to zero
3640      * the high bits of SVE.
3641      */
3642     for (r = 0; r < a->rpt * a->selem; r++) {
3643         int tt = (a->rt + r) % 32;
3644         clear_vec_high(s, a->q, tt);
3645     }
3646 
3647     if (a->p) {
3648         if (a->rm == 31) {
3649             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3650         } else {
3651             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3652         }
3653     }
3654     return true;
3655 }
3656 
3657 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3658 {
3659     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3660     MemOp endian, align, mop;
3661 
3662     int total;    /* total bytes */
3663     int elements; /* elements per vector */
3664     int r;
3665     int size = a->sz;
3666 
3667     if (!a->p && a->rm != 0) {
3668         /* For non-postindexed accesses the Rm field must be 0 */
3669         return false;
3670     }
3671     if (size == 3 && !a->q && a->selem != 1) {
3672         return false;
3673     }
3674     if (!fp_access_check(s)) {
3675         return true;
3676     }
3677 
3678     if (a->rn == 31) {
3679         gen_check_sp_alignment(s);
3680     }
3681 
3682     /* For our purposes, bytes are always little-endian.  */
3683     endian = s->be_data;
3684     if (size == 0) {
3685         endian = MO_LE;
3686     }
3687 
3688     total = a->rpt * a->selem * (a->q ? 16 : 8);
3689     tcg_rn = cpu_reg_sp(s, a->rn);
3690 
3691     /*
3692      * Issue the MTE check vs the logical repeat count, before we
3693      * promote consecutive little-endian elements below.
3694      */
3695     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3696                                 finalize_memop_asimd(s, size));
3697 
3698     /*
3699      * Consecutive little-endian elements from a single register
3700      * can be promoted to a larger little-endian operation.
3701      */
3702     align = MO_ALIGN;
3703     if (a->selem == 1 && endian == MO_LE) {
3704         align = pow2_align(size);
3705         size = 3;
3706     }
3707     if (!s->align_mem) {
3708         align = 0;
3709     }
3710     mop = endian | size | align;
3711 
3712     elements = (a->q ? 16 : 8) >> size;
3713     tcg_ebytes = tcg_constant_i64(1 << size);
3714     for (r = 0; r < a->rpt; r++) {
3715         int e;
3716         for (e = 0; e < elements; e++) {
3717             int xs;
3718             for (xs = 0; xs < a->selem; xs++) {
3719                 int tt = (a->rt + r + xs) % 32;
3720                 do_vec_st(s, tt, e, clean_addr, mop);
3721                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3722             }
3723         }
3724     }
3725 
3726     if (a->p) {
3727         if (a->rm == 31) {
3728             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3729         } else {
3730             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3731         }
3732     }
3733     return true;
3734 }
3735 
3736 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3737 {
3738     int xs, total, rt;
3739     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3740     MemOp mop;
3741 
3742     if (!a->p && a->rm != 0) {
3743         return false;
3744     }
3745     if (!fp_access_check(s)) {
3746         return true;
3747     }
3748 
3749     if (a->rn == 31) {
3750         gen_check_sp_alignment(s);
3751     }
3752 
3753     total = a->selem << a->scale;
3754     tcg_rn = cpu_reg_sp(s, a->rn);
3755 
3756     mop = finalize_memop_asimd(s, a->scale);
3757     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3758                                 total, mop);
3759 
3760     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3761     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3762         do_vec_st(s, rt, a->index, clean_addr, mop);
3763         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3764     }
3765 
3766     if (a->p) {
3767         if (a->rm == 31) {
3768             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3769         } else {
3770             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3771         }
3772     }
3773     return true;
3774 }
3775 
3776 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3777 {
3778     int xs, total, rt;
3779     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3780     MemOp mop;
3781 
3782     if (!a->p && a->rm != 0) {
3783         return false;
3784     }
3785     if (!fp_access_check(s)) {
3786         return true;
3787     }
3788 
3789     if (a->rn == 31) {
3790         gen_check_sp_alignment(s);
3791     }
3792 
3793     total = a->selem << a->scale;
3794     tcg_rn = cpu_reg_sp(s, a->rn);
3795 
3796     mop = finalize_memop_asimd(s, a->scale);
3797     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3798                                 total, mop);
3799 
3800     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3801     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3802         do_vec_ld(s, rt, a->index, clean_addr, mop);
3803         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3804     }
3805 
3806     if (a->p) {
3807         if (a->rm == 31) {
3808             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3809         } else {
3810             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3811         }
3812     }
3813     return true;
3814 }
3815 
3816 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3817 {
3818     int xs, total, rt;
3819     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3820     MemOp mop;
3821 
3822     if (!a->p && a->rm != 0) {
3823         return false;
3824     }
3825     if (!fp_access_check(s)) {
3826         return true;
3827     }
3828 
3829     if (a->rn == 31) {
3830         gen_check_sp_alignment(s);
3831     }
3832 
3833     total = a->selem << a->scale;
3834     tcg_rn = cpu_reg_sp(s, a->rn);
3835 
3836     mop = finalize_memop_asimd(s, a->scale);
3837     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3838                                 total, mop);
3839 
3840     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3841     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3842         /* Load and replicate to all elements */
3843         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3844 
3845         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3846         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3847                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3848         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3849     }
3850 
3851     if (a->p) {
3852         if (a->rm == 31) {
3853             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3854         } else {
3855             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3856         }
3857     }
3858     return true;
3859 }
3860 
3861 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
3862 {
3863     TCGv_i64 addr, clean_addr, tcg_rt;
3864     int size = 4 << s->dcz_blocksize;
3865 
3866     if (!dc_isar_feature(aa64_mte, s)) {
3867         return false;
3868     }
3869     if (s->current_el == 0) {
3870         return false;
3871     }
3872 
3873     if (a->rn == 31) {
3874         gen_check_sp_alignment(s);
3875     }
3876 
3877     addr = read_cpu_reg_sp(s, a->rn, true);
3878     tcg_gen_addi_i64(addr, addr, a->imm);
3879     tcg_rt = cpu_reg(s, a->rt);
3880 
3881     if (s->ata[0]) {
3882         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
3883     }
3884     /*
3885      * The non-tags portion of STZGM is mostly like DC_ZVA,
3886      * except the alignment happens before the access.
3887      */
3888     clean_addr = clean_data_tbi(s, addr);
3889     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3890     gen_helper_dc_zva(tcg_env, clean_addr);
3891     return true;
3892 }
3893 
3894 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
3895 {
3896     TCGv_i64 addr, clean_addr, tcg_rt;
3897 
3898     if (!dc_isar_feature(aa64_mte, s)) {
3899         return false;
3900     }
3901     if (s->current_el == 0) {
3902         return false;
3903     }
3904 
3905     if (a->rn == 31) {
3906         gen_check_sp_alignment(s);
3907     }
3908 
3909     addr = read_cpu_reg_sp(s, a->rn, true);
3910     tcg_gen_addi_i64(addr, addr, a->imm);
3911     tcg_rt = cpu_reg(s, a->rt);
3912 
3913     if (s->ata[0]) {
3914         gen_helper_stgm(tcg_env, addr, tcg_rt);
3915     } else {
3916         MMUAccessType acc = MMU_DATA_STORE;
3917         int size = 4 << s->gm_blocksize;
3918 
3919         clean_addr = clean_data_tbi(s, addr);
3920         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3921         gen_probe_access(s, clean_addr, acc, size);
3922     }
3923     return true;
3924 }
3925 
3926 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
3927 {
3928     TCGv_i64 addr, clean_addr, tcg_rt;
3929 
3930     if (!dc_isar_feature(aa64_mte, s)) {
3931         return false;
3932     }
3933     if (s->current_el == 0) {
3934         return false;
3935     }
3936 
3937     if (a->rn == 31) {
3938         gen_check_sp_alignment(s);
3939     }
3940 
3941     addr = read_cpu_reg_sp(s, a->rn, true);
3942     tcg_gen_addi_i64(addr, addr, a->imm);
3943     tcg_rt = cpu_reg(s, a->rt);
3944 
3945     if (s->ata[0]) {
3946         gen_helper_ldgm(tcg_rt, tcg_env, addr);
3947     } else {
3948         MMUAccessType acc = MMU_DATA_LOAD;
3949         int size = 4 << s->gm_blocksize;
3950 
3951         clean_addr = clean_data_tbi(s, addr);
3952         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3953         gen_probe_access(s, clean_addr, acc, size);
3954         /* The result tags are zeros.  */
3955         tcg_gen_movi_i64(tcg_rt, 0);
3956     }
3957     return true;
3958 }
3959 
3960 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
3961 {
3962     TCGv_i64 addr, clean_addr, tcg_rt;
3963 
3964     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3965         return false;
3966     }
3967 
3968     if (a->rn == 31) {
3969         gen_check_sp_alignment(s);
3970     }
3971 
3972     addr = read_cpu_reg_sp(s, a->rn, true);
3973     if (!a->p) {
3974         /* pre-index or signed offset */
3975         tcg_gen_addi_i64(addr, addr, a->imm);
3976     }
3977 
3978     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
3979     tcg_rt = cpu_reg(s, a->rt);
3980     if (s->ata[0]) {
3981         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
3982     } else {
3983         /*
3984          * Tag access disabled: we must check for aborts on the load
3985          * load from [rn+offset], and then insert a 0 tag into rt.
3986          */
3987         clean_addr = clean_data_tbi(s, addr);
3988         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
3989         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
3990     }
3991 
3992     if (a->w) {
3993         /* pre-index or post-index */
3994         if (a->p) {
3995             /* post-index */
3996             tcg_gen_addi_i64(addr, addr, a->imm);
3997         }
3998         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
3999     }
4000     return true;
4001 }
4002 
4003 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4004 {
4005     TCGv_i64 addr, tcg_rt;
4006 
4007     if (a->rn == 31) {
4008         gen_check_sp_alignment(s);
4009     }
4010 
4011     addr = read_cpu_reg_sp(s, a->rn, true);
4012     if (!a->p) {
4013         /* pre-index or signed offset */
4014         tcg_gen_addi_i64(addr, addr, a->imm);
4015     }
4016     tcg_rt = cpu_reg_sp(s, a->rt);
4017     if (!s->ata[0]) {
4018         /*
4019          * For STG and ST2G, we need to check alignment and probe memory.
4020          * TODO: For STZG and STZ2G, we could rely on the stores below,
4021          * at least for system mode; user-only won't enforce alignment.
4022          */
4023         if (is_pair) {
4024             gen_helper_st2g_stub(tcg_env, addr);
4025         } else {
4026             gen_helper_stg_stub(tcg_env, addr);
4027         }
4028     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4029         if (is_pair) {
4030             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4031         } else {
4032             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4033         }
4034     } else {
4035         if (is_pair) {
4036             gen_helper_st2g(tcg_env, addr, tcg_rt);
4037         } else {
4038             gen_helper_stg(tcg_env, addr, tcg_rt);
4039         }
4040     }
4041 
4042     if (is_zero) {
4043         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4044         TCGv_i64 zero64 = tcg_constant_i64(0);
4045         TCGv_i128 zero128 = tcg_temp_new_i128();
4046         int mem_index = get_mem_index(s);
4047         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4048 
4049         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4050 
4051         /* This is 1 or 2 atomic 16-byte operations. */
4052         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4053         if (is_pair) {
4054             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4055             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4056         }
4057     }
4058 
4059     if (a->w) {
4060         /* pre-index or post-index */
4061         if (a->p) {
4062             /* post-index */
4063             tcg_gen_addi_i64(addr, addr, a->imm);
4064         }
4065         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4066     }
4067     return true;
4068 }
4069 
4070 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4071 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4072 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4073 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4074 
4075 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4076 
4077 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4078                    bool is_setg, SetFn fn)
4079 {
4080     int memidx;
4081     uint32_t syndrome, desc = 0;
4082 
4083     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4084         return false;
4085     }
4086 
4087     /*
4088      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4089      * us to pull this check before the CheckMOPSEnabled() test
4090      * (which we do in the helper function)
4091      */
4092     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4093         a->rd == 31 || a->rn == 31) {
4094         return false;
4095     }
4096 
4097     memidx = get_a64_user_mem_index(s, a->unpriv);
4098 
4099     /*
4100      * We pass option_a == true, matching our implementation;
4101      * we pass wrong_option == false: helper function may set that bit.
4102      */
4103     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4104                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4105 
4106     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4107         /* We may need to do MTE tag checking, so assemble the descriptor */
4108         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4109         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4110         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4111         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4112     }
4113     /* The helper function always needs the memidx even with MTE disabled */
4114     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4115 
4116     /*
4117      * The helper needs the register numbers, but since they're in
4118      * the syndrome anyway, we let it extract them from there rather
4119      * than passing in an extra three integer arguments.
4120      */
4121     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4122     return true;
4123 }
4124 
4125 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4126 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4127 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4128 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4129 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4130 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4131 
4132 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4133 
4134 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4135 {
4136     int rmemidx, wmemidx;
4137     uint32_t syndrome, rdesc = 0, wdesc = 0;
4138     bool wunpriv = extract32(a->options, 0, 1);
4139     bool runpriv = extract32(a->options, 1, 1);
4140 
4141     /*
4142      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4143      * us to pull this check before the CheckMOPSEnabled() test
4144      * (which we do in the helper function)
4145      */
4146     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4147         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4148         return false;
4149     }
4150 
4151     rmemidx = get_a64_user_mem_index(s, runpriv);
4152     wmemidx = get_a64_user_mem_index(s, wunpriv);
4153 
4154     /*
4155      * We pass option_a == true, matching our implementation;
4156      * we pass wrong_option == false: helper function may set that bit.
4157      */
4158     syndrome = syn_mop(false, false, a->options, is_epilogue,
4159                        false, true, a->rd, a->rs, a->rn);
4160 
4161     /* If we need to do MTE tag checking, assemble the descriptors */
4162     if (s->mte_active[runpriv]) {
4163         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4164         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4165     }
4166     if (s->mte_active[wunpriv]) {
4167         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4168         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4169         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4170     }
4171     /* The helper function needs these parts of the descriptor regardless */
4172     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4173     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4174 
4175     /*
4176      * The helper needs the register numbers, but since they're in
4177      * the syndrome anyway, we let it extract them from there rather
4178      * than passing in an extra three integer arguments.
4179      */
4180     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4181        tcg_constant_i32(rdesc));
4182     return true;
4183 }
4184 
4185 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4186 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4187 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4188 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4189 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4190 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4191 
4192 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4193 
4194 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4195                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4196 {
4197     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4198     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4199     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4200 
4201     fn(tcg_rd, tcg_rn, tcg_imm);
4202     if (!a->sf) {
4203         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4204     }
4205     return true;
4206 }
4207 
4208 /*
4209  * PC-rel. addressing
4210  */
4211 
4212 static bool trans_ADR(DisasContext *s, arg_ri *a)
4213 {
4214     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4215     return true;
4216 }
4217 
4218 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4219 {
4220     int64_t offset = (int64_t)a->imm << 12;
4221 
4222     /* The page offset is ok for CF_PCREL. */
4223     offset -= s->pc_curr & 0xfff;
4224     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4225     return true;
4226 }
4227 
4228 /*
4229  * Add/subtract (immediate)
4230  */
4231 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4232 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4233 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4234 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4235 
4236 /*
4237  * Add/subtract (immediate, with tags)
4238  */
4239 
4240 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4241                                       bool sub_op)
4242 {
4243     TCGv_i64 tcg_rn, tcg_rd;
4244     int imm;
4245 
4246     imm = a->uimm6 << LOG2_TAG_GRANULE;
4247     if (sub_op) {
4248         imm = -imm;
4249     }
4250 
4251     tcg_rn = cpu_reg_sp(s, a->rn);
4252     tcg_rd = cpu_reg_sp(s, a->rd);
4253 
4254     if (s->ata[0]) {
4255         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4256                            tcg_constant_i32(imm),
4257                            tcg_constant_i32(a->uimm4));
4258     } else {
4259         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4260         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4261     }
4262     return true;
4263 }
4264 
4265 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4266 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4267 
/*
 * Replicate the low @e bits of @mask across a 64-bit value.
 *
 * @mask must only have bits set within its bottom @e bits, and @e must
 * be non-zero.  The element is doubled in width on every step until it
 * covers all 64 bits; the filled-in value is returned.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
4281 
4282 /*
4283  * Logical (immediate)
4284  */
4285 
/*
 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * Returns false if immn/imms/immr form a reserved encoding (i.e. one that
 * should cause a guest UNDEF exception); *result is left untouched then.
 * Returns true for a valid encoding, with the decoded 64-bit pattern
 * written to *result.
 */
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr)
{
    unsigned int esize, emask, run, rot;
    uint64_t elem;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /*
     * The patterns produced here are 64-bit values made of identical
     * elements of size e = 2, 4, 8, 16, 32 or 64 bits.  Each element is
     * a run of between 1 and e-1 set bits, rotated within the element
     * by between 0 and e-1 bits.
     *
     * immn (1 bit) and imms (6 bits) encode element size and run length:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * The only combination not covered above is immn = 0 with
     * imms = 11111x (binary), which is reserved.  In addition an
     * all-ones <length of run - 1> field is reserved.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* Element size: position of the highest set bit of immn:NOT(imms). */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* This is the immn == 0, imms == 11111x (binary) case */
        return false;
    }
    esize = 1u << log2_esize;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* One element: run+1 set bits, rotated right by rot within esize bits */
    elem = MAKE_64BIT_MASK(0, run + 1);
    if (rot != 0) {
        elem = (elem >> rot) | (elem << (esize - rot));
        elem &= MAKE_64BIT_MASK(0, esize);
    }

    /* ...then replicate the element over the whole 64 bit value */
    *result = bitfield_replicate(elem, esize);
    return true;
}
4352 
4353 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4354                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4355 {
4356     TCGv_i64 tcg_rd, tcg_rn;
4357     uint64_t imm;
4358 
4359     /* Some immediate field values are reserved. */
4360     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4361                                 extract32(a->dbm, 0, 6),
4362                                 extract32(a->dbm, 6, 6))) {
4363         return false;
4364     }
4365     if (!a->sf) {
4366         imm &= 0xffffffffull;
4367     }
4368 
4369     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4370     tcg_rn = cpu_reg(s, a->rn);
4371 
4372     fn(tcg_rd, tcg_rn, imm);
4373     if (set_cc) {
4374         gen_logic_CC(a->sf, tcg_rd);
4375     }
4376     if (!a->sf) {
4377         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4378     }
4379     return true;
4380 }
4381 
4382 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4383 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4384 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4385 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4386 
4387 /*
4388  * Move wide (immediate)
4389  */
4390 
4391 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4392 {
4393     int pos = a->hw << 4;
4394     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4395     return true;
4396 }
4397 
4398 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4399 {
4400     int pos = a->hw << 4;
4401     uint64_t imm = a->imm;
4402 
4403     imm = ~(imm << pos);
4404     if (!a->sf) {
4405         imm = (uint32_t)imm;
4406     }
4407     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4408     return true;
4409 }
4410 
4411 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4412 {
4413     int pos = a->hw << 4;
4414     TCGv_i64 tcg_rd, tcg_im;
4415 
4416     tcg_rd = cpu_reg(s, a->rd);
4417     tcg_im = tcg_constant_i64(a->imm);
4418     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4419     if (!a->sf) {
4420         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4421     }
4422     return true;
4423 }
4424 
4425 /*
4426  * Bitfield
4427  */
4428 
4429 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4430 {
4431     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4432     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4433     unsigned int bitsize = a->sf ? 64 : 32;
4434     unsigned int ri = a->immr;
4435     unsigned int si = a->imms;
4436     unsigned int pos, len;
4437 
4438     if (si >= ri) {
4439         /* Wd<s-r:0> = Wn<s:r> */
4440         len = (si - ri) + 1;
4441         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4442         if (!a->sf) {
4443             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4444         }
4445     } else {
4446         /* Wd<32+s-r,32-r> = Wn<s:0> */
4447         len = si + 1;
4448         pos = (bitsize - ri) & (bitsize - 1);
4449 
4450         if (len < ri) {
4451             /*
4452              * Sign extend the destination field from len to fill the
4453              * balance of the word.  Let the deposit below insert all
4454              * of those sign bits.
4455              */
4456             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4457             len = ri;
4458         }
4459 
4460         /*
4461          * We start with zero, and we haven't modified any bits outside
4462          * bitsize, therefore no final zero-extension is unneeded for !sf.
4463          */
4464         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4465     }
4466     return true;
4467 }
4468 
4469 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4470 {
4471     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4472     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4473     unsigned int bitsize = a->sf ? 64 : 32;
4474     unsigned int ri = a->immr;
4475     unsigned int si = a->imms;
4476     unsigned int pos, len;
4477 
4478     tcg_rd = cpu_reg(s, a->rd);
4479     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4480 
4481     if (si >= ri) {
4482         /* Wd<s-r:0> = Wn<s:r> */
4483         len = (si - ri) + 1;
4484         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4485     } else {
4486         /* Wd<32+s-r,32-r> = Wn<s:0> */
4487         len = si + 1;
4488         pos = (bitsize - ri) & (bitsize - 1);
4489         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4490     }
4491     return true;
4492 }
4493 
4494 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4495 {
4496     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4497     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4498     unsigned int bitsize = a->sf ? 64 : 32;
4499     unsigned int ri = a->immr;
4500     unsigned int si = a->imms;
4501     unsigned int pos, len;
4502 
4503     tcg_rd = cpu_reg(s, a->rd);
4504     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4505 
4506     if (si >= ri) {
4507         /* Wd<s-r:0> = Wn<s:r> */
4508         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4509         len = (si - ri) + 1;
4510         pos = 0;
4511     } else {
4512         /* Wd<32+s-r,32-r> = Wn<s:0> */
4513         len = si + 1;
4514         pos = (bitsize - ri) & (bitsize - 1);
4515     }
4516 
4517     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4518     if (!a->sf) {
4519         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4520     }
4521     return true;
4522 }
4523 
4524 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4525 {
4526     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4527 
4528     tcg_rd = cpu_reg(s, a->rd);
4529 
4530     if (unlikely(a->imm == 0)) {
4531         /*
4532          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4533          * so an extract from bit 0 is a special case.
4534          */
4535         if (a->sf) {
4536             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4537         } else {
4538             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4539         }
4540     } else {
4541         tcg_rm = cpu_reg(s, a->rm);
4542         tcg_rn = cpu_reg(s, a->rn);
4543 
4544         if (a->sf) {
4545             /* Specialization to ROR happens in EXTRACT2.  */
4546             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4547         } else {
4548             TCGv_i32 t0 = tcg_temp_new_i32();
4549 
4550             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4551             if (a->rm == a->rn) {
4552                 tcg_gen_rotri_i32(t0, t0, a->imm);
4553             } else {
4554                 TCGv_i32 t1 = tcg_temp_new_i32();
4555                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4556                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4557             }
4558             tcg_gen_extu_i32_i64(tcg_rd, t0);
4559         }
4560     }
4561     return true;
4562 }
4563 
4564 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4565  * Note that it is the caller's responsibility to ensure that the
4566  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4567  * mandated semantics for out of range shifts.
4568  */
4569 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4570                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4571 {
4572     switch (shift_type) {
4573     case A64_SHIFT_TYPE_LSL:
4574         tcg_gen_shl_i64(dst, src, shift_amount);
4575         break;
4576     case A64_SHIFT_TYPE_LSR:
4577         tcg_gen_shr_i64(dst, src, shift_amount);
4578         break;
4579     case A64_SHIFT_TYPE_ASR:
4580         if (!sf) {
4581             tcg_gen_ext32s_i64(dst, src);
4582         }
4583         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4584         break;
4585     case A64_SHIFT_TYPE_ROR:
4586         if (sf) {
4587             tcg_gen_rotr_i64(dst, src, shift_amount);
4588         } else {
4589             TCGv_i32 t0, t1;
4590             t0 = tcg_temp_new_i32();
4591             t1 = tcg_temp_new_i32();
4592             tcg_gen_extrl_i64_i32(t0, src);
4593             tcg_gen_extrl_i64_i32(t1, shift_amount);
4594             tcg_gen_rotr_i32(t0, t0, t1);
4595             tcg_gen_extu_i32_i64(dst, t0);
4596         }
4597         break;
4598     default:
4599         assert(FALSE); /* all shift types should be handled */
4600         break;
4601     }
4602 
4603     if (!sf) { /* zero extend final result */
4604         tcg_gen_ext32u_i64(dst, dst);
4605     }
4606 }
4607 
4608 /* Shift a TCGv src by immediate, put result in dst.
4609  * The shift amount must be in range (this should always be true as the
4610  * relevant instructions will UNDEF on bad shift immediates).
4611  */
4612 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4613                           enum a64_shift_type shift_type, unsigned int shift_i)
4614 {
4615     assert(shift_i < (sf ? 64 : 32));
4616 
4617     if (shift_i == 0) {
4618         tcg_gen_mov_i64(dst, src);
4619     } else {
4620         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4621     }
4622 }
4623 
4624 /* Logical (shifted register)
4625  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4626  * +----+-----+-----------+-------+---+------+--------+------+------+
4627  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4628  * +----+-----+-----------+-------+---+------+--------+------+------+
4629  */
4630 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4631 {
4632     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4633     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4634 
4635     sf = extract32(insn, 31, 1);
4636     opc = extract32(insn, 29, 2);
4637     shift_type = extract32(insn, 22, 2);
4638     invert = extract32(insn, 21, 1);
4639     rm = extract32(insn, 16, 5);
4640     shift_amount = extract32(insn, 10, 6);
4641     rn = extract32(insn, 5, 5);
4642     rd = extract32(insn, 0, 5);
4643 
4644     if (!sf && (shift_amount & (1 << 5))) {
4645         unallocated_encoding(s);
4646         return;
4647     }
4648 
4649     tcg_rd = cpu_reg(s, rd);
4650 
4651     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4652         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4653          * register-register MOV and MVN, so it is worth special casing.
4654          */
4655         tcg_rm = cpu_reg(s, rm);
4656         if (invert) {
4657             tcg_gen_not_i64(tcg_rd, tcg_rm);
4658             if (!sf) {
4659                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4660             }
4661         } else {
4662             if (sf) {
4663                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4664             } else {
4665                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4666             }
4667         }
4668         return;
4669     }
4670 
4671     tcg_rm = read_cpu_reg(s, rm, sf);
4672 
4673     if (shift_amount) {
4674         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4675     }
4676 
4677     tcg_rn = cpu_reg(s, rn);
4678 
4679     switch (opc | (invert << 2)) {
4680     case 0: /* AND */
4681     case 3: /* ANDS */
4682         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4683         break;
4684     case 1: /* ORR */
4685         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4686         break;
4687     case 2: /* EOR */
4688         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4689         break;
4690     case 4: /* BIC */
4691     case 7: /* BICS */
4692         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4693         break;
4694     case 5: /* ORN */
4695         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4696         break;
4697     case 6: /* EON */
4698         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4699         break;
4700     default:
4701         assert(FALSE);
4702         break;
4703     }
4704 
4705     if (!sf) {
4706         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4707     }
4708 
4709     if (opc == 3) {
4710         gen_logic_CC(sf, tcg_rd);
4711     }
4712 }
4713 
4714 /*
4715  * Add/subtract (extended register)
4716  *
4717  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4718  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4719  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4720  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4721  *
4722  *  sf: 0 -> 32bit, 1 -> 64bit
4723  *  op: 0 -> add  , 1 -> sub
4724  *   S: 1 -> set flags
4725  * opt: 00
4726  * option: extension type (see DecodeRegExtend)
4727  * imm3: optional shift to Rm
4728  *
4729  * Rd = Rn + LSL(extend(Rm), amount)
4730  */
4731 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4732 {
4733     int rd = extract32(insn, 0, 5);
4734     int rn = extract32(insn, 5, 5);
4735     int imm3 = extract32(insn, 10, 3);
4736     int option = extract32(insn, 13, 3);
4737     int rm = extract32(insn, 16, 5);
4738     int opt = extract32(insn, 22, 2);
4739     bool setflags = extract32(insn, 29, 1);
4740     bool sub_op = extract32(insn, 30, 1);
4741     bool sf = extract32(insn, 31, 1);
4742 
4743     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4744     TCGv_i64 tcg_rd;
4745     TCGv_i64 tcg_result;
4746 
4747     if (imm3 > 4 || opt != 0) {
4748         unallocated_encoding(s);
4749         return;
4750     }
4751 
4752     /* non-flag setting ops may use SP */
4753     if (!setflags) {
4754         tcg_rd = cpu_reg_sp(s, rd);
4755     } else {
4756         tcg_rd = cpu_reg(s, rd);
4757     }
4758     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4759 
4760     tcg_rm = read_cpu_reg(s, rm, sf);
4761     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4762 
4763     tcg_result = tcg_temp_new_i64();
4764 
4765     if (!setflags) {
4766         if (sub_op) {
4767             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4768         } else {
4769             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4770         }
4771     } else {
4772         if (sub_op) {
4773             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4774         } else {
4775             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4776         }
4777     }
4778 
4779     if (sf) {
4780         tcg_gen_mov_i64(tcg_rd, tcg_result);
4781     } else {
4782         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4783     }
4784 }
4785 
4786 /*
4787  * Add/subtract (shifted register)
4788  *
4789  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4790  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4791  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4792  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4793  *
4794  *    sf: 0 -> 32bit, 1 -> 64bit
4795  *    op: 0 -> add  , 1 -> sub
4796  *     S: 1 -> set flags
4797  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4798  *  imm6: Shift amount to apply to Rm before the add/sub
4799  */
4800 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4801 {
4802     int rd = extract32(insn, 0, 5);
4803     int rn = extract32(insn, 5, 5);
4804     int imm6 = extract32(insn, 10, 6);
4805     int rm = extract32(insn, 16, 5);
4806     int shift_type = extract32(insn, 22, 2);
4807     bool setflags = extract32(insn, 29, 1);
4808     bool sub_op = extract32(insn, 30, 1);
4809     bool sf = extract32(insn, 31, 1);
4810 
4811     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4812     TCGv_i64 tcg_rn, tcg_rm;
4813     TCGv_i64 tcg_result;
4814 
4815     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4816         unallocated_encoding(s);
4817         return;
4818     }
4819 
4820     tcg_rn = read_cpu_reg(s, rn, sf);
4821     tcg_rm = read_cpu_reg(s, rm, sf);
4822 
4823     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4824 
4825     tcg_result = tcg_temp_new_i64();
4826 
4827     if (!setflags) {
4828         if (sub_op) {
4829             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4830         } else {
4831             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4832         }
4833     } else {
4834         if (sub_op) {
4835             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4836         } else {
4837             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4838         }
4839     }
4840 
4841     if (sf) {
4842         tcg_gen_mov_i64(tcg_rd, tcg_result);
4843     } else {
4844         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4845     }
4846 }
4847 
4848 /* Data-processing (3 source)
4849  *
4850  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4851  *  +--+------+-----------+------+------+----+------+------+------+
4852  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4853  *  +--+------+-----------+------+------+----+------+------+------+
4854  */
4855 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4856 {
4857     int rd = extract32(insn, 0, 5);
4858     int rn = extract32(insn, 5, 5);
4859     int ra = extract32(insn, 10, 5);
4860     int rm = extract32(insn, 16, 5);
4861     int op_id = (extract32(insn, 29, 3) << 4) |
4862         (extract32(insn, 21, 3) << 1) |
4863         extract32(insn, 15, 1);
4864     bool sf = extract32(insn, 31, 1);
4865     bool is_sub = extract32(op_id, 0, 1);
4866     bool is_high = extract32(op_id, 2, 1);
4867     bool is_signed = false;
4868     TCGv_i64 tcg_op1;
4869     TCGv_i64 tcg_op2;
4870     TCGv_i64 tcg_tmp;
4871 
4872     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4873     switch (op_id) {
4874     case 0x42: /* SMADDL */
4875     case 0x43: /* SMSUBL */
4876     case 0x44: /* SMULH */
4877         is_signed = true;
4878         break;
4879     case 0x0: /* MADD (32bit) */
4880     case 0x1: /* MSUB (32bit) */
4881     case 0x40: /* MADD (64bit) */
4882     case 0x41: /* MSUB (64bit) */
4883     case 0x4a: /* UMADDL */
4884     case 0x4b: /* UMSUBL */
4885     case 0x4c: /* UMULH */
4886         break;
4887     default:
4888         unallocated_encoding(s);
4889         return;
4890     }
4891 
4892     if (is_high) {
4893         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4894         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4895         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4896         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4897 
4898         if (is_signed) {
4899             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4900         } else {
4901             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4902         }
4903         return;
4904     }
4905 
4906     tcg_op1 = tcg_temp_new_i64();
4907     tcg_op2 = tcg_temp_new_i64();
4908     tcg_tmp = tcg_temp_new_i64();
4909 
4910     if (op_id < 0x42) {
4911         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4912         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4913     } else {
4914         if (is_signed) {
4915             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4916             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4917         } else {
4918             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4919             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4920         }
4921     }
4922 
4923     if (ra == 31 && !is_sub) {
4924         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4925         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4926     } else {
4927         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4928         if (is_sub) {
4929             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4930         } else {
4931             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4932         }
4933     }
4934 
4935     if (!sf) {
4936         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4937     }
4938 }
4939 
4940 /* Add/subtract (with carry)
4941  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4942  * +--+--+--+------------------------+------+-------------+------+-----+
4943  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4944  * +--+--+--+------------------------+------+-------------+------+-----+
4945  */
4946 
4947 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4948 {
4949     unsigned int sf, op, setflags, rm, rn, rd;
4950     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4951 
4952     sf = extract32(insn, 31, 1);
4953     op = extract32(insn, 30, 1);
4954     setflags = extract32(insn, 29, 1);
4955     rm = extract32(insn, 16, 5);
4956     rn = extract32(insn, 5, 5);
4957     rd = extract32(insn, 0, 5);
4958 
4959     tcg_rd = cpu_reg(s, rd);
4960     tcg_rn = cpu_reg(s, rn);
4961 
4962     if (op) {
4963         tcg_y = tcg_temp_new_i64();
4964         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4965     } else {
4966         tcg_y = cpu_reg(s, rm);
4967     }
4968 
4969     if (setflags) {
4970         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4971     } else {
4972         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4973     }
4974 }
4975 
4976 /*
4977  * Rotate right into flags
4978  *  31 30 29                21       15          10      5  4      0
4979  * +--+--+--+-----------------+--------+-----------+------+--+------+
4980  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4981  * +--+--+--+-----------------+--------+-----------+------+--+------+
4982  */
4983 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4984 {
4985     int mask = extract32(insn, 0, 4);
4986     int o2 = extract32(insn, 4, 1);
4987     int rn = extract32(insn, 5, 5);
4988     int imm6 = extract32(insn, 15, 6);
4989     int sf_op_s = extract32(insn, 29, 3);
4990     TCGv_i64 tcg_rn;
4991     TCGv_i32 nzcv;
4992 
4993     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4994         unallocated_encoding(s);
4995         return;
4996     }
4997 
4998     tcg_rn = read_cpu_reg(s, rn, 1);
4999     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
5000 
5001     nzcv = tcg_temp_new_i32();
5002     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
5003 
5004     if (mask & 8) { /* N */
5005         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
5006     }
5007     if (mask & 4) { /* Z */
5008         tcg_gen_not_i32(cpu_ZF, nzcv);
5009         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
5010     }
5011     if (mask & 2) { /* C */
5012         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
5013     }
5014     if (mask & 1) { /* V */
5015         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
5016     }
5017 }
5018 
5019 /*
5020  * Evaluate into flags
5021  *  31 30 29                21        15   14        10      5  4      0
5022  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5023  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
5024  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5025  */
5026 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
5027 {
5028     int o3_mask = extract32(insn, 0, 5);
5029     int rn = extract32(insn, 5, 5);
5030     int o2 = extract32(insn, 15, 6);
5031     int sz = extract32(insn, 14, 1);
5032     int sf_op_s = extract32(insn, 29, 3);
5033     TCGv_i32 tmp;
5034     int shift;
5035 
5036     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
5037         !dc_isar_feature(aa64_condm_4, s)) {
5038         unallocated_encoding(s);
5039         return;
5040     }
5041     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
5042 
5043     tmp = tcg_temp_new_i32();
5044     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
5045     tcg_gen_shli_i32(cpu_NF, tmp, shift);
5046     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
5047     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
5048     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
5049 }
5050 
5051 /* Conditional compare (immediate / register)
5052  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
5053  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5054  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
5055  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5056  *        [1]                             y                [0]       [0]
5057  */
5058 static void disas_cc(DisasContext *s, uint32_t insn)
5059 {
5060     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
5061     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
5062     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
5063     DisasCompare c;
5064 
5065     if (!extract32(insn, 29, 1)) {
5066         unallocated_encoding(s);
5067         return;
5068     }
5069     if (insn & (1 << 10 | 1 << 4)) {
5070         unallocated_encoding(s);
5071         return;
5072     }
5073     sf = extract32(insn, 31, 1);
5074     op = extract32(insn, 30, 1);
5075     is_imm = extract32(insn, 11, 1);
5076     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5077     cond = extract32(insn, 12, 4);
5078     rn = extract32(insn, 5, 5);
5079     nzcv = extract32(insn, 0, 4);
5080 
5081     /* Set T0 = !COND.  */
5082     tcg_t0 = tcg_temp_new_i32();
5083     arm_test_cc(&c, cond);
5084     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5085 
5086     /* Load the arguments for the new comparison.  */
5087     if (is_imm) {
5088         tcg_y = tcg_temp_new_i64();
5089         tcg_gen_movi_i64(tcg_y, y);
5090     } else {
5091         tcg_y = cpu_reg(s, y);
5092     }
5093     tcg_rn = cpu_reg(s, rn);
5094 
5095     /* Set the flags for the new comparison.  */
5096     tcg_tmp = tcg_temp_new_i64();
5097     if (op) {
5098         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5099     } else {
5100         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5101     }
5102 
5103     /* If COND was false, force the flags to #nzcv.  Compute two masks
5104      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5105      * For tcg hosts that support ANDC, we can make do with just T1.
5106      * In either case, allow the tcg optimizer to delete any unused mask.
5107      */
5108     tcg_t1 = tcg_temp_new_i32();
5109     tcg_t2 = tcg_temp_new_i32();
5110     tcg_gen_neg_i32(tcg_t1, tcg_t0);
5111     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5112 
5113     if (nzcv & 8) { /* N */
5114         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5115     } else {
5116         if (TCG_TARGET_HAS_andc_i32) {
5117             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5118         } else {
5119             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5120         }
5121     }
5122     if (nzcv & 4) { /* Z */
5123         if (TCG_TARGET_HAS_andc_i32) {
5124             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5125         } else {
5126             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5127         }
5128     } else {
5129         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5130     }
5131     if (nzcv & 2) { /* C */
5132         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5133     } else {
5134         if (TCG_TARGET_HAS_andc_i32) {
5135             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5136         } else {
5137             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5138         }
5139     }
5140     if (nzcv & 1) { /* V */
5141         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5142     } else {
5143         if (TCG_TARGET_HAS_andc_i32) {
5144             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5145         } else {
5146             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5147         }
5148     }
5149 }
5150 
5151 /* Conditional select
5152  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5153  * +----+----+---+-----------------+------+------+-----+------+------+
5154  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5155  * +----+----+---+-----------------+------+------+-----+------+------+
5156  */
5157 static void disas_cond_select(DisasContext *s, uint32_t insn)
5158 {
5159     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5160     TCGv_i64 tcg_rd, zero;
5161     DisasCompare64 c;
5162 
5163     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5164         /* S == 1 or op2<1> == 1 */
5165         unallocated_encoding(s);
5166         return;
5167     }
5168     sf = extract32(insn, 31, 1);
5169     else_inv = extract32(insn, 30, 1);
5170     rm = extract32(insn, 16, 5);
5171     cond = extract32(insn, 12, 4);
5172     else_inc = extract32(insn, 10, 1);
5173     rn = extract32(insn, 5, 5);
5174     rd = extract32(insn, 0, 5);
5175 
5176     tcg_rd = cpu_reg(s, rd);
5177 
5178     a64_test_cc(&c, cond);
5179     zero = tcg_constant_i64(0);
5180 
5181     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5182         /* CSET & CSETM.  */
5183         if (else_inv) {
5184             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
5185                                    tcg_rd, c.value, zero);
5186         } else {
5187             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
5188                                 tcg_rd, c.value, zero);
5189         }
5190     } else {
5191         TCGv_i64 t_true = cpu_reg(s, rn);
5192         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5193         if (else_inv && else_inc) {
5194             tcg_gen_neg_i64(t_false, t_false);
5195         } else if (else_inv) {
5196             tcg_gen_not_i64(t_false, t_false);
5197         } else if (else_inc) {
5198             tcg_gen_addi_i64(t_false, t_false, 1);
5199         }
5200         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5201     }
5202 
5203     if (!sf) {
5204         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5205     }
5206 }
5207 
5208 static void handle_clz(DisasContext *s, unsigned int sf,
5209                        unsigned int rn, unsigned int rd)
5210 {
5211     TCGv_i64 tcg_rd, tcg_rn;
5212     tcg_rd = cpu_reg(s, rd);
5213     tcg_rn = cpu_reg(s, rn);
5214 
5215     if (sf) {
5216         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5217     } else {
5218         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5219         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5220         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5221         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5222     }
5223 }
5224 
5225 static void handle_cls(DisasContext *s, unsigned int sf,
5226                        unsigned int rn, unsigned int rd)
5227 {
5228     TCGv_i64 tcg_rd, tcg_rn;
5229     tcg_rd = cpu_reg(s, rd);
5230     tcg_rn = cpu_reg(s, rn);
5231 
5232     if (sf) {
5233         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5234     } else {
5235         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5236         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5237         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5238         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5239     }
5240 }
5241 
5242 static void handle_rbit(DisasContext *s, unsigned int sf,
5243                         unsigned int rn, unsigned int rd)
5244 {
5245     TCGv_i64 tcg_rd, tcg_rn;
5246     tcg_rd = cpu_reg(s, rd);
5247     tcg_rn = cpu_reg(s, rn);
5248 
5249     if (sf) {
5250         gen_helper_rbit64(tcg_rd, tcg_rn);
5251     } else {
5252         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5253         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5254         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5255         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5256     }
5257 }
5258 
5259 /* REV with sf==1, opcode==3 ("REV64") */
5260 static void handle_rev64(DisasContext *s, unsigned int sf,
5261                          unsigned int rn, unsigned int rd)
5262 {
5263     if (!sf) {
5264         unallocated_encoding(s);
5265         return;
5266     }
5267     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5268 }
5269 
5270 /* REV with sf==0, opcode==2
5271  * REV32 (sf==1, opcode==2)
5272  */
5273 static void handle_rev32(DisasContext *s, unsigned int sf,
5274                          unsigned int rn, unsigned int rd)
5275 {
5276     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5277     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5278 
5279     if (sf) {
5280         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5281         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5282     } else {
5283         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5284     }
5285 }
5286 
5287 /* REV16 (opcode==1) */
5288 static void handle_rev16(DisasContext *s, unsigned int sf,
5289                          unsigned int rn, unsigned int rd)
5290 {
5291     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5292     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5293     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5294     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5295 
5296     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5297     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5298     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5299     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5300     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5301 }
5302 
5303 /* Data-processing (1 source)
5304  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5305  * +----+---+---+-----------------+---------+--------+------+------+
5306  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5307  * +----+---+---+-----------------+---------+--------+------+------+
5308  */
5309 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5310 {
5311     unsigned int sf, opcode, opcode2, rn, rd;
5312     TCGv_i64 tcg_rd;
5313 
5314     if (extract32(insn, 29, 1)) {
5315         unallocated_encoding(s);
5316         return;
5317     }
5318 
5319     sf = extract32(insn, 31, 1);
5320     opcode = extract32(insn, 10, 6);
5321     opcode2 = extract32(insn, 16, 5);
5322     rn = extract32(insn, 5, 5);
5323     rd = extract32(insn, 0, 5);
5324 
5325 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
5326 
5327     switch (MAP(sf, opcode2, opcode)) {
5328     case MAP(0, 0x00, 0x00): /* RBIT */
5329     case MAP(1, 0x00, 0x00):
5330         handle_rbit(s, sf, rn, rd);
5331         break;
5332     case MAP(0, 0x00, 0x01): /* REV16 */
5333     case MAP(1, 0x00, 0x01):
5334         handle_rev16(s, sf, rn, rd);
5335         break;
5336     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5337     case MAP(1, 0x00, 0x02):
5338         handle_rev32(s, sf, rn, rd);
5339         break;
5340     case MAP(1, 0x00, 0x03): /* REV64 */
5341         handle_rev64(s, sf, rn, rd);
5342         break;
5343     case MAP(0, 0x00, 0x04): /* CLZ */
5344     case MAP(1, 0x00, 0x04):
5345         handle_clz(s, sf, rn, rd);
5346         break;
5347     case MAP(0, 0x00, 0x05): /* CLS */
5348     case MAP(1, 0x00, 0x05):
5349         handle_cls(s, sf, rn, rd);
5350         break;
5351     case MAP(1, 0x01, 0x00): /* PACIA */
5352         if (s->pauth_active) {
5353             tcg_rd = cpu_reg(s, rd);
5354             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5355         } else if (!dc_isar_feature(aa64_pauth, s)) {
5356             goto do_unallocated;
5357         }
5358         break;
5359     case MAP(1, 0x01, 0x01): /* PACIB */
5360         if (s->pauth_active) {
5361             tcg_rd = cpu_reg(s, rd);
5362             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5363         } else if (!dc_isar_feature(aa64_pauth, s)) {
5364             goto do_unallocated;
5365         }
5366         break;
5367     case MAP(1, 0x01, 0x02): /* PACDA */
5368         if (s->pauth_active) {
5369             tcg_rd = cpu_reg(s, rd);
5370             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5371         } else if (!dc_isar_feature(aa64_pauth, s)) {
5372             goto do_unallocated;
5373         }
5374         break;
5375     case MAP(1, 0x01, 0x03): /* PACDB */
5376         if (s->pauth_active) {
5377             tcg_rd = cpu_reg(s, rd);
5378             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5379         } else if (!dc_isar_feature(aa64_pauth, s)) {
5380             goto do_unallocated;
5381         }
5382         break;
5383     case MAP(1, 0x01, 0x04): /* AUTIA */
5384         if (s->pauth_active) {
5385             tcg_rd = cpu_reg(s, rd);
5386             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5387         } else if (!dc_isar_feature(aa64_pauth, s)) {
5388             goto do_unallocated;
5389         }
5390         break;
5391     case MAP(1, 0x01, 0x05): /* AUTIB */
5392         if (s->pauth_active) {
5393             tcg_rd = cpu_reg(s, rd);
5394             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5395         } else if (!dc_isar_feature(aa64_pauth, s)) {
5396             goto do_unallocated;
5397         }
5398         break;
5399     case MAP(1, 0x01, 0x06): /* AUTDA */
5400         if (s->pauth_active) {
5401             tcg_rd = cpu_reg(s, rd);
5402             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5403         } else if (!dc_isar_feature(aa64_pauth, s)) {
5404             goto do_unallocated;
5405         }
5406         break;
5407     case MAP(1, 0x01, 0x07): /* AUTDB */
5408         if (s->pauth_active) {
5409             tcg_rd = cpu_reg(s, rd);
5410             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5411         } else if (!dc_isar_feature(aa64_pauth, s)) {
5412             goto do_unallocated;
5413         }
5414         break;
5415     case MAP(1, 0x01, 0x08): /* PACIZA */
5416         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5417             goto do_unallocated;
5418         } else if (s->pauth_active) {
5419             tcg_rd = cpu_reg(s, rd);
5420             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5421         }
5422         break;
5423     case MAP(1, 0x01, 0x09): /* PACIZB */
5424         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5425             goto do_unallocated;
5426         } else if (s->pauth_active) {
5427             tcg_rd = cpu_reg(s, rd);
5428             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5429         }
5430         break;
5431     case MAP(1, 0x01, 0x0a): /* PACDZA */
5432         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5433             goto do_unallocated;
5434         } else if (s->pauth_active) {
5435             tcg_rd = cpu_reg(s, rd);
5436             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5437         }
5438         break;
5439     case MAP(1, 0x01, 0x0b): /* PACDZB */
5440         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5441             goto do_unallocated;
5442         } else if (s->pauth_active) {
5443             tcg_rd = cpu_reg(s, rd);
5444             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5445         }
5446         break;
5447     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5448         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5449             goto do_unallocated;
5450         } else if (s->pauth_active) {
5451             tcg_rd = cpu_reg(s, rd);
5452             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5453         }
5454         break;
5455     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5456         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5457             goto do_unallocated;
5458         } else if (s->pauth_active) {
5459             tcg_rd = cpu_reg(s, rd);
5460             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5461         }
5462         break;
5463     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5464         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5465             goto do_unallocated;
5466         } else if (s->pauth_active) {
5467             tcg_rd = cpu_reg(s, rd);
5468             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5469         }
5470         break;
5471     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5472         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5473             goto do_unallocated;
5474         } else if (s->pauth_active) {
5475             tcg_rd = cpu_reg(s, rd);
5476             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5477         }
5478         break;
5479     case MAP(1, 0x01, 0x10): /* XPACI */
5480         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5481             goto do_unallocated;
5482         } else if (s->pauth_active) {
5483             tcg_rd = cpu_reg(s, rd);
5484             gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
5485         }
5486         break;
5487     case MAP(1, 0x01, 0x11): /* XPACD */
5488         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5489             goto do_unallocated;
5490         } else if (s->pauth_active) {
5491             tcg_rd = cpu_reg(s, rd);
5492             gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
5493         }
5494         break;
5495     default:
5496     do_unallocated:
5497         unallocated_encoding(s);
5498         break;
5499     }
5500 
5501 #undef MAP
5502 }
5503 
5504 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5505                        unsigned int rm, unsigned int rn, unsigned int rd)
5506 {
5507     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5508     tcg_rd = cpu_reg(s, rd);
5509 
5510     if (!sf && is_signed) {
5511         tcg_n = tcg_temp_new_i64();
5512         tcg_m = tcg_temp_new_i64();
5513         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5514         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5515     } else {
5516         tcg_n = read_cpu_reg(s, rn, sf);
5517         tcg_m = read_cpu_reg(s, rm, sf);
5518     }
5519 
5520     if (is_signed) {
5521         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5522     } else {
5523         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5524     }
5525 
5526     if (!sf) { /* zero extend final result */
5527         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5528     }
5529 }
5530 
5531 /* LSLV, LSRV, ASRV, RORV */
5532 static void handle_shift_reg(DisasContext *s,
5533                              enum a64_shift_type shift_type, unsigned int sf,
5534                              unsigned int rm, unsigned int rn, unsigned int rd)
5535 {
5536     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5537     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5538     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5539 
5540     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5541     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5542 }
5543 
5544 /* CRC32[BHWX], CRC32C[BHWX] */
5545 static void handle_crc32(DisasContext *s,
5546                          unsigned int sf, unsigned int sz, bool crc32c,
5547                          unsigned int rm, unsigned int rn, unsigned int rd)
5548 {
5549     TCGv_i64 tcg_acc, tcg_val;
5550     TCGv_i32 tcg_bytes;
5551 
5552     if (!dc_isar_feature(aa64_crc32, s)
5553         || (sf == 1 && sz != 3)
5554         || (sf == 0 && sz == 3)) {
5555         unallocated_encoding(s);
5556         return;
5557     }
5558 
5559     if (sz == 3) {
5560         tcg_val = cpu_reg(s, rm);
5561     } else {
5562         uint64_t mask;
5563         switch (sz) {
5564         case 0:
5565             mask = 0xFF;
5566             break;
5567         case 1:
5568             mask = 0xFFFF;
5569             break;
5570         case 2:
5571             mask = 0xFFFFFFFF;
5572             break;
5573         default:
5574             g_assert_not_reached();
5575         }
5576         tcg_val = tcg_temp_new_i64();
5577         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5578     }
5579 
5580     tcg_acc = cpu_reg(s, rn);
5581     tcg_bytes = tcg_constant_i32(1 << sz);
5582 
5583     if (crc32c) {
5584         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5585     } else {
5586         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5587     }
5588 }
5589 
5590 /* Data-processing (2 source)
5591  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5592  * +----+---+---+-----------------+------+--------+------+------+
5593  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5594  * +----+---+---+-----------------+------+--------+------+------+
5595  */
5596 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5597 {
5598     unsigned int sf, rm, opcode, rn, rd, setflag;
5599     sf = extract32(insn, 31, 1);
5600     setflag = extract32(insn, 29, 1);
5601     rm = extract32(insn, 16, 5);
5602     opcode = extract32(insn, 10, 6);
5603     rn = extract32(insn, 5, 5);
5604     rd = extract32(insn, 0, 5);
5605 
5606     if (setflag && opcode != 0) {
5607         unallocated_encoding(s);
5608         return;
5609     }
5610 
5611     switch (opcode) {
5612     case 0: /* SUBP(S) */
5613         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5614             goto do_unallocated;
5615         } else {
5616             TCGv_i64 tcg_n, tcg_m, tcg_d;
5617 
5618             tcg_n = read_cpu_reg_sp(s, rn, true);
5619             tcg_m = read_cpu_reg_sp(s, rm, true);
5620             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5621             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5622             tcg_d = cpu_reg(s, rd);
5623 
5624             if (setflag) {
5625                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5626             } else {
5627                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5628             }
5629         }
5630         break;
5631     case 2: /* UDIV */
5632         handle_div(s, false, sf, rm, rn, rd);
5633         break;
5634     case 3: /* SDIV */
5635         handle_div(s, true, sf, rm, rn, rd);
5636         break;
5637     case 4: /* IRG */
5638         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5639             goto do_unallocated;
5640         }
5641         if (s->ata[0]) {
5642             gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
5643                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5644         } else {
5645             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5646                                              cpu_reg_sp(s, rn));
5647         }
5648         break;
5649     case 5: /* GMI */
5650         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5651             goto do_unallocated;
5652         } else {
5653             TCGv_i64 t = tcg_temp_new_i64();
5654 
5655             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5656             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5657             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5658         }
5659         break;
5660     case 8: /* LSLV */
5661         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5662         break;
5663     case 9: /* LSRV */
5664         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5665         break;
5666     case 10: /* ASRV */
5667         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5668         break;
5669     case 11: /* RORV */
5670         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5671         break;
5672     case 12: /* PACGA */
5673         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5674             goto do_unallocated;
5675         }
5676         gen_helper_pacga(cpu_reg(s, rd), tcg_env,
5677                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5678         break;
5679     case 16:
5680     case 17:
5681     case 18:
5682     case 19:
5683     case 20:
5684     case 21:
5685     case 22:
5686     case 23: /* CRC32 */
5687     {
5688         int sz = extract32(opcode, 0, 2);
5689         bool crc32c = extract32(opcode, 2, 1);
5690         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5691         break;
5692     }
5693     default:
5694     do_unallocated:
5695         unallocated_encoding(s);
5696         break;
5697     }
5698 }
5699 
5700 /*
5701  * Data processing - register
5702  *  31  30 29  28      25    21  20  16      10         0
5703  * +--+---+--+---+-------+-----+-------+-------+---------+
5704  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5705  * +--+---+--+---+-------+-----+-------+-------+---------+
5706  */
5707 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5708 {
5709     int op0 = extract32(insn, 30, 1);
5710     int op1 = extract32(insn, 28, 1);
5711     int op2 = extract32(insn, 21, 4);
5712     int op3 = extract32(insn, 10, 6);
5713 
5714     if (!op1) {
5715         if (op2 & 8) {
5716             if (op2 & 1) {
5717                 /* Add/sub (extended register) */
5718                 disas_add_sub_ext_reg(s, insn);
5719             } else {
5720                 /* Add/sub (shifted register) */
5721                 disas_add_sub_reg(s, insn);
5722             }
5723         } else {
5724             /* Logical (shifted register) */
5725             disas_logic_reg(s, insn);
5726         }
5727         return;
5728     }
5729 
5730     switch (op2) {
5731     case 0x0:
5732         switch (op3) {
5733         case 0x00: /* Add/subtract (with carry) */
5734             disas_adc_sbc(s, insn);
5735             break;
5736 
5737         case 0x01: /* Rotate right into flags */
5738         case 0x21:
5739             disas_rotate_right_into_flags(s, insn);
5740             break;
5741 
5742         case 0x02: /* Evaluate into flags */
5743         case 0x12:
5744         case 0x22:
5745         case 0x32:
5746             disas_evaluate_into_flags(s, insn);
5747             break;
5748 
5749         default:
5750             goto do_unallocated;
5751         }
5752         break;
5753 
5754     case 0x2: /* Conditional compare */
5755         disas_cc(s, insn); /* both imm and reg forms */
5756         break;
5757 
5758     case 0x4: /* Conditional select */
5759         disas_cond_select(s, insn);
5760         break;
5761 
5762     case 0x6: /* Data-processing */
5763         if (op0) {    /* (1 source) */
5764             disas_data_proc_1src(s, insn);
5765         } else {      /* (2 source) */
5766             disas_data_proc_2src(s, insn);
5767         }
5768         break;
5769     case 0x8 ... 0xf: /* (3 source) */
5770         disas_data_proc_3src(s, insn);
5771         break;
5772 
5773     default:
5774     do_unallocated:
5775         unallocated_encoding(s);
5776         break;
5777     }
5778 }
5779 
5780 static void handle_fp_compare(DisasContext *s, int size,
5781                               unsigned int rn, unsigned int rm,
5782                               bool cmp_with_zero, bool signal_all_nans)
5783 {
5784     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5785     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5786 
5787     if (size == MO_64) {
5788         TCGv_i64 tcg_vn, tcg_vm;
5789 
5790         tcg_vn = read_fp_dreg(s, rn);
5791         if (cmp_with_zero) {
5792             tcg_vm = tcg_constant_i64(0);
5793         } else {
5794             tcg_vm = read_fp_dreg(s, rm);
5795         }
5796         if (signal_all_nans) {
5797             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5798         } else {
5799             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5800         }
5801     } else {
5802         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5803         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5804 
5805         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5806         if (cmp_with_zero) {
5807             tcg_gen_movi_i32(tcg_vm, 0);
5808         } else {
5809             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5810         }
5811 
5812         switch (size) {
5813         case MO_32:
5814             if (signal_all_nans) {
5815                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5816             } else {
5817                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5818             }
5819             break;
5820         case MO_16:
5821             if (signal_all_nans) {
5822                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5823             } else {
5824                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5825             }
5826             break;
5827         default:
5828             g_assert_not_reached();
5829         }
5830     }
5831 
5832     gen_set_nzcv(tcg_flags);
5833 }
5834 
5835 /* Floating point compare
5836  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5837  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5838  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5839  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5840  */
5841 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5842 {
5843     unsigned int mos, type, rm, op, rn, opc, op2r;
5844     int size;
5845 
5846     mos = extract32(insn, 29, 3);
5847     type = extract32(insn, 22, 2);
5848     rm = extract32(insn, 16, 5);
5849     op = extract32(insn, 14, 2);
5850     rn = extract32(insn, 5, 5);
5851     opc = extract32(insn, 3, 2);
5852     op2r = extract32(insn, 0, 3);
5853 
5854     if (mos || op || op2r) {
5855         unallocated_encoding(s);
5856         return;
5857     }
5858 
5859     switch (type) {
5860     case 0:
5861         size = MO_32;
5862         break;
5863     case 1:
5864         size = MO_64;
5865         break;
5866     case 3:
5867         size = MO_16;
5868         if (dc_isar_feature(aa64_fp16, s)) {
5869             break;
5870         }
5871         /* fallthru */
5872     default:
5873         unallocated_encoding(s);
5874         return;
5875     }
5876 
5877     if (!fp_access_check(s)) {
5878         return;
5879     }
5880 
5881     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5882 }
5883 
5884 /* Floating point conditional compare
5885  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5886  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5887  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5888  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5889  */
5890 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5891 {
5892     unsigned int mos, type, rm, cond, rn, op, nzcv;
5893     TCGLabel *label_continue = NULL;
5894     int size;
5895 
5896     mos = extract32(insn, 29, 3);
5897     type = extract32(insn, 22, 2);
5898     rm = extract32(insn, 16, 5);
5899     cond = extract32(insn, 12, 4);
5900     rn = extract32(insn, 5, 5);
5901     op = extract32(insn, 4, 1);
5902     nzcv = extract32(insn, 0, 4);
5903 
5904     if (mos) {
5905         unallocated_encoding(s);
5906         return;
5907     }
5908 
5909     switch (type) {
5910     case 0:
5911         size = MO_32;
5912         break;
5913     case 1:
5914         size = MO_64;
5915         break;
5916     case 3:
5917         size = MO_16;
5918         if (dc_isar_feature(aa64_fp16, s)) {
5919             break;
5920         }
5921         /* fallthru */
5922     default:
5923         unallocated_encoding(s);
5924         return;
5925     }
5926 
5927     if (!fp_access_check(s)) {
5928         return;
5929     }
5930 
5931     if (cond < 0x0e) { /* not always */
5932         TCGLabel *label_match = gen_new_label();
5933         label_continue = gen_new_label();
5934         arm_gen_test_cc(cond, label_match);
5935         /* nomatch: */
5936         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
5937         tcg_gen_br(label_continue);
5938         gen_set_label(label_match);
5939     }
5940 
5941     handle_fp_compare(s, size, rn, rm, false, op);
5942 
5943     if (cond < 0x0e) {
5944         gen_set_label(label_continue);
5945     }
5946 }
5947 
5948 /* Floating point conditional select
5949  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5950  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5951  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5952  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5953  */
5954 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5955 {
5956     unsigned int mos, type, rm, cond, rn, rd;
5957     TCGv_i64 t_true, t_false;
5958     DisasCompare64 c;
5959     MemOp sz;
5960 
5961     mos = extract32(insn, 29, 3);
5962     type = extract32(insn, 22, 2);
5963     rm = extract32(insn, 16, 5);
5964     cond = extract32(insn, 12, 4);
5965     rn = extract32(insn, 5, 5);
5966     rd = extract32(insn, 0, 5);
5967 
5968     if (mos) {
5969         unallocated_encoding(s);
5970         return;
5971     }
5972 
5973     switch (type) {
5974     case 0:
5975         sz = MO_32;
5976         break;
5977     case 1:
5978         sz = MO_64;
5979         break;
5980     case 3:
5981         sz = MO_16;
5982         if (dc_isar_feature(aa64_fp16, s)) {
5983             break;
5984         }
5985         /* fallthru */
5986     default:
5987         unallocated_encoding(s);
5988         return;
5989     }
5990 
5991     if (!fp_access_check(s)) {
5992         return;
5993     }
5994 
5995     /* Zero extend sreg & hreg inputs to 64 bits now.  */
5996     t_true = tcg_temp_new_i64();
5997     t_false = tcg_temp_new_i64();
5998     read_vec_element(s, t_true, rn, 0, sz);
5999     read_vec_element(s, t_false, rm, 0, sz);
6000 
6001     a64_test_cc(&c, cond);
6002     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6003                         t_true, t_false);
6004 
6005     /* Note that sregs & hregs write back zeros to the high bits,
6006        and we've already done the zero-extension.  */
6007     write_fp_dreg(s, rd, t_true);
6008 }
6009 
6010 /* Floating-point data-processing (1 source) - half precision */
6011 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
6012 {
6013     TCGv_ptr fpst = NULL;
6014     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
6015     TCGv_i32 tcg_res = tcg_temp_new_i32();
6016 
6017     switch (opcode) {
6018     case 0x0: /* FMOV */
6019         tcg_gen_mov_i32(tcg_res, tcg_op);
6020         break;
6021     case 0x1: /* FABS */
6022         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
6023         break;
6024     case 0x2: /* FNEG */
6025         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
6026         break;
6027     case 0x3: /* FSQRT */
6028         fpst = fpstatus_ptr(FPST_FPCR_F16);
6029         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
6030         break;
6031     case 0x8: /* FRINTN */
6032     case 0x9: /* FRINTP */
6033     case 0xa: /* FRINTM */
6034     case 0xb: /* FRINTZ */
6035     case 0xc: /* FRINTA */
6036     {
6037         TCGv_i32 tcg_rmode;
6038 
6039         fpst = fpstatus_ptr(FPST_FPCR_F16);
6040         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
6041         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6042         gen_restore_rmode(tcg_rmode, fpst);
6043         break;
6044     }
6045     case 0xe: /* FRINTX */
6046         fpst = fpstatus_ptr(FPST_FPCR_F16);
6047         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
6048         break;
6049     case 0xf: /* FRINTI */
6050         fpst = fpstatus_ptr(FPST_FPCR_F16);
6051         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6052         break;
6053     default:
6054         g_assert_not_reached();
6055     }
6056 
6057     write_fp_sreg(s, rd, tcg_res);
6058 }
6059 
6060 /* Floating-point data-processing (1 source) - single precision */
6061 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
6062 {
6063     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
6064     TCGv_i32 tcg_op, tcg_res;
6065     TCGv_ptr fpst;
6066     int rmode = -1;
6067 
6068     tcg_op = read_fp_sreg(s, rn);
6069     tcg_res = tcg_temp_new_i32();
6070 
6071     switch (opcode) {
6072     case 0x0: /* FMOV */
6073         tcg_gen_mov_i32(tcg_res, tcg_op);
6074         goto done;
6075     case 0x1: /* FABS */
6076         gen_helper_vfp_abss(tcg_res, tcg_op);
6077         goto done;
6078     case 0x2: /* FNEG */
6079         gen_helper_vfp_negs(tcg_res, tcg_op);
6080         goto done;
6081     case 0x3: /* FSQRT */
6082         gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
6083         goto done;
6084     case 0x6: /* BFCVT */
6085         gen_fpst = gen_helper_bfcvt;
6086         break;
6087     case 0x8: /* FRINTN */
6088     case 0x9: /* FRINTP */
6089     case 0xa: /* FRINTM */
6090     case 0xb: /* FRINTZ */
6091     case 0xc: /* FRINTA */
6092         rmode = opcode & 7;
6093         gen_fpst = gen_helper_rints;
6094         break;
6095     case 0xe: /* FRINTX */
6096         gen_fpst = gen_helper_rints_exact;
6097         break;
6098     case 0xf: /* FRINTI */
6099         gen_fpst = gen_helper_rints;
6100         break;
6101     case 0x10: /* FRINT32Z */
6102         rmode = FPROUNDING_ZERO;
6103         gen_fpst = gen_helper_frint32_s;
6104         break;
6105     case 0x11: /* FRINT32X */
6106         gen_fpst = gen_helper_frint32_s;
6107         break;
6108     case 0x12: /* FRINT64Z */
6109         rmode = FPROUNDING_ZERO;
6110         gen_fpst = gen_helper_frint64_s;
6111         break;
6112     case 0x13: /* FRINT64X */
6113         gen_fpst = gen_helper_frint64_s;
6114         break;
6115     default:
6116         g_assert_not_reached();
6117     }
6118 
6119     fpst = fpstatus_ptr(FPST_FPCR);
6120     if (rmode >= 0) {
6121         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6122         gen_fpst(tcg_res, tcg_op, fpst);
6123         gen_restore_rmode(tcg_rmode, fpst);
6124     } else {
6125         gen_fpst(tcg_res, tcg_op, fpst);
6126     }
6127 
6128  done:
6129     write_fp_sreg(s, rd, tcg_res);
6130 }
6131 
6132 /* Floating-point data-processing (1 source) - double precision */
6133 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6134 {
6135     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6136     TCGv_i64 tcg_op, tcg_res;
6137     TCGv_ptr fpst;
6138     int rmode = -1;
6139 
6140     switch (opcode) {
6141     case 0x0: /* FMOV */
6142         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6143         return;
6144     }
6145 
6146     tcg_op = read_fp_dreg(s, rn);
6147     tcg_res = tcg_temp_new_i64();
6148 
6149     switch (opcode) {
6150     case 0x1: /* FABS */
6151         gen_helper_vfp_absd(tcg_res, tcg_op);
6152         goto done;
6153     case 0x2: /* FNEG */
6154         gen_helper_vfp_negd(tcg_res, tcg_op);
6155         goto done;
6156     case 0x3: /* FSQRT */
6157         gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
6158         goto done;
6159     case 0x8: /* FRINTN */
6160     case 0x9: /* FRINTP */
6161     case 0xa: /* FRINTM */
6162     case 0xb: /* FRINTZ */
6163     case 0xc: /* FRINTA */
6164         rmode = opcode & 7;
6165         gen_fpst = gen_helper_rintd;
6166         break;
6167     case 0xe: /* FRINTX */
6168         gen_fpst = gen_helper_rintd_exact;
6169         break;
6170     case 0xf: /* FRINTI */
6171         gen_fpst = gen_helper_rintd;
6172         break;
6173     case 0x10: /* FRINT32Z */
6174         rmode = FPROUNDING_ZERO;
6175         gen_fpst = gen_helper_frint32_d;
6176         break;
6177     case 0x11: /* FRINT32X */
6178         gen_fpst = gen_helper_frint32_d;
6179         break;
6180     case 0x12: /* FRINT64Z */
6181         rmode = FPROUNDING_ZERO;
6182         gen_fpst = gen_helper_frint64_d;
6183         break;
6184     case 0x13: /* FRINT64X */
6185         gen_fpst = gen_helper_frint64_d;
6186         break;
6187     default:
6188         g_assert_not_reached();
6189     }
6190 
6191     fpst = fpstatus_ptr(FPST_FPCR);
6192     if (rmode >= 0) {
6193         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6194         gen_fpst(tcg_res, tcg_op, fpst);
6195         gen_restore_rmode(tcg_rmode, fpst);
6196     } else {
6197         gen_fpst(tcg_res, tcg_op, fpst);
6198     }
6199 
6200  done:
6201     write_fp_dreg(s, rd, tcg_res);
6202 }
6203 
6204 static void handle_fp_fcvt(DisasContext *s, int opcode,
6205                            int rd, int rn, int dtype, int ntype)
6206 {
6207     switch (ntype) {
6208     case 0x0:
6209     {
6210         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6211         if (dtype == 1) {
6212             /* Single to double */
6213             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6214             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
6215             write_fp_dreg(s, rd, tcg_rd);
6216         } else {
6217             /* Single to half */
6218             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6219             TCGv_i32 ahp = get_ahp_flag();
6220             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6221 
6222             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6223             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6224             write_fp_sreg(s, rd, tcg_rd);
6225         }
6226         break;
6227     }
6228     case 0x1:
6229     {
6230         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6231         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6232         if (dtype == 0) {
6233             /* Double to single */
6234             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
6235         } else {
6236             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6237             TCGv_i32 ahp = get_ahp_flag();
6238             /* Double to half */
6239             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6240             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6241         }
6242         write_fp_sreg(s, rd, tcg_rd);
6243         break;
6244     }
6245     case 0x3:
6246     {
6247         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6248         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6249         TCGv_i32 tcg_ahp = get_ahp_flag();
6250         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6251         if (dtype == 0) {
6252             /* Half to single */
6253             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6254             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6255             write_fp_sreg(s, rd, tcg_rd);
6256         } else {
6257             /* Half to double */
6258             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6259             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6260             write_fp_dreg(s, rd, tcg_rd);
6261         }
6262         break;
6263     }
6264     default:
6265         g_assert_not_reached();
6266     }
6267 }
6268 
6269 /* Floating point data-processing (1 source)
6270  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6271  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6272  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6273  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6274  */
6275 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6276 {
6277     int mos = extract32(insn, 29, 3);
6278     int type = extract32(insn, 22, 2);
6279     int opcode = extract32(insn, 15, 6);
6280     int rn = extract32(insn, 5, 5);
6281     int rd = extract32(insn, 0, 5);
6282 
6283     if (mos) {
6284         goto do_unallocated;
6285     }
6286 
6287     switch (opcode) {
6288     case 0x4: case 0x5: case 0x7:
6289     {
6290         /* FCVT between half, single and double precision */
6291         int dtype = extract32(opcode, 0, 2);
6292         if (type == 2 || dtype == type) {
6293             goto do_unallocated;
6294         }
6295         if (!fp_access_check(s)) {
6296             return;
6297         }
6298 
6299         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6300         break;
6301     }
6302 
6303     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6304         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6305             goto do_unallocated;
6306         }
6307         /* fall through */
6308     case 0x0 ... 0x3:
6309     case 0x8 ... 0xc:
6310     case 0xe ... 0xf:
6311         /* 32-to-32 and 64-to-64 ops */
6312         switch (type) {
6313         case 0:
6314             if (!fp_access_check(s)) {
6315                 return;
6316             }
6317             handle_fp_1src_single(s, opcode, rd, rn);
6318             break;
6319         case 1:
6320             if (!fp_access_check(s)) {
6321                 return;
6322             }
6323             handle_fp_1src_double(s, opcode, rd, rn);
6324             break;
6325         case 3:
6326             if (!dc_isar_feature(aa64_fp16, s)) {
6327                 goto do_unallocated;
6328             }
6329 
6330             if (!fp_access_check(s)) {
6331                 return;
6332             }
6333             handle_fp_1src_half(s, opcode, rd, rn);
6334             break;
6335         default:
6336             goto do_unallocated;
6337         }
6338         break;
6339 
6340     case 0x6:
6341         switch (type) {
6342         case 1: /* BFCVT */
6343             if (!dc_isar_feature(aa64_bf16, s)) {
6344                 goto do_unallocated;
6345             }
6346             if (!fp_access_check(s)) {
6347                 return;
6348             }
6349             handle_fp_1src_single(s, opcode, rd, rn);
6350             break;
6351         default:
6352             goto do_unallocated;
6353         }
6354         break;
6355 
6356     default:
6357     do_unallocated:
6358         unallocated_encoding(s);
6359         break;
6360     }
6361 }
6362 
6363 /* Floating-point data-processing (2 source) - single precision */
6364 static void handle_fp_2src_single(DisasContext *s, int opcode,
6365                                   int rd, int rn, int rm)
6366 {
6367     TCGv_i32 tcg_op1;
6368     TCGv_i32 tcg_op2;
6369     TCGv_i32 tcg_res;
6370     TCGv_ptr fpst;
6371 
6372     tcg_res = tcg_temp_new_i32();
6373     fpst = fpstatus_ptr(FPST_FPCR);
6374     tcg_op1 = read_fp_sreg(s, rn);
6375     tcg_op2 = read_fp_sreg(s, rm);
6376 
6377     switch (opcode) {
6378     case 0x0: /* FMUL */
6379         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6380         break;
6381     case 0x1: /* FDIV */
6382         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6383         break;
6384     case 0x2: /* FADD */
6385         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6386         break;
6387     case 0x3: /* FSUB */
6388         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6389         break;
6390     case 0x4: /* FMAX */
6391         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6392         break;
6393     case 0x5: /* FMIN */
6394         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6395         break;
6396     case 0x6: /* FMAXNM */
6397         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6398         break;
6399     case 0x7: /* FMINNM */
6400         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6401         break;
6402     case 0x8: /* FNMUL */
6403         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6404         gen_helper_vfp_negs(tcg_res, tcg_res);
6405         break;
6406     }
6407 
6408     write_fp_sreg(s, rd, tcg_res);
6409 }
6410 
6411 /* Floating-point data-processing (2 source) - double precision */
6412 static void handle_fp_2src_double(DisasContext *s, int opcode,
6413                                   int rd, int rn, int rm)
6414 {
6415     TCGv_i64 tcg_op1;
6416     TCGv_i64 tcg_op2;
6417     TCGv_i64 tcg_res;
6418     TCGv_ptr fpst;
6419 
6420     tcg_res = tcg_temp_new_i64();
6421     fpst = fpstatus_ptr(FPST_FPCR);
6422     tcg_op1 = read_fp_dreg(s, rn);
6423     tcg_op2 = read_fp_dreg(s, rm);
6424 
6425     switch (opcode) {
6426     case 0x0: /* FMUL */
6427         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6428         break;
6429     case 0x1: /* FDIV */
6430         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6431         break;
6432     case 0x2: /* FADD */
6433         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6434         break;
6435     case 0x3: /* FSUB */
6436         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6437         break;
6438     case 0x4: /* FMAX */
6439         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6440         break;
6441     case 0x5: /* FMIN */
6442         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6443         break;
6444     case 0x6: /* FMAXNM */
6445         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6446         break;
6447     case 0x7: /* FMINNM */
6448         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6449         break;
6450     case 0x8: /* FNMUL */
6451         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6452         gen_helper_vfp_negd(tcg_res, tcg_res);
6453         break;
6454     }
6455 
6456     write_fp_dreg(s, rd, tcg_res);
6457 }
6458 
6459 /* Floating-point data-processing (2 source) - half precision */
6460 static void handle_fp_2src_half(DisasContext *s, int opcode,
6461                                 int rd, int rn, int rm)
6462 {
6463     TCGv_i32 tcg_op1;
6464     TCGv_i32 tcg_op2;
6465     TCGv_i32 tcg_res;
6466     TCGv_ptr fpst;
6467 
6468     tcg_res = tcg_temp_new_i32();
6469     fpst = fpstatus_ptr(FPST_FPCR_F16);
6470     tcg_op1 = read_fp_hreg(s, rn);
6471     tcg_op2 = read_fp_hreg(s, rm);
6472 
6473     switch (opcode) {
6474     case 0x0: /* FMUL */
6475         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6476         break;
6477     case 0x1: /* FDIV */
6478         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6479         break;
6480     case 0x2: /* FADD */
6481         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6482         break;
6483     case 0x3: /* FSUB */
6484         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6485         break;
6486     case 0x4: /* FMAX */
6487         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6488         break;
6489     case 0x5: /* FMIN */
6490         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6491         break;
6492     case 0x6: /* FMAXNM */
6493         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6494         break;
6495     case 0x7: /* FMINNM */
6496         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6497         break;
6498     case 0x8: /* FNMUL */
6499         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6500         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6501         break;
6502     default:
6503         g_assert_not_reached();
6504     }
6505 
6506     write_fp_sreg(s, rd, tcg_res);
6507 }
6508 
6509 /* Floating point data-processing (2 source)
6510  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6511  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6512  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6513  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6514  */
6515 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6516 {
6517     int mos = extract32(insn, 29, 3);
6518     int type = extract32(insn, 22, 2);
6519     int rd = extract32(insn, 0, 5);
6520     int rn = extract32(insn, 5, 5);
6521     int rm = extract32(insn, 16, 5);
6522     int opcode = extract32(insn, 12, 4);
6523 
6524     if (opcode > 8 || mos) {
6525         unallocated_encoding(s);
6526         return;
6527     }
6528 
6529     switch (type) {
6530     case 0:
6531         if (!fp_access_check(s)) {
6532             return;
6533         }
6534         handle_fp_2src_single(s, opcode, rd, rn, rm);
6535         break;
6536     case 1:
6537         if (!fp_access_check(s)) {
6538             return;
6539         }
6540         handle_fp_2src_double(s, opcode, rd, rn, rm);
6541         break;
6542     case 3:
6543         if (!dc_isar_feature(aa64_fp16, s)) {
6544             unallocated_encoding(s);
6545             return;
6546         }
6547         if (!fp_access_check(s)) {
6548             return;
6549         }
6550         handle_fp_2src_half(s, opcode, rd, rn, rm);
6551         break;
6552     default:
6553         unallocated_encoding(s);
6554     }
6555 }
6556 
6557 /* Floating-point data-processing (3 source) - single precision */
6558 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6559                                   int rd, int rn, int rm, int ra)
6560 {
6561     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6562     TCGv_i32 tcg_res = tcg_temp_new_i32();
6563     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6564 
6565     tcg_op1 = read_fp_sreg(s, rn);
6566     tcg_op2 = read_fp_sreg(s, rm);
6567     tcg_op3 = read_fp_sreg(s, ra);
6568 
6569     /* These are fused multiply-add, and must be done as one
6570      * floating point operation with no rounding between the
6571      * multiplication and addition steps.
6572      * NB that doing the negations here as separate steps is
6573      * correct : an input NaN should come out with its sign bit
6574      * flipped if it is a negated-input.
6575      */
6576     if (o1 == true) {
6577         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6578     }
6579 
6580     if (o0 != o1) {
6581         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6582     }
6583 
6584     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6585 
6586     write_fp_sreg(s, rd, tcg_res);
6587 }
6588 
6589 /* Floating-point data-processing (3 source) - double precision */
6590 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6591                                   int rd, int rn, int rm, int ra)
6592 {
6593     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6594     TCGv_i64 tcg_res = tcg_temp_new_i64();
6595     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6596 
6597     tcg_op1 = read_fp_dreg(s, rn);
6598     tcg_op2 = read_fp_dreg(s, rm);
6599     tcg_op3 = read_fp_dreg(s, ra);
6600 
6601     /* These are fused multiply-add, and must be done as one
6602      * floating point operation with no rounding between the
6603      * multiplication and addition steps.
6604      * NB that doing the negations here as separate steps is
6605      * correct : an input NaN should come out with its sign bit
6606      * flipped if it is a negated-input.
6607      */
6608     if (o1 == true) {
6609         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6610     }
6611 
6612     if (o0 != o1) {
6613         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6614     }
6615 
6616     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6617 
6618     write_fp_dreg(s, rd, tcg_res);
6619 }
6620 
6621 /* Floating-point data-processing (3 source) - half precision */
6622 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6623                                 int rd, int rn, int rm, int ra)
6624 {
6625     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6626     TCGv_i32 tcg_res = tcg_temp_new_i32();
6627     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6628 
6629     tcg_op1 = read_fp_hreg(s, rn);
6630     tcg_op2 = read_fp_hreg(s, rm);
6631     tcg_op3 = read_fp_hreg(s, ra);
6632 
6633     /* These are fused multiply-add, and must be done as one
6634      * floating point operation with no rounding between the
6635      * multiplication and addition steps.
6636      * NB that doing the negations here as separate steps is
6637      * correct : an input NaN should come out with its sign bit
6638      * flipped if it is a negated-input.
6639      */
6640     if (o1 == true) {
6641         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6642     }
6643 
6644     if (o0 != o1) {
6645         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6646     }
6647 
6648     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6649 
6650     write_fp_sreg(s, rd, tcg_res);
6651 }
6652 
6653 /* Floating point data-processing (3 source)
6654  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6655  * +---+---+---+-----------+------+----+------+----+------+------+------+
6656  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6657  * +---+---+---+-----------+------+----+------+----+------+------+------+
6658  */
6659 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6660 {
6661     int mos = extract32(insn, 29, 3);
6662     int type = extract32(insn, 22, 2);
6663     int rd = extract32(insn, 0, 5);
6664     int rn = extract32(insn, 5, 5);
6665     int ra = extract32(insn, 10, 5);
6666     int rm = extract32(insn, 16, 5);
6667     bool o0 = extract32(insn, 15, 1);
6668     bool o1 = extract32(insn, 21, 1);
6669 
6670     if (mos) {
6671         unallocated_encoding(s);
6672         return;
6673     }
6674 
6675     switch (type) {
6676     case 0:
6677         if (!fp_access_check(s)) {
6678             return;
6679         }
6680         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6681         break;
6682     case 1:
6683         if (!fp_access_check(s)) {
6684             return;
6685         }
6686         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6687         break;
6688     case 3:
6689         if (!dc_isar_feature(aa64_fp16, s)) {
6690             unallocated_encoding(s);
6691             return;
6692         }
6693         if (!fp_access_check(s)) {
6694             return;
6695         }
6696         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6697         break;
6698     default:
6699         unallocated_encoding(s);
6700     }
6701 }
6702 
6703 /* Floating point immediate
6704  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6705  * +---+---+---+-----------+------+---+------------+-------+------+------+
6706  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6707  * +---+---+---+-----------+------+---+------------+-------+------+------+
6708  */
6709 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6710 {
6711     int rd = extract32(insn, 0, 5);
6712     int imm5 = extract32(insn, 5, 5);
6713     int imm8 = extract32(insn, 13, 8);
6714     int type = extract32(insn, 22, 2);
6715     int mos = extract32(insn, 29, 3);
6716     uint64_t imm;
6717     MemOp sz;
6718 
6719     if (mos || imm5) {
6720         unallocated_encoding(s);
6721         return;
6722     }
6723 
6724     switch (type) {
6725     case 0:
6726         sz = MO_32;
6727         break;
6728     case 1:
6729         sz = MO_64;
6730         break;
6731     case 3:
6732         sz = MO_16;
6733         if (dc_isar_feature(aa64_fp16, s)) {
6734             break;
6735         }
6736         /* fallthru */
6737     default:
6738         unallocated_encoding(s);
6739         return;
6740     }
6741 
6742     if (!fp_access_check(s)) {
6743         return;
6744     }
6745 
6746     imm = vfp_expand_imm(sz, imm8);
6747     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6748 }
6749 
6750 /* Handle floating point <=> fixed point conversions. Note that we can
6751  * also deal with fp <=> integer conversions as a special case (scale == 64)
6752  * OPTME: consider handling that special case specially or at least skipping
6753  * the call to scalbn in the helpers for zero shifts.
6754  */
6755 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6756                            bool itof, int rmode, int scale, int sf, int type)
6757 {
6758     bool is_signed = !(opcode & 1);
6759     TCGv_ptr tcg_fpstatus;
6760     TCGv_i32 tcg_shift, tcg_single;
6761     TCGv_i64 tcg_double;
6762 
6763     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6764 
6765     tcg_shift = tcg_constant_i32(64 - scale);
6766 
6767     if (itof) {
6768         TCGv_i64 tcg_int = cpu_reg(s, rn);
6769         if (!sf) {
6770             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6771 
6772             if (is_signed) {
6773                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6774             } else {
6775                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6776             }
6777 
6778             tcg_int = tcg_extend;
6779         }
6780 
6781         switch (type) {
6782         case 1: /* float64 */
6783             tcg_double = tcg_temp_new_i64();
6784             if (is_signed) {
6785                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6786                                      tcg_shift, tcg_fpstatus);
6787             } else {
6788                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6789                                      tcg_shift, tcg_fpstatus);
6790             }
6791             write_fp_dreg(s, rd, tcg_double);
6792             break;
6793 
6794         case 0: /* float32 */
6795             tcg_single = tcg_temp_new_i32();
6796             if (is_signed) {
6797                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6798                                      tcg_shift, tcg_fpstatus);
6799             } else {
6800                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6801                                      tcg_shift, tcg_fpstatus);
6802             }
6803             write_fp_sreg(s, rd, tcg_single);
6804             break;
6805 
6806         case 3: /* float16 */
6807             tcg_single = tcg_temp_new_i32();
6808             if (is_signed) {
6809                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6810                                      tcg_shift, tcg_fpstatus);
6811             } else {
6812                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6813                                      tcg_shift, tcg_fpstatus);
6814             }
6815             write_fp_sreg(s, rd, tcg_single);
6816             break;
6817 
6818         default:
6819             g_assert_not_reached();
6820         }
6821     } else {
6822         TCGv_i64 tcg_int = cpu_reg(s, rd);
6823         TCGv_i32 tcg_rmode;
6824 
6825         if (extract32(opcode, 2, 1)) {
6826             /* There are too many rounding modes to all fit into rmode,
6827              * so FCVTA[US] is a special case.
6828              */
6829             rmode = FPROUNDING_TIEAWAY;
6830         }
6831 
6832         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6833 
6834         switch (type) {
6835         case 1: /* float64 */
6836             tcg_double = read_fp_dreg(s, rn);
6837             if (is_signed) {
6838                 if (!sf) {
6839                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6840                                          tcg_shift, tcg_fpstatus);
6841                 } else {
6842                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6843                                          tcg_shift, tcg_fpstatus);
6844                 }
6845             } else {
6846                 if (!sf) {
6847                     gen_helper_vfp_tould(tcg_int, tcg_double,
6848                                          tcg_shift, tcg_fpstatus);
6849                 } else {
6850                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6851                                          tcg_shift, tcg_fpstatus);
6852                 }
6853             }
6854             if (!sf) {
6855                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6856             }
6857             break;
6858 
6859         case 0: /* float32 */
6860             tcg_single = read_fp_sreg(s, rn);
6861             if (sf) {
6862                 if (is_signed) {
6863                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6864                                          tcg_shift, tcg_fpstatus);
6865                 } else {
6866                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6867                                          tcg_shift, tcg_fpstatus);
6868                 }
6869             } else {
6870                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6871                 if (is_signed) {
6872                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6873                                          tcg_shift, tcg_fpstatus);
6874                 } else {
6875                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6876                                          tcg_shift, tcg_fpstatus);
6877                 }
6878                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6879             }
6880             break;
6881 
6882         case 3: /* float16 */
6883             tcg_single = read_fp_sreg(s, rn);
6884             if (sf) {
6885                 if (is_signed) {
6886                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
6887                                          tcg_shift, tcg_fpstatus);
6888                 } else {
6889                     gen_helper_vfp_touqh(tcg_int, tcg_single,
6890                                          tcg_shift, tcg_fpstatus);
6891                 }
6892             } else {
6893                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6894                 if (is_signed) {
6895                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
6896                                          tcg_shift, tcg_fpstatus);
6897                 } else {
6898                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
6899                                          tcg_shift, tcg_fpstatus);
6900                 }
6901                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6902             }
6903             break;
6904 
6905         default:
6906             g_assert_not_reached();
6907         }
6908 
6909         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
6910     }
6911 }
6912 
6913 /* Floating point <-> fixed point conversions
6914  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6915  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6916  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6917  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6918  */
6919 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6920 {
6921     int rd = extract32(insn, 0, 5);
6922     int rn = extract32(insn, 5, 5);
6923     int scale = extract32(insn, 10, 6);
6924     int opcode = extract32(insn, 16, 3);
6925     int rmode = extract32(insn, 19, 2);
6926     int type = extract32(insn, 22, 2);
6927     bool sbit = extract32(insn, 29, 1);
6928     bool sf = extract32(insn, 31, 1);
6929     bool itof;
6930 
6931     if (sbit || (!sf && scale < 32)) {
6932         unallocated_encoding(s);
6933         return;
6934     }
6935 
6936     switch (type) {
6937     case 0: /* float32 */
6938     case 1: /* float64 */
6939         break;
6940     case 3: /* float16 */
6941         if (dc_isar_feature(aa64_fp16, s)) {
6942             break;
6943         }
6944         /* fallthru */
6945     default:
6946         unallocated_encoding(s);
6947         return;
6948     }
6949 
6950     switch ((rmode << 3) | opcode) {
6951     case 0x2: /* SCVTF */
6952     case 0x3: /* UCVTF */
6953         itof = true;
6954         break;
6955     case 0x18: /* FCVTZS */
6956     case 0x19: /* FCVTZU */
6957         itof = false;
6958         break;
6959     default:
6960         unallocated_encoding(s);
6961         return;
6962     }
6963 
6964     if (!fp_access_check(s)) {
6965         return;
6966     }
6967 
6968     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6969 }
6970 
6971 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6972 {
6973     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6974      * without conversion.
6975      */
6976 
6977     if (itof) {
6978         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6979         TCGv_i64 tmp;
6980 
6981         switch (type) {
6982         case 0:
6983             /* 32 bit */
6984             tmp = tcg_temp_new_i64();
6985             tcg_gen_ext32u_i64(tmp, tcg_rn);
6986             write_fp_dreg(s, rd, tmp);
6987             break;
6988         case 1:
6989             /* 64 bit */
6990             write_fp_dreg(s, rd, tcg_rn);
6991             break;
6992         case 2:
6993             /* 64 bit to top half. */
6994             tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
6995             clear_vec_high(s, true, rd);
6996             break;
6997         case 3:
6998             /* 16 bit */
6999             tmp = tcg_temp_new_i64();
7000             tcg_gen_ext16u_i64(tmp, tcg_rn);
7001             write_fp_dreg(s, rd, tmp);
7002             break;
7003         default:
7004             g_assert_not_reached();
7005         }
7006     } else {
7007         TCGv_i64 tcg_rd = cpu_reg(s, rd);
7008 
7009         switch (type) {
7010         case 0:
7011             /* 32 bit */
7012             tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
7013             break;
7014         case 1:
7015             /* 64 bit */
7016             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
7017             break;
7018         case 2:
7019             /* 64 bits from top half */
7020             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
7021             break;
7022         case 3:
7023             /* 16 bit */
7024             tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
7025             break;
7026         default:
7027             g_assert_not_reached();
7028         }
7029     }
7030 }
7031 
7032 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
7033 {
7034     TCGv_i64 t = read_fp_dreg(s, rn);
7035     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
7036 
7037     gen_helper_fjcvtzs(t, t, fpstatus);
7038 
7039     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
7040     tcg_gen_extrh_i64_i32(cpu_ZF, t);
7041     tcg_gen_movi_i32(cpu_CF, 0);
7042     tcg_gen_movi_i32(cpu_NF, 0);
7043     tcg_gen_movi_i32(cpu_VF, 0);
7044 }
7045 
7046 /* Floating point <-> integer conversions
7047  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
7048  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7049  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
7050  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7051  */
7052 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
7053 {
7054     int rd = extract32(insn, 0, 5);
7055     int rn = extract32(insn, 5, 5);
7056     int opcode = extract32(insn, 16, 3);
7057     int rmode = extract32(insn, 19, 2);
7058     int type = extract32(insn, 22, 2);
7059     bool sbit = extract32(insn, 29, 1);
7060     bool sf = extract32(insn, 31, 1);
7061     bool itof = false;
7062 
7063     if (sbit) {
7064         goto do_unallocated;
7065     }
7066 
7067     switch (opcode) {
7068     case 2: /* SCVTF */
7069     case 3: /* UCVTF */
7070         itof = true;
7071         /* fallthru */
7072     case 4: /* FCVTAS */
7073     case 5: /* FCVTAU */
7074         if (rmode != 0) {
7075             goto do_unallocated;
7076         }
7077         /* fallthru */
7078     case 0: /* FCVT[NPMZ]S */
7079     case 1: /* FCVT[NPMZ]U */
7080         switch (type) {
7081         case 0: /* float32 */
7082         case 1: /* float64 */
7083             break;
7084         case 3: /* float16 */
7085             if (!dc_isar_feature(aa64_fp16, s)) {
7086                 goto do_unallocated;
7087             }
7088             break;
7089         default:
7090             goto do_unallocated;
7091         }
7092         if (!fp_access_check(s)) {
7093             return;
7094         }
7095         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7096         break;
7097 
7098     default:
7099         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7100         case 0b01100110: /* FMOV half <-> 32-bit int */
7101         case 0b01100111:
7102         case 0b11100110: /* FMOV half <-> 64-bit int */
7103         case 0b11100111:
7104             if (!dc_isar_feature(aa64_fp16, s)) {
7105                 goto do_unallocated;
7106             }
7107             /* fallthru */
7108         case 0b00000110: /* FMOV 32-bit */
7109         case 0b00000111:
7110         case 0b10100110: /* FMOV 64-bit */
7111         case 0b10100111:
7112         case 0b11001110: /* FMOV top half of 128-bit */
7113         case 0b11001111:
7114             if (!fp_access_check(s)) {
7115                 return;
7116             }
7117             itof = opcode & 1;
7118             handle_fmov(s, rd, rn, type, itof);
7119             break;
7120 
7121         case 0b00111110: /* FJCVTZS */
7122             if (!dc_isar_feature(aa64_jscvt, s)) {
7123                 goto do_unallocated;
7124             } else if (fp_access_check(s)) {
7125                 handle_fjcvtzs(s, rd, rn);
7126             }
7127             break;
7128 
7129         default:
7130         do_unallocated:
7131             unallocated_encoding(s);
7132             return;
7133         }
7134         break;
7135     }
7136 }
7137 
7138 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7139  *   31  30  29 28     25 24                          0
7140  * +---+---+---+---------+-----------------------------+
7141  * |   | 0 |   | 1 1 1 1 |                             |
7142  * +---+---+---+---------+-----------------------------+
7143  */
7144 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7145 {
7146     if (extract32(insn, 24, 1)) {
7147         /* Floating point data-processing (3 source) */
7148         disas_fp_3src(s, insn);
7149     } else if (extract32(insn, 21, 1) == 0) {
7150         /* Floating point to fixed point conversions */
7151         disas_fp_fixed_conv(s, insn);
7152     } else {
7153         switch (extract32(insn, 10, 2)) {
7154         case 1:
7155             /* Floating point conditional compare */
7156             disas_fp_ccomp(s, insn);
7157             break;
7158         case 2:
7159             /* Floating point data-processing (2 source) */
7160             disas_fp_2src(s, insn);
7161             break;
7162         case 3:
7163             /* Floating point conditional select */
7164             disas_fp_csel(s, insn);
7165             break;
7166         case 0:
7167             switch (ctz32(extract32(insn, 12, 4))) {
7168             case 0: /* [15:12] == xxx1 */
7169                 /* Floating point immediate */
7170                 disas_fp_imm(s, insn);
7171                 break;
7172             case 1: /* [15:12] == xx10 */
7173                 /* Floating point compare */
7174                 disas_fp_compare(s, insn);
7175                 break;
7176             case 2: /* [15:12] == x100 */
7177                 /* Floating point data-processing (1 source) */
7178                 disas_fp_1src(s, insn);
7179                 break;
7180             case 3: /* [15:12] == 1000 */
7181                 unallocated_encoding(s);
7182                 break;
7183             default: /* [15:12] == 0000 */
7184                 /* Floating point <-> integer conversions */
7185                 disas_fp_int_conv(s, insn);
7186                 break;
7187             }
7188             break;
7189         }
7190     }
7191 }
7192 
7193 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7194                      int pos)
7195 {
7196     /* Extract 64 bits from the middle of two concatenated 64 bit
7197      * vector register slices left:right. The extracted bits start
7198      * at 'pos' bits into the right (least significant) side.
7199      * We return the result in tcg_right, and guarantee not to
7200      * trash tcg_left.
7201      */
7202     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7203     assert(pos > 0 && pos < 64);
7204 
7205     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7206     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7207     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7208 }
7209 
7210 /* EXT
7211  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7212  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7213  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7214  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7215  */
7216 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7217 {
7218     int is_q = extract32(insn, 30, 1);
7219     int op2 = extract32(insn, 22, 2);
7220     int imm4 = extract32(insn, 11, 4);
7221     int rm = extract32(insn, 16, 5);
7222     int rn = extract32(insn, 5, 5);
7223     int rd = extract32(insn, 0, 5);
7224     int pos = imm4 << 3;
7225     TCGv_i64 tcg_resl, tcg_resh;
7226 
7227     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7228         unallocated_encoding(s);
7229         return;
7230     }
7231 
7232     if (!fp_access_check(s)) {
7233         return;
7234     }
7235 
7236     tcg_resh = tcg_temp_new_i64();
7237     tcg_resl = tcg_temp_new_i64();
7238 
7239     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7240      * either extracting 128 bits from a 128:128 concatenation, or
7241      * extracting 64 bits from a 64:64 concatenation.
7242      */
7243     if (!is_q) {
7244         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7245         if (pos != 0) {
7246             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7247             do_ext64(s, tcg_resh, tcg_resl, pos);
7248         }
7249     } else {
7250         TCGv_i64 tcg_hh;
7251         typedef struct {
7252             int reg;
7253             int elt;
7254         } EltPosns;
7255         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7256         EltPosns *elt = eltposns;
7257 
7258         if (pos >= 64) {
7259             elt++;
7260             pos -= 64;
7261         }
7262 
7263         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7264         elt++;
7265         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7266         elt++;
7267         if (pos != 0) {
7268             do_ext64(s, tcg_resh, tcg_resl, pos);
7269             tcg_hh = tcg_temp_new_i64();
7270             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7271             do_ext64(s, tcg_hh, tcg_resh, pos);
7272         }
7273     }
7274 
7275     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7276     if (is_q) {
7277         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7278     }
7279     clear_vec_high(s, is_q, rd);
7280 }
7281 
7282 /* TBL/TBX
7283  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7284  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7285  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7286  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7287  */
7288 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7289 {
7290     int op2 = extract32(insn, 22, 2);
7291     int is_q = extract32(insn, 30, 1);
7292     int rm = extract32(insn, 16, 5);
7293     int rn = extract32(insn, 5, 5);
7294     int rd = extract32(insn, 0, 5);
7295     int is_tbx = extract32(insn, 12, 1);
7296     int len = (extract32(insn, 13, 2) + 1) * 16;
7297 
7298     if (op2 != 0) {
7299         unallocated_encoding(s);
7300         return;
7301     }
7302 
7303     if (!fp_access_check(s)) {
7304         return;
7305     }
7306 
7307     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7308                        vec_full_reg_offset(s, rm), tcg_env,
7309                        is_q ? 16 : 8, vec_full_reg_size(s),
7310                        (len << 6) | (is_tbx << 5) | rn,
7311                        gen_helper_simd_tblx);
7312 }
7313 
7314 /* ZIP/UZP/TRN
7315  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7316  * +---+---+-------------+------+---+------+---+------------------+------+
7317  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7318  * +---+---+-------------+------+---+------+---+------------------+------+
7319  */
7320 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7321 {
7322     int rd = extract32(insn, 0, 5);
7323     int rn = extract32(insn, 5, 5);
7324     int rm = extract32(insn, 16, 5);
7325     int size = extract32(insn, 22, 2);
7326     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7327      * bit 2 indicates 1 vs 2 variant of the insn.
7328      */
7329     int opcode = extract32(insn, 12, 2);
7330     bool part = extract32(insn, 14, 1);
7331     bool is_q = extract32(insn, 30, 1);
7332     int esize = 8 << size;
7333     int i;
7334     int datasize = is_q ? 128 : 64;
7335     int elements = datasize / esize;
7336     TCGv_i64 tcg_res[2], tcg_ele;
7337 
7338     if (opcode == 0 || (size == 3 && !is_q)) {
7339         unallocated_encoding(s);
7340         return;
7341     }
7342 
7343     if (!fp_access_check(s)) {
7344         return;
7345     }
7346 
7347     tcg_res[0] = tcg_temp_new_i64();
7348     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7349     tcg_ele = tcg_temp_new_i64();
7350 
7351     for (i = 0; i < elements; i++) {
7352         int o, w;
7353 
7354         switch (opcode) {
7355         case 1: /* UZP1/2 */
7356         {
7357             int midpoint = elements / 2;
7358             if (i < midpoint) {
7359                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7360             } else {
7361                 read_vec_element(s, tcg_ele, rm,
7362                                  2 * (i - midpoint) + part, size);
7363             }
7364             break;
7365         }
7366         case 2: /* TRN1/2 */
7367             if (i & 1) {
7368                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7369             } else {
7370                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7371             }
7372             break;
7373         case 3: /* ZIP1/2 */
7374         {
7375             int base = part * elements / 2;
7376             if (i & 1) {
7377                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7378             } else {
7379                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7380             }
7381             break;
7382         }
7383         default:
7384             g_assert_not_reached();
7385         }
7386 
7387         w = (i * esize) / 64;
7388         o = (i * esize) % 64;
7389         if (o == 0) {
7390             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7391         } else {
7392             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7393             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7394         }
7395     }
7396 
7397     for (i = 0; i <= is_q; ++i) {
7398         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7399     }
7400     clear_vec_high(s, is_q, rd);
7401 }
7402 
7403 /*
7404  * do_reduction_op helper
7405  *
7406  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7407  * important for correct NaN propagation that we do these
7408  * operations in exactly the order specified by the pseudocode.
7409  *
7410  * This is a recursive function, TCG temps should be freed by the
7411  * calling function once it is done with the values.
7412  */
7413 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7414                                 int esize, int size, int vmap, TCGv_ptr fpst)
7415 {
7416     if (esize == size) {
7417         int element;
7418         MemOp msize = esize == 16 ? MO_16 : MO_32;
7419         TCGv_i32 tcg_elem;
7420 
7421         /* We should have one register left here */
7422         assert(ctpop8(vmap) == 1);
7423         element = ctz32(vmap);
7424         assert(element < 8);
7425 
7426         tcg_elem = tcg_temp_new_i32();
7427         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7428         return tcg_elem;
7429     } else {
7430         int bits = size / 2;
7431         int shift = ctpop8(vmap) / 2;
7432         int vmap_lo = (vmap >> shift) & vmap;
7433         int vmap_hi = (vmap & ~vmap_lo);
7434         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7435 
7436         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7437         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7438         tcg_res = tcg_temp_new_i32();
7439 
7440         switch (fpopcode) {
7441         case 0x0c: /* fmaxnmv half-precision */
7442             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7443             break;
7444         case 0x0f: /* fmaxv half-precision */
7445             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7446             break;
7447         case 0x1c: /* fminnmv half-precision */
7448             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7449             break;
7450         case 0x1f: /* fminv half-precision */
7451             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7452             break;
7453         case 0x2c: /* fmaxnmv */
7454             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7455             break;
7456         case 0x2f: /* fmaxv */
7457             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7458             break;
7459         case 0x3c: /* fminnmv */
7460             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7461             break;
7462         case 0x3f: /* fminv */
7463             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7464             break;
7465         default:
7466             g_assert_not_reached();
7467         }
7468         return tcg_res;
7469     }
7470 }
7471 
7472 /* AdvSIMD across lanes
7473  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7474  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7475  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7476  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7477  */
7478 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7479 {
7480     int rd = extract32(insn, 0, 5);
7481     int rn = extract32(insn, 5, 5);
7482     int size = extract32(insn, 22, 2);
7483     int opcode = extract32(insn, 12, 5);
7484     bool is_q = extract32(insn, 30, 1);
7485     bool is_u = extract32(insn, 29, 1);
7486     bool is_fp = false;
7487     bool is_min = false;
7488     int esize;
7489     int elements;
7490     int i;
7491     TCGv_i64 tcg_res, tcg_elt;
7492 
7493     switch (opcode) {
7494     case 0x1b: /* ADDV */
7495         if (is_u) {
7496             unallocated_encoding(s);
7497             return;
7498         }
7499         /* fall through */
7500     case 0x3: /* SADDLV, UADDLV */
7501     case 0xa: /* SMAXV, UMAXV */
7502     case 0x1a: /* SMINV, UMINV */
7503         if (size == 3 || (size == 2 && !is_q)) {
7504             unallocated_encoding(s);
7505             return;
7506         }
7507         break;
7508     case 0xc: /* FMAXNMV, FMINNMV */
7509     case 0xf: /* FMAXV, FMINV */
7510         /* Bit 1 of size field encodes min vs max and the actual size
7511          * depends on the encoding of the U bit. If not set (and FP16
7512          * enabled) then we do half-precision float instead of single
7513          * precision.
7514          */
7515         is_min = extract32(size, 1, 1);
7516         is_fp = true;
7517         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7518             size = 1;
7519         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7520             unallocated_encoding(s);
7521             return;
7522         } else {
7523             size = 2;
7524         }
7525         break;
7526     default:
7527         unallocated_encoding(s);
7528         return;
7529     }
7530 
7531     if (!fp_access_check(s)) {
7532         return;
7533     }
7534 
7535     esize = 8 << size;
7536     elements = (is_q ? 128 : 64) / esize;
7537 
7538     tcg_res = tcg_temp_new_i64();
7539     tcg_elt = tcg_temp_new_i64();
7540 
7541     /* These instructions operate across all lanes of a vector
7542      * to produce a single result. We can guarantee that a 64
7543      * bit intermediate is sufficient:
7544      *  + for [US]ADDLV the maximum element size is 32 bits, and
7545      *    the result type is 64 bits
7546      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7547      *    same as the element size, which is 32 bits at most
7548      * For the integer operations we can choose to work at 64
7549      * or 32 bits and truncate at the end; for simplicity
7550      * we use 64 bits always. The floating point
7551      * ops do require 32 bit intermediates, though.
7552      */
7553     if (!is_fp) {
7554         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7555 
7556         for (i = 1; i < elements; i++) {
7557             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7558 
7559             switch (opcode) {
7560             case 0x03: /* SADDLV / UADDLV */
7561             case 0x1b: /* ADDV */
7562                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7563                 break;
7564             case 0x0a: /* SMAXV / UMAXV */
7565                 if (is_u) {
7566                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7567                 } else {
7568                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7569                 }
7570                 break;
7571             case 0x1a: /* SMINV / UMINV */
7572                 if (is_u) {
7573                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7574                 } else {
7575                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7576                 }
7577                 break;
7578             default:
7579                 g_assert_not_reached();
7580             }
7581 
7582         }
7583     } else {
7584         /* Floating point vector reduction ops which work across 32
7585          * bit (single) or 16 bit (half-precision) intermediates.
7586          * Note that correct NaN propagation requires that we do these
7587          * operations in exactly the order specified by the pseudocode.
7588          */
7589         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7590         int fpopcode = opcode | is_min << 4 | is_u << 5;
7591         int vmap = (1 << elements) - 1;
7592         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7593                                              (is_q ? 128 : 64), vmap, fpst);
7594         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7595     }
7596 
7597     /* Now truncate the result to the width required for the final output */
7598     if (opcode == 0x03) {
7599         /* SADDLV, UADDLV: result is 2*esize */
7600         size++;
7601     }
7602 
7603     switch (size) {
7604     case 0:
7605         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7606         break;
7607     case 1:
7608         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7609         break;
7610     case 2:
7611         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7612         break;
7613     case 3:
7614         break;
7615     default:
7616         g_assert_not_reached();
7617     }
7618 
7619     write_fp_dreg(s, rd, tcg_res);
7620 }
7621 
7622 /* DUP (Element, Vector)
7623  *
7624  *  31  30   29              21 20    16 15        10  9    5 4    0
7625  * +---+---+-------------------+--------+-------------+------+------+
7626  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7627  * +---+---+-------------------+--------+-------------+------+------+
7628  *
7629  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7630  */
7631 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7632                              int imm5)
7633 {
7634     int size = ctz32(imm5);
7635     int index;
7636 
7637     if (size > 3 || (size == 3 && !is_q)) {
7638         unallocated_encoding(s);
7639         return;
7640     }
7641 
7642     if (!fp_access_check(s)) {
7643         return;
7644     }
7645 
7646     index = imm5 >> (size + 1);
7647     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7648                          vec_reg_offset(s, rn, index, size),
7649                          is_q ? 16 : 8, vec_full_reg_size(s));
7650 }
7651 
7652 /* DUP (element, scalar)
7653  *  31                   21 20    16 15        10  9    5 4    0
7654  * +-----------------------+--------+-------------+------+------+
7655  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7656  * +-----------------------+--------+-------------+------+------+
7657  */
7658 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7659                               int imm5)
7660 {
7661     int size = ctz32(imm5);
7662     int index;
7663     TCGv_i64 tmp;
7664 
7665     if (size > 3) {
7666         unallocated_encoding(s);
7667         return;
7668     }
7669 
7670     if (!fp_access_check(s)) {
7671         return;
7672     }
7673 
7674     index = imm5 >> (size + 1);
7675 
7676     /* This instruction just extracts the specified element and
7677      * zero-extends it into the bottom of the destination register.
7678      */
7679     tmp = tcg_temp_new_i64();
7680     read_vec_element(s, tmp, rn, index, size);
7681     write_fp_dreg(s, rd, tmp);
7682 }
7683 
7684 /* DUP (General)
7685  *
7686  *  31  30   29              21 20    16 15        10  9    5 4    0
7687  * +---+---+-------------------+--------+-------------+------+------+
7688  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7689  * +---+---+-------------------+--------+-------------+------+------+
7690  *
7691  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7692  */
7693 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7694                              int imm5)
7695 {
7696     int size = ctz32(imm5);
7697     uint32_t dofs, oprsz, maxsz;
7698 
7699     if (size > 3 || ((size == 3) && !is_q)) {
7700         unallocated_encoding(s);
7701         return;
7702     }
7703 
7704     if (!fp_access_check(s)) {
7705         return;
7706     }
7707 
7708     dofs = vec_full_reg_offset(s, rd);
7709     oprsz = is_q ? 16 : 8;
7710     maxsz = vec_full_reg_size(s);
7711 
7712     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7713 }
7714 
7715 /* INS (Element)
7716  *
7717  *  31                   21 20    16 15  14    11  10 9    5 4    0
7718  * +-----------------------+--------+------------+---+------+------+
7719  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7720  * +-----------------------+--------+------------+---+------+------+
7721  *
7722  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7723  * index: encoded in imm5<4:size+1>
7724  */
7725 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7726                              int imm4, int imm5)
7727 {
7728     int size = ctz32(imm5);
7729     int src_index, dst_index;
7730     TCGv_i64 tmp;
7731 
7732     if (size > 3) {
7733         unallocated_encoding(s);
7734         return;
7735     }
7736 
7737     if (!fp_access_check(s)) {
7738         return;
7739     }
7740 
7741     dst_index = extract32(imm5, 1+size, 5);
7742     src_index = extract32(imm4, size, 4);
7743 
7744     tmp = tcg_temp_new_i64();
7745 
7746     read_vec_element(s, tmp, rn, src_index, size);
7747     write_vec_element(s, tmp, rd, dst_index, size);
7748 
7749     /* INS is considered a 128-bit write for SVE. */
7750     clear_vec_high(s, true, rd);
7751 }
7752 
7753 
7754 /* INS (General)
7755  *
7756  *  31                   21 20    16 15        10  9    5 4    0
7757  * +-----------------------+--------+-------------+------+------+
7758  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7759  * +-----------------------+--------+-------------+------+------+
7760  *
7761  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7762  * index: encoded in imm5<4:size+1>
7763  */
7764 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7765 {
7766     int size = ctz32(imm5);
7767     int idx;
7768 
7769     if (size > 3) {
7770         unallocated_encoding(s);
7771         return;
7772     }
7773 
7774     if (!fp_access_check(s)) {
7775         return;
7776     }
7777 
7778     idx = extract32(imm5, 1 + size, 4 - size);
7779     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7780 
7781     /* INS is considered a 128-bit write for SVE. */
7782     clear_vec_high(s, true, rd);
7783 }
7784 
7785 /*
7786  * UMOV (General)
7787  * SMOV (General)
7788  *
7789  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7790  * +---+---+-------------------+--------+-------------+------+------+
7791  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7792  * +---+---+-------------------+--------+-------------+------+------+
7793  *
7794  * U: unsigned when set
7795  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7796  */
7797 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7798                                   int rn, int rd, int imm5)
7799 {
7800     int size = ctz32(imm5);
7801     int element;
7802     TCGv_i64 tcg_rd;
7803 
7804     /* Check for UnallocatedEncodings */
7805     if (is_signed) {
7806         if (size > 2 || (size == 2 && !is_q)) {
7807             unallocated_encoding(s);
7808             return;
7809         }
7810     } else {
7811         if (size > 3
7812             || (size < 3 && is_q)
7813             || (size == 3 && !is_q)) {
7814             unallocated_encoding(s);
7815             return;
7816         }
7817     }
7818 
7819     if (!fp_access_check(s)) {
7820         return;
7821     }
7822 
7823     element = extract32(imm5, 1+size, 4);
7824 
7825     tcg_rd = cpu_reg(s, rd);
7826     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7827     if (is_signed && !is_q) {
7828         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7829     }
7830 }
7831 
7832 /* AdvSIMD copy
7833  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7834  * +---+---+----+-----------------+------+---+------+---+------+------+
7835  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7836  * +---+---+----+-----------------+------+---+------+---+------+------+
7837  */
7838 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7839 {
7840     int rd = extract32(insn, 0, 5);
7841     int rn = extract32(insn, 5, 5);
7842     int imm4 = extract32(insn, 11, 4);
7843     int op = extract32(insn, 29, 1);
7844     int is_q = extract32(insn, 30, 1);
7845     int imm5 = extract32(insn, 16, 5);
7846 
7847     if (op) {
7848         if (is_q) {
7849             /* INS (element) */
7850             handle_simd_inse(s, rd, rn, imm4, imm5);
7851         } else {
7852             unallocated_encoding(s);
7853         }
7854     } else {
7855         switch (imm4) {
7856         case 0:
7857             /* DUP (element - vector) */
7858             handle_simd_dupe(s, is_q, rd, rn, imm5);
7859             break;
7860         case 1:
7861             /* DUP (general) */
7862             handle_simd_dupg(s, is_q, rd, rn, imm5);
7863             break;
7864         case 3:
7865             if (is_q) {
7866                 /* INS (general) */
7867                 handle_simd_insg(s, rd, rn, imm5);
7868             } else {
7869                 unallocated_encoding(s);
7870             }
7871             break;
7872         case 5:
7873         case 7:
7874             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7875             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7876             break;
7877         default:
7878             unallocated_encoding(s);
7879             break;
7880         }
7881     }
7882 }
7883 
7884 /* AdvSIMD modified immediate
7885  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7886  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7887  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7888  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7889  *
7890  * There are a number of operations that can be carried out here:
7891  *   MOVI - move (shifted) imm into register
7892  *   MVNI - move inverted (shifted) imm into register
7893  *   ORR  - bitwise OR of (shifted) imm with register
7894  *   BIC  - bitwise clear of (shifted) imm with register
7895  * With ARMv8.2 we also have:
7896  *   FMOV half-precision
7897  */
7898 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7899 {
7900     int rd = extract32(insn, 0, 5);
7901     int cmode = extract32(insn, 12, 4);
7902     int o2 = extract32(insn, 11, 1);
7903     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7904     bool is_neg = extract32(insn, 29, 1);
7905     bool is_q = extract32(insn, 30, 1);
7906     uint64_t imm = 0;
7907 
7908     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7909         /* Check for FMOV (vector, immediate) - half-precision */
7910         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7911             unallocated_encoding(s);
7912             return;
7913         }
7914     }
7915 
7916     if (!fp_access_check(s)) {
7917         return;
7918     }
7919 
7920     if (cmode == 15 && o2 && !is_neg) {
7921         /* FMOV (vector, immediate) - half-precision */
7922         imm = vfp_expand_imm(MO_16, abcdefgh);
7923         /* now duplicate across the lanes */
7924         imm = dup_const(MO_16, imm);
7925     } else {
7926         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
7927     }
7928 
7929     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7930         /* MOVI or MVNI, with MVNI negation handled above.  */
7931         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7932                              vec_full_reg_size(s), imm);
7933     } else {
7934         /* ORR or BIC, with BIC negation to AND handled above.  */
7935         if (is_neg) {
7936             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7937         } else {
7938             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7939         }
7940     }
7941 }
7942 
7943 /* AdvSIMD scalar copy
7944  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7945  * +-----+----+-----------------+------+---+------+---+------+------+
7946  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7947  * +-----+----+-----------------+------+---+------+---+------+------+
7948  */
7949 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7950 {
7951     int rd = extract32(insn, 0, 5);
7952     int rn = extract32(insn, 5, 5);
7953     int imm4 = extract32(insn, 11, 4);
7954     int imm5 = extract32(insn, 16, 5);
7955     int op = extract32(insn, 29, 1);
7956 
7957     if (op != 0 || imm4 != 0) {
7958         unallocated_encoding(s);
7959         return;
7960     }
7961 
7962     /* DUP (element, scalar) */
7963     handle_simd_dupes(s, rd, rn, imm5);
7964 }
7965 
7966 /* AdvSIMD scalar pairwise
7967  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7968  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7969  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7970  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7971  */
7972 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7973 {
7974     int u = extract32(insn, 29, 1);
7975     int size = extract32(insn, 22, 2);
7976     int opcode = extract32(insn, 12, 5);
7977     int rn = extract32(insn, 5, 5);
7978     int rd = extract32(insn, 0, 5);
7979     TCGv_ptr fpst;
7980 
7981     /* For some ops (the FP ones), size[1] is part of the encoding.
7982      * For ADDP strictly it is not but size[1] is always 1 for valid
7983      * encodings.
7984      */
7985     opcode |= (extract32(size, 1, 1) << 5);
7986 
7987     switch (opcode) {
7988     case 0x3b: /* ADDP */
7989         if (u || size != 3) {
7990             unallocated_encoding(s);
7991             return;
7992         }
7993         if (!fp_access_check(s)) {
7994             return;
7995         }
7996 
7997         fpst = NULL;
7998         break;
7999     case 0xc: /* FMAXNMP */
8000     case 0xd: /* FADDP */
8001     case 0xf: /* FMAXP */
8002     case 0x2c: /* FMINNMP */
8003     case 0x2f: /* FMINP */
8004         /* FP op, size[0] is 32 or 64 bit*/
8005         if (!u) {
8006             if (!dc_isar_feature(aa64_fp16, s)) {
8007                 unallocated_encoding(s);
8008                 return;
8009             } else {
8010                 size = MO_16;
8011             }
8012         } else {
8013             size = extract32(size, 0, 1) ? MO_64 : MO_32;
8014         }
8015 
8016         if (!fp_access_check(s)) {
8017             return;
8018         }
8019 
8020         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8021         break;
8022     default:
8023         unallocated_encoding(s);
8024         return;
8025     }
8026 
8027     if (size == MO_64) {
8028         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8029         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8030         TCGv_i64 tcg_res = tcg_temp_new_i64();
8031 
8032         read_vec_element(s, tcg_op1, rn, 0, MO_64);
8033         read_vec_element(s, tcg_op2, rn, 1, MO_64);
8034 
8035         switch (opcode) {
8036         case 0x3b: /* ADDP */
8037             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
8038             break;
8039         case 0xc: /* FMAXNMP */
8040             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8041             break;
8042         case 0xd: /* FADDP */
8043             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8044             break;
8045         case 0xf: /* FMAXP */
8046             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8047             break;
8048         case 0x2c: /* FMINNMP */
8049             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8050             break;
8051         case 0x2f: /* FMINP */
8052             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8053             break;
8054         default:
8055             g_assert_not_reached();
8056         }
8057 
8058         write_fp_dreg(s, rd, tcg_res);
8059     } else {
8060         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8061         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8062         TCGv_i32 tcg_res = tcg_temp_new_i32();
8063 
8064         read_vec_element_i32(s, tcg_op1, rn, 0, size);
8065         read_vec_element_i32(s, tcg_op2, rn, 1, size);
8066 
8067         if (size == MO_16) {
8068             switch (opcode) {
8069             case 0xc: /* FMAXNMP */
8070                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8071                 break;
8072             case 0xd: /* FADDP */
8073                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8074                 break;
8075             case 0xf: /* FMAXP */
8076                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8077                 break;
8078             case 0x2c: /* FMINNMP */
8079                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8080                 break;
8081             case 0x2f: /* FMINP */
8082                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8083                 break;
8084             default:
8085                 g_assert_not_reached();
8086             }
8087         } else {
8088             switch (opcode) {
8089             case 0xc: /* FMAXNMP */
8090                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8091                 break;
8092             case 0xd: /* FADDP */
8093                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8094                 break;
8095             case 0xf: /* FMAXP */
8096                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8097                 break;
8098             case 0x2c: /* FMINNMP */
8099                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8100                 break;
8101             case 0x2f: /* FMINP */
8102                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8103                 break;
8104             default:
8105                 g_assert_not_reached();
8106             }
8107         }
8108 
8109         write_fp_sreg(s, rd, tcg_res);
8110     }
8111 }
8112 
8113 /*
8114  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8115  *
8116  * This code is handles the common shifting code and is used by both
8117  * the vector and scalar code.
8118  */
8119 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8120                                     TCGv_i64 tcg_rnd, bool accumulate,
8121                                     bool is_u, int size, int shift)
8122 {
8123     bool extended_result = false;
8124     bool round = tcg_rnd != NULL;
8125     int ext_lshift = 0;
8126     TCGv_i64 tcg_src_hi;
8127 
8128     if (round && size == 3) {
8129         extended_result = true;
8130         ext_lshift = 64 - shift;
8131         tcg_src_hi = tcg_temp_new_i64();
8132     } else if (shift == 64) {
8133         if (!accumulate && is_u) {
8134             /* result is zero */
8135             tcg_gen_movi_i64(tcg_res, 0);
8136             return;
8137         }
8138     }
8139 
8140     /* Deal with the rounding step */
8141     if (round) {
8142         if (extended_result) {
8143             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8144             if (!is_u) {
8145                 /* take care of sign extending tcg_res */
8146                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8147                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8148                                  tcg_src, tcg_src_hi,
8149                                  tcg_rnd, tcg_zero);
8150             } else {
8151                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8152                                  tcg_src, tcg_zero,
8153                                  tcg_rnd, tcg_zero);
8154             }
8155         } else {
8156             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8157         }
8158     }
8159 
8160     /* Now do the shift right */
8161     if (round && extended_result) {
8162         /* extended case, >64 bit precision required */
8163         if (ext_lshift == 0) {
8164             /* special case, only high bits matter */
8165             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8166         } else {
8167             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8168             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8169             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8170         }
8171     } else {
8172         if (is_u) {
8173             if (shift == 64) {
8174                 /* essentially shifting in 64 zeros */
8175                 tcg_gen_movi_i64(tcg_src, 0);
8176             } else {
8177                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8178             }
8179         } else {
8180             if (shift == 64) {
8181                 /* effectively extending the sign-bit */
8182                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8183             } else {
8184                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8185             }
8186         }
8187     }
8188 
8189     if (accumulate) {
8190         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8191     } else {
8192         tcg_gen_mov_i64(tcg_res, tcg_src);
8193     }
8194 }
8195 
8196 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8197 static void handle_scalar_simd_shri(DisasContext *s,
8198                                     bool is_u, int immh, int immb,
8199                                     int opcode, int rn, int rd)
8200 {
8201     const int size = 3;
8202     int immhb = immh << 3 | immb;
8203     int shift = 2 * (8 << size) - immhb;
8204     bool accumulate = false;
8205     bool round = false;
8206     bool insert = false;
8207     TCGv_i64 tcg_rn;
8208     TCGv_i64 tcg_rd;
8209     TCGv_i64 tcg_round;
8210 
8211     if (!extract32(immh, 3, 1)) {
8212         unallocated_encoding(s);
8213         return;
8214     }
8215 
8216     if (!fp_access_check(s)) {
8217         return;
8218     }
8219 
8220     switch (opcode) {
8221     case 0x02: /* SSRA / USRA (accumulate) */
8222         accumulate = true;
8223         break;
8224     case 0x04: /* SRSHR / URSHR (rounding) */
8225         round = true;
8226         break;
8227     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8228         accumulate = round = true;
8229         break;
8230     case 0x08: /* SRI */
8231         insert = true;
8232         break;
8233     }
8234 
8235     if (round) {
8236         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8237     } else {
8238         tcg_round = NULL;
8239     }
8240 
8241     tcg_rn = read_fp_dreg(s, rn);
8242     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8243 
8244     if (insert) {
8245         /* shift count same as element size is valid but does nothing;
8246          * special case to avoid potential shift by 64.
8247          */
8248         int esize = 8 << size;
8249         if (shift != esize) {
8250             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8251             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8252         }
8253     } else {
8254         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8255                                 accumulate, is_u, size, shift);
8256     }
8257 
8258     write_fp_dreg(s, rd, tcg_rd);
8259 }
8260 
8261 /* SHL/SLI - Scalar shift left */
8262 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8263                                     int immh, int immb, int opcode,
8264                                     int rn, int rd)
8265 {
8266     int size = 32 - clz32(immh) - 1;
8267     int immhb = immh << 3 | immb;
8268     int shift = immhb - (8 << size);
8269     TCGv_i64 tcg_rn;
8270     TCGv_i64 tcg_rd;
8271 
8272     if (!extract32(immh, 3, 1)) {
8273         unallocated_encoding(s);
8274         return;
8275     }
8276 
8277     if (!fp_access_check(s)) {
8278         return;
8279     }
8280 
8281     tcg_rn = read_fp_dreg(s, rn);
8282     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8283 
8284     if (insert) {
8285         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8286     } else {
8287         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8288     }
8289 
8290     write_fp_dreg(s, rd, tcg_rd);
8291 }
8292 
8293 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8294  * (signed/unsigned) narrowing */
8295 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8296                                    bool is_u_shift, bool is_u_narrow,
8297                                    int immh, int immb, int opcode,
8298                                    int rn, int rd)
8299 {
8300     int immhb = immh << 3 | immb;
8301     int size = 32 - clz32(immh) - 1;
8302     int esize = 8 << size;
8303     int shift = (2 * esize) - immhb;
8304     int elements = is_scalar ? 1 : (64 / esize);
8305     bool round = extract32(opcode, 0, 1);
8306     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8307     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8308     TCGv_i32 tcg_rd_narrowed;
8309     TCGv_i64 tcg_final;
8310 
8311     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8312         { gen_helper_neon_narrow_sat_s8,
8313           gen_helper_neon_unarrow_sat8 },
8314         { gen_helper_neon_narrow_sat_s16,
8315           gen_helper_neon_unarrow_sat16 },
8316         { gen_helper_neon_narrow_sat_s32,
8317           gen_helper_neon_unarrow_sat32 },
8318         { NULL, NULL },
8319     };
8320     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8321         gen_helper_neon_narrow_sat_u8,
8322         gen_helper_neon_narrow_sat_u16,
8323         gen_helper_neon_narrow_sat_u32,
8324         NULL
8325     };
8326     NeonGenNarrowEnvFn *narrowfn;
8327 
8328     int i;
8329 
8330     assert(size < 4);
8331 
8332     if (extract32(immh, 3, 1)) {
8333         unallocated_encoding(s);
8334         return;
8335     }
8336 
8337     if (!fp_access_check(s)) {
8338         return;
8339     }
8340 
8341     if (is_u_shift) {
8342         narrowfn = unsigned_narrow_fns[size];
8343     } else {
8344         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8345     }
8346 
8347     tcg_rn = tcg_temp_new_i64();
8348     tcg_rd = tcg_temp_new_i64();
8349     tcg_rd_narrowed = tcg_temp_new_i32();
8350     tcg_final = tcg_temp_new_i64();
8351 
8352     if (round) {
8353         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8354     } else {
8355         tcg_round = NULL;
8356     }
8357 
8358     for (i = 0; i < elements; i++) {
8359         read_vec_element(s, tcg_rn, rn, i, ldop);
8360         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8361                                 false, is_u_shift, size+1, shift);
8362         narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
8363         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8364         if (i == 0) {
8365             tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize);
8366         } else {
8367             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8368         }
8369     }
8370 
8371     if (!is_q) {
8372         write_vec_element(s, tcg_final, rd, 0, MO_64);
8373     } else {
8374         write_vec_element(s, tcg_final, rd, 1, MO_64);
8375     }
8376     clear_vec_high(s, is_q, rd);
8377 }
8378 
8379 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8380 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8381                              bool src_unsigned, bool dst_unsigned,
8382                              int immh, int immb, int rn, int rd)
8383 {
8384     int immhb = immh << 3 | immb;
8385     int size = 32 - clz32(immh) - 1;
8386     int shift = immhb - (8 << size);
8387     int pass;
8388 
8389     assert(immh != 0);
8390     assert(!(scalar && is_q));
8391 
8392     if (!scalar) {
8393         if (!is_q && extract32(immh, 3, 1)) {
8394             unallocated_encoding(s);
8395             return;
8396         }
8397 
8398         /* Since we use the variable-shift helpers we must
8399          * replicate the shift count into each element of
8400          * the tcg_shift value.
8401          */
8402         switch (size) {
8403         case 0:
8404             shift |= shift << 8;
8405             /* fall through */
8406         case 1:
8407             shift |= shift << 16;
8408             break;
8409         case 2:
8410         case 3:
8411             break;
8412         default:
8413             g_assert_not_reached();
8414         }
8415     }
8416 
8417     if (!fp_access_check(s)) {
8418         return;
8419     }
8420 
8421     if (size == 3) {
8422         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8423         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8424             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8425             { NULL, gen_helper_neon_qshl_u64 },
8426         };
8427         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8428         int maxpass = is_q ? 2 : 1;
8429 
8430         for (pass = 0; pass < maxpass; pass++) {
8431             TCGv_i64 tcg_op = tcg_temp_new_i64();
8432 
8433             read_vec_element(s, tcg_op, rn, pass, MO_64);
8434             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
8435             write_vec_element(s, tcg_op, rd, pass, MO_64);
8436         }
8437         clear_vec_high(s, is_q, rd);
8438     } else {
8439         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8440         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8441             {
8442                 { gen_helper_neon_qshl_s8,
8443                   gen_helper_neon_qshl_s16,
8444                   gen_helper_neon_qshl_s32 },
8445                 { gen_helper_neon_qshlu_s8,
8446                   gen_helper_neon_qshlu_s16,
8447                   gen_helper_neon_qshlu_s32 }
8448             }, {
8449                 { NULL, NULL, NULL },
8450                 { gen_helper_neon_qshl_u8,
8451                   gen_helper_neon_qshl_u16,
8452                   gen_helper_neon_qshl_u32 }
8453             }
8454         };
8455         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8456         MemOp memop = scalar ? size : MO_32;
8457         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8458 
8459         for (pass = 0; pass < maxpass; pass++) {
8460             TCGv_i32 tcg_op = tcg_temp_new_i32();
8461 
8462             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8463             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
8464             if (scalar) {
8465                 switch (size) {
8466                 case 0:
8467                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8468                     break;
8469                 case 1:
8470                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8471                     break;
8472                 case 2:
8473                     break;
8474                 default:
8475                     g_assert_not_reached();
8476                 }
8477                 write_fp_sreg(s, rd, tcg_op);
8478             } else {
8479                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8480             }
8481         }
8482 
8483         if (!scalar) {
8484             clear_vec_high(s, is_q, rd);
8485         }
8486     }
8487 }
8488 
8489 /* Common vector code for handling integer to FP conversion */
8490 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8491                                    int elements, int is_signed,
8492                                    int fracbits, int size)
8493 {
8494     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8495     TCGv_i32 tcg_shift = NULL;
8496 
8497     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8498     int pass;
8499 
8500     if (fracbits || size == MO_64) {
8501         tcg_shift = tcg_constant_i32(fracbits);
8502     }
8503 
8504     if (size == MO_64) {
8505         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8506         TCGv_i64 tcg_double = tcg_temp_new_i64();
8507 
8508         for (pass = 0; pass < elements; pass++) {
8509             read_vec_element(s, tcg_int64, rn, pass, mop);
8510 
8511             if (is_signed) {
8512                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8513                                      tcg_shift, tcg_fpst);
8514             } else {
8515                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8516                                      tcg_shift, tcg_fpst);
8517             }
8518             if (elements == 1) {
8519                 write_fp_dreg(s, rd, tcg_double);
8520             } else {
8521                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8522             }
8523         }
8524     } else {
8525         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8526         TCGv_i32 tcg_float = tcg_temp_new_i32();
8527 
8528         for (pass = 0; pass < elements; pass++) {
8529             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8530 
8531             switch (size) {
8532             case MO_32:
8533                 if (fracbits) {
8534                     if (is_signed) {
8535                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8536                                              tcg_shift, tcg_fpst);
8537                     } else {
8538                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8539                                              tcg_shift, tcg_fpst);
8540                     }
8541                 } else {
8542                     if (is_signed) {
8543                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8544                     } else {
8545                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8546                     }
8547                 }
8548                 break;
8549             case MO_16:
8550                 if (fracbits) {
8551                     if (is_signed) {
8552                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8553                                              tcg_shift, tcg_fpst);
8554                     } else {
8555                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8556                                              tcg_shift, tcg_fpst);
8557                     }
8558                 } else {
8559                     if (is_signed) {
8560                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8561                     } else {
8562                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8563                     }
8564                 }
8565                 break;
8566             default:
8567                 g_assert_not_reached();
8568             }
8569 
8570             if (elements == 1) {
8571                 write_fp_sreg(s, rd, tcg_float);
8572             } else {
8573                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8574             }
8575         }
8576     }
8577 
8578     clear_vec_high(s, elements << size == 16, rd);
8579 }
8580 
8581 /* UCVTF/SCVTF - Integer to FP conversion */
8582 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8583                                          bool is_q, bool is_u,
8584                                          int immh, int immb, int opcode,
8585                                          int rn, int rd)
8586 {
8587     int size, elements, fracbits;
8588     int immhb = immh << 3 | immb;
8589 
8590     if (immh & 8) {
8591         size = MO_64;
8592         if (!is_scalar && !is_q) {
8593             unallocated_encoding(s);
8594             return;
8595         }
8596     } else if (immh & 4) {
8597         size = MO_32;
8598     } else if (immh & 2) {
8599         size = MO_16;
8600         if (!dc_isar_feature(aa64_fp16, s)) {
8601             unallocated_encoding(s);
8602             return;
8603         }
8604     } else {
8605         /* immh == 0 would be a failure of the decode logic */
8606         g_assert(immh == 1);
8607         unallocated_encoding(s);
8608         return;
8609     }
8610 
8611     if (is_scalar) {
8612         elements = 1;
8613     } else {
8614         elements = (8 << is_q) >> size;
8615     }
8616     fracbits = (16 << size) - immhb;
8617 
8618     if (!fp_access_check(s)) {
8619         return;
8620     }
8621 
8622     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8623 }
8624 
8625 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
8626 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8627                                          bool is_q, bool is_u,
8628                                          int immh, int immb, int rn, int rd)
8629 {
8630     int immhb = immh << 3 | immb;
8631     int pass, size, fracbits;
8632     TCGv_ptr tcg_fpstatus;
8633     TCGv_i32 tcg_rmode, tcg_shift;
8634 
8635     if (immh & 0x8) {
8636         size = MO_64;
8637         if (!is_scalar && !is_q) {
8638             unallocated_encoding(s);
8639             return;
8640         }
8641     } else if (immh & 0x4) {
8642         size = MO_32;
8643     } else if (immh & 0x2) {
8644         size = MO_16;
8645         if (!dc_isar_feature(aa64_fp16, s)) {
8646             unallocated_encoding(s);
8647             return;
8648         }
8649     } else {
8650         /* Should have split out AdvSIMD modified immediate earlier.  */
8651         assert(immh == 1);
8652         unallocated_encoding(s);
8653         return;
8654     }
8655 
8656     if (!fp_access_check(s)) {
8657         return;
8658     }
8659 
8660     assert(!(is_scalar && is_q));
8661 
8662     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8663     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8664     fracbits = (16 << size) - immhb;
8665     tcg_shift = tcg_constant_i32(fracbits);
8666 
8667     if (size == MO_64) {
8668         int maxpass = is_scalar ? 1 : 2;
8669 
8670         for (pass = 0; pass < maxpass; pass++) {
8671             TCGv_i64 tcg_op = tcg_temp_new_i64();
8672 
8673             read_vec_element(s, tcg_op, rn, pass, MO_64);
8674             if (is_u) {
8675                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8676             } else {
8677                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8678             }
8679             write_vec_element(s, tcg_op, rd, pass, MO_64);
8680         }
8681         clear_vec_high(s, is_q, rd);
8682     } else {
8683         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8684         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8685 
8686         switch (size) {
8687         case MO_16:
8688             if (is_u) {
8689                 fn = gen_helper_vfp_touhh;
8690             } else {
8691                 fn = gen_helper_vfp_toshh;
8692             }
8693             break;
8694         case MO_32:
8695             if (is_u) {
8696                 fn = gen_helper_vfp_touls;
8697             } else {
8698                 fn = gen_helper_vfp_tosls;
8699             }
8700             break;
8701         default:
8702             g_assert_not_reached();
8703         }
8704 
8705         for (pass = 0; pass < maxpass; pass++) {
8706             TCGv_i32 tcg_op = tcg_temp_new_i32();
8707 
8708             read_vec_element_i32(s, tcg_op, rn, pass, size);
8709             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8710             if (is_scalar) {
8711                 write_fp_sreg(s, rd, tcg_op);
8712             } else {
8713                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8714             }
8715         }
8716         if (!is_scalar) {
8717             clear_vec_high(s, is_q, rd);
8718         }
8719     }
8720 
8721     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8722 }
8723 
8724 /* AdvSIMD scalar shift by immediate
8725  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8726  * +-----+---+-------------+------+------+--------+---+------+------+
8727  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8728  * +-----+---+-------------+------+------+--------+---+------+------+
8729  *
8730  * This is the scalar version so it works on a fixed sized registers
8731  */
8732 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8733 {
8734     int rd = extract32(insn, 0, 5);
8735     int rn = extract32(insn, 5, 5);
8736     int opcode = extract32(insn, 11, 5);
8737     int immb = extract32(insn, 16, 3);
8738     int immh = extract32(insn, 19, 4);
8739     bool is_u = extract32(insn, 29, 1);
8740 
8741     if (immh == 0) {
8742         unallocated_encoding(s);
8743         return;
8744     }
8745 
8746     switch (opcode) {
8747     case 0x08: /* SRI */
8748         if (!is_u) {
8749             unallocated_encoding(s);
8750             return;
8751         }
8752         /* fall through */
8753     case 0x00: /* SSHR / USHR */
8754     case 0x02: /* SSRA / USRA */
8755     case 0x04: /* SRSHR / URSHR */
8756     case 0x06: /* SRSRA / URSRA */
8757         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8758         break;
8759     case 0x0a: /* SHL / SLI */
8760         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8761         break;
8762     case 0x1c: /* SCVTF, UCVTF */
8763         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8764                                      opcode, rn, rd);
8765         break;
8766     case 0x10: /* SQSHRUN, SQSHRUN2 */
8767     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8768         if (!is_u) {
8769             unallocated_encoding(s);
8770             return;
8771         }
8772         handle_vec_simd_sqshrn(s, true, false, false, true,
8773                                immh, immb, opcode, rn, rd);
8774         break;
8775     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8776     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8777         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8778                                immh, immb, opcode, rn, rd);
8779         break;
8780     case 0xc: /* SQSHLU */
8781         if (!is_u) {
8782             unallocated_encoding(s);
8783             return;
8784         }
8785         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8786         break;
8787     case 0xe: /* SQSHL, UQSHL */
8788         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8789         break;
8790     case 0x1f: /* FCVTZS, FCVTZU */
8791         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8792         break;
8793     default:
8794         unallocated_encoding(s);
8795         break;
8796     }
8797 }
8798 
8799 /* AdvSIMD scalar three different
8800  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8801  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8802  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8803  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8804  */
8805 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8806 {
8807     bool is_u = extract32(insn, 29, 1);
8808     int size = extract32(insn, 22, 2);
8809     int opcode = extract32(insn, 12, 4);
8810     int rm = extract32(insn, 16, 5);
8811     int rn = extract32(insn, 5, 5);
8812     int rd = extract32(insn, 0, 5);
8813 
8814     if (is_u) {
8815         unallocated_encoding(s);
8816         return;
8817     }
8818 
8819     switch (opcode) {
8820     case 0x9: /* SQDMLAL, SQDMLAL2 */
8821     case 0xb: /* SQDMLSL, SQDMLSL2 */
8822     case 0xd: /* SQDMULL, SQDMULL2 */
8823         if (size == 0 || size == 3) {
8824             unallocated_encoding(s);
8825             return;
8826         }
8827         break;
8828     default:
8829         unallocated_encoding(s);
8830         return;
8831     }
8832 
8833     if (!fp_access_check(s)) {
8834         return;
8835     }
8836 
8837     if (size == 2) {
8838         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8839         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8840         TCGv_i64 tcg_res = tcg_temp_new_i64();
8841 
8842         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8843         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8844 
8845         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8846         gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res);
8847 
8848         switch (opcode) {
8849         case 0xd: /* SQDMULL, SQDMULL2 */
8850             break;
8851         case 0xb: /* SQDMLSL, SQDMLSL2 */
8852             tcg_gen_neg_i64(tcg_res, tcg_res);
8853             /* fall through */
8854         case 0x9: /* SQDMLAL, SQDMLAL2 */
8855             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8856             gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env,
8857                                               tcg_res, tcg_op1);
8858             break;
8859         default:
8860             g_assert_not_reached();
8861         }
8862 
8863         write_fp_dreg(s, rd, tcg_res);
8864     } else {
8865         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8866         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8867         TCGv_i64 tcg_res = tcg_temp_new_i64();
8868 
8869         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8870         gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res);
8871 
8872         switch (opcode) {
8873         case 0xd: /* SQDMULL, SQDMULL2 */
8874             break;
8875         case 0xb: /* SQDMLSL, SQDMLSL2 */
8876             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8877             /* fall through */
8878         case 0x9: /* SQDMLAL, SQDMLAL2 */
8879         {
8880             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8881             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8882             gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env,
8883                                               tcg_res, tcg_op3);
8884             break;
8885         }
8886         default:
8887             g_assert_not_reached();
8888         }
8889 
8890         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8891         write_fp_dreg(s, rd, tcg_res);
8892     }
8893 }
8894 
8895 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8896                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8897 {
8898     /* Handle 64x64->64 opcodes which are shared between the scalar
8899      * and vector 3-same groups. We cover every opcode where size == 3
8900      * is valid in either the three-reg-same (integer, not pairwise)
8901      * or scalar-three-reg-same groups.
8902      */
8903     TCGCond cond;
8904 
8905     switch (opcode) {
8906     case 0x1: /* SQADD */
8907         if (u) {
8908             gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8909         } else {
8910             gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8911         }
8912         break;
8913     case 0x5: /* SQSUB */
8914         if (u) {
8915             gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8916         } else {
8917             gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8918         }
8919         break;
8920     case 0x6: /* CMGT, CMHI */
8921         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8922     do_cmop:
8923         /* 64 bit integer comparison, result = test ? -1 : 0. */
8924         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8925         break;
8926     case 0x7: /* CMGE, CMHS */
8927         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8928         goto do_cmop;
8929     case 0x11: /* CMTST, CMEQ */
8930         if (u) {
8931             cond = TCG_COND_EQ;
8932             goto do_cmop;
8933         }
8934         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8935         break;
8936     case 0x8: /* SSHL, USHL */
8937         if (u) {
8938             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8939         } else {
8940             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8941         }
8942         break;
8943     case 0x9: /* SQSHL, UQSHL */
8944         if (u) {
8945             gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8946         } else {
8947             gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8948         }
8949         break;
8950     case 0xa: /* SRSHL, URSHL */
8951         if (u) {
8952             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8953         } else {
8954             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8955         }
8956         break;
8957     case 0xb: /* SQRSHL, UQRSHL */
8958         if (u) {
8959             gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8960         } else {
8961             gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8962         }
8963         break;
8964     case 0x10: /* ADD, SUB */
8965         if (u) {
8966             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8967         } else {
8968             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8969         }
8970         break;
8971     default:
8972         g_assert_not_reached();
8973     }
8974 }
8975 
8976 /* Handle the 3-same-operands float operations; shared by the scalar
8977  * and vector encodings. The caller must filter out any encodings
8978  * not allocated for the encoding it is dealing with.
8979  */
8980 static void handle_3same_float(DisasContext *s, int size, int elements,
8981                                int fpopcode, int rd, int rn, int rm)
8982 {
8983     int pass;
8984     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8985 
8986     for (pass = 0; pass < elements; pass++) {
8987         if (size) {
8988             /* Double */
8989             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8990             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8991             TCGv_i64 tcg_res = tcg_temp_new_i64();
8992 
8993             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8994             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8995 
8996             switch (fpopcode) {
8997             case 0x39: /* FMLS */
8998                 /* As usual for ARM, separate negation for fused multiply-add */
8999                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
9000                 /* fall through */
9001             case 0x19: /* FMLA */
9002                 read_vec_element(s, tcg_res, rd, pass, MO_64);
9003                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
9004                                        tcg_res, fpst);
9005                 break;
9006             case 0x18: /* FMAXNM */
9007                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9008                 break;
9009             case 0x1a: /* FADD */
9010                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
9011                 break;
9012             case 0x1b: /* FMULX */
9013                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
9014                 break;
9015             case 0x1c: /* FCMEQ */
9016                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9017                 break;
9018             case 0x1e: /* FMAX */
9019                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
9020                 break;
9021             case 0x1f: /* FRECPS */
9022                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9023                 break;
9024             case 0x38: /* FMINNM */
9025                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9026                 break;
9027             case 0x3a: /* FSUB */
9028                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9029                 break;
9030             case 0x3e: /* FMIN */
9031                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
9032                 break;
9033             case 0x3f: /* FRSQRTS */
9034                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9035                 break;
9036             case 0x5b: /* FMUL */
9037                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
9038                 break;
9039             case 0x5c: /* FCMGE */
9040                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9041                 break;
9042             case 0x5d: /* FACGE */
9043                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9044                 break;
9045             case 0x5f: /* FDIV */
9046                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
9047                 break;
9048             case 0x7a: /* FABD */
9049                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9050                 gen_helper_vfp_absd(tcg_res, tcg_res);
9051                 break;
9052             case 0x7c: /* FCMGT */
9053                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9054                 break;
9055             case 0x7d: /* FACGT */
9056                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9057                 break;
9058             default:
9059                 g_assert_not_reached();
9060             }
9061 
9062             write_vec_element(s, tcg_res, rd, pass, MO_64);
9063         } else {
9064             /* Single */
9065             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9066             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9067             TCGv_i32 tcg_res = tcg_temp_new_i32();
9068 
9069             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9070             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9071 
9072             switch (fpopcode) {
9073             case 0x39: /* FMLS */
9074                 /* As usual for ARM, separate negation for fused multiply-add */
9075                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9076                 /* fall through */
9077             case 0x19: /* FMLA */
9078                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9079                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9080                                        tcg_res, fpst);
9081                 break;
9082             case 0x1a: /* FADD */
9083                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9084                 break;
9085             case 0x1b: /* FMULX */
9086                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9087                 break;
9088             case 0x1c: /* FCMEQ */
9089                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9090                 break;
9091             case 0x1e: /* FMAX */
9092                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9093                 break;
9094             case 0x1f: /* FRECPS */
9095                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9096                 break;
9097             case 0x18: /* FMAXNM */
9098                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9099                 break;
9100             case 0x38: /* FMINNM */
9101                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9102                 break;
9103             case 0x3a: /* FSUB */
9104                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9105                 break;
9106             case 0x3e: /* FMIN */
9107                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9108                 break;
9109             case 0x3f: /* FRSQRTS */
9110                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9111                 break;
9112             case 0x5b: /* FMUL */
9113                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9114                 break;
9115             case 0x5c: /* FCMGE */
9116                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9117                 break;
9118             case 0x5d: /* FACGE */
9119                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9120                 break;
9121             case 0x5f: /* FDIV */
9122                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9123                 break;
9124             case 0x7a: /* FABD */
9125                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9126                 gen_helper_vfp_abss(tcg_res, tcg_res);
9127                 break;
9128             case 0x7c: /* FCMGT */
9129                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9130                 break;
9131             case 0x7d: /* FACGT */
9132                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9133                 break;
9134             default:
9135                 g_assert_not_reached();
9136             }
9137 
9138             if (elements == 1) {
9139                 /* scalar single so clear high part */
9140                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9141 
9142                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9143                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9144             } else {
9145                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9146             }
9147         }
9148     }
9149 
9150     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9151 }
9152 
9153 /* AdvSIMD scalar three same
9154  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9155  * +-----+---+-----------+------+---+------+--------+---+------+------+
9156  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9157  * +-----+---+-----------+------+---+------+--------+---+------+------+
9158  */
9159 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9160 {
9161     int rd = extract32(insn, 0, 5);
9162     int rn = extract32(insn, 5, 5);
9163     int opcode = extract32(insn, 11, 5);
9164     int rm = extract32(insn, 16, 5);
9165     int size = extract32(insn, 22, 2);
9166     bool u = extract32(insn, 29, 1);
9167     TCGv_i64 tcg_rd;
9168 
9169     if (opcode >= 0x18) {
9170         /* Floating point: U, size[1] and opcode indicate operation */
9171         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9172         switch (fpopcode) {
9173         case 0x1b: /* FMULX */
9174         case 0x1f: /* FRECPS */
9175         case 0x3f: /* FRSQRTS */
9176         case 0x5d: /* FACGE */
9177         case 0x7d: /* FACGT */
9178         case 0x1c: /* FCMEQ */
9179         case 0x5c: /* FCMGE */
9180         case 0x7c: /* FCMGT */
9181         case 0x7a: /* FABD */
9182             break;
9183         default:
9184             unallocated_encoding(s);
9185             return;
9186         }
9187 
9188         if (!fp_access_check(s)) {
9189             return;
9190         }
9191 
9192         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9193         return;
9194     }
9195 
9196     switch (opcode) {
9197     case 0x1: /* SQADD, UQADD */
9198     case 0x5: /* SQSUB, UQSUB */
9199     case 0x9: /* SQSHL, UQSHL */
9200     case 0xb: /* SQRSHL, UQRSHL */
9201         break;
9202     case 0x8: /* SSHL, USHL */
9203     case 0xa: /* SRSHL, URSHL */
9204     case 0x6: /* CMGT, CMHI */
9205     case 0x7: /* CMGE, CMHS */
9206     case 0x11: /* CMTST, CMEQ */
9207     case 0x10: /* ADD, SUB (vector) */
9208         if (size != 3) {
9209             unallocated_encoding(s);
9210             return;
9211         }
9212         break;
9213     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9214         if (size != 1 && size != 2) {
9215             unallocated_encoding(s);
9216             return;
9217         }
9218         break;
9219     default:
9220         unallocated_encoding(s);
9221         return;
9222     }
9223 
9224     if (!fp_access_check(s)) {
9225         return;
9226     }
9227 
9228     tcg_rd = tcg_temp_new_i64();
9229 
9230     if (size == 3) {
9231         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9232         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9233 
9234         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9235     } else {
9236         /* Do a single operation on the lowest element in the vector.
9237          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9238          * no side effects for all these operations.
9239          * OPTME: special-purpose helpers would avoid doing some
9240          * unnecessary work in the helper for the 8 and 16 bit cases.
9241          */
9242         NeonGenTwoOpEnvFn *genenvfn;
9243         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9244         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9245         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9246 
9247         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9248         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9249 
9250         switch (opcode) {
9251         case 0x1: /* SQADD, UQADD */
9252         {
9253             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9254                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9255                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9256                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9257             };
9258             genenvfn = fns[size][u];
9259             break;
9260         }
9261         case 0x5: /* SQSUB, UQSUB */
9262         {
9263             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9264                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9265                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9266                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9267             };
9268             genenvfn = fns[size][u];
9269             break;
9270         }
9271         case 0x9: /* SQSHL, UQSHL */
9272         {
9273             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9274                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9275                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9276                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9277             };
9278             genenvfn = fns[size][u];
9279             break;
9280         }
9281         case 0xb: /* SQRSHL, UQRSHL */
9282         {
9283             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9284                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9285                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9286                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9287             };
9288             genenvfn = fns[size][u];
9289             break;
9290         }
9291         case 0x16: /* SQDMULH, SQRDMULH */
9292         {
9293             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9294                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9295                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9296             };
9297             assert(size == 1 || size == 2);
9298             genenvfn = fns[size - 1][u];
9299             break;
9300         }
9301         default:
9302             g_assert_not_reached();
9303         }
9304 
9305         genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm);
9306         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9307     }
9308 
9309     write_fp_dreg(s, rd, tcg_rd);
9310 }
9311 
9312 /* AdvSIMD scalar three same FP16
9313  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9314  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9315  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9316  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9317  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9318  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9319  */
9320 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9321                                                   uint32_t insn)
9322 {
9323     int rd = extract32(insn, 0, 5);
9324     int rn = extract32(insn, 5, 5);
9325     int opcode = extract32(insn, 11, 3);
9326     int rm = extract32(insn, 16, 5);
9327     bool u = extract32(insn, 29, 1);
9328     bool a = extract32(insn, 23, 1);
9329     int fpopcode = opcode | (a << 3) |  (u << 4);
9330     TCGv_ptr fpst;
9331     TCGv_i32 tcg_op1;
9332     TCGv_i32 tcg_op2;
9333     TCGv_i32 tcg_res;
9334 
9335     switch (fpopcode) {
9336     case 0x03: /* FMULX */
9337     case 0x04: /* FCMEQ (reg) */
9338     case 0x07: /* FRECPS */
9339     case 0x0f: /* FRSQRTS */
9340     case 0x14: /* FCMGE (reg) */
9341     case 0x15: /* FACGE */
9342     case 0x1a: /* FABD */
9343     case 0x1c: /* FCMGT (reg) */
9344     case 0x1d: /* FACGT */
9345         break;
9346     default:
9347         unallocated_encoding(s);
9348         return;
9349     }
9350 
9351     if (!dc_isar_feature(aa64_fp16, s)) {
9352         unallocated_encoding(s);
9353     }
9354 
9355     if (!fp_access_check(s)) {
9356         return;
9357     }
9358 
9359     fpst = fpstatus_ptr(FPST_FPCR_F16);
9360 
9361     tcg_op1 = read_fp_hreg(s, rn);
9362     tcg_op2 = read_fp_hreg(s, rm);
9363     tcg_res = tcg_temp_new_i32();
9364 
9365     switch (fpopcode) {
9366     case 0x03: /* FMULX */
9367         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9368         break;
9369     case 0x04: /* FCMEQ (reg) */
9370         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9371         break;
9372     case 0x07: /* FRECPS */
9373         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9374         break;
9375     case 0x0f: /* FRSQRTS */
9376         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9377         break;
9378     case 0x14: /* FCMGE (reg) */
9379         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9380         break;
9381     case 0x15: /* FACGE */
9382         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9383         break;
9384     case 0x1a: /* FABD */
9385         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9386         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9387         break;
9388     case 0x1c: /* FCMGT (reg) */
9389         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9390         break;
9391     case 0x1d: /* FACGT */
9392         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9393         break;
9394     default:
9395         g_assert_not_reached();
9396     }
9397 
9398     write_fp_sreg(s, rd, tcg_res);
9399 }
9400 
9401 /* AdvSIMD scalar three same extra
9402  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9403  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9404  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9405  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9406  */
9407 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9408                                                    uint32_t insn)
9409 {
9410     int rd = extract32(insn, 0, 5);
9411     int rn = extract32(insn, 5, 5);
9412     int opcode = extract32(insn, 11, 4);
9413     int rm = extract32(insn, 16, 5);
9414     int size = extract32(insn, 22, 2);
9415     bool u = extract32(insn, 29, 1);
9416     TCGv_i32 ele1, ele2, ele3;
9417     TCGv_i64 res;
9418     bool feature;
9419 
9420     switch (u * 16 + opcode) {
9421     case 0x10: /* SQRDMLAH (vector) */
9422     case 0x11: /* SQRDMLSH (vector) */
9423         if (size != 1 && size != 2) {
9424             unallocated_encoding(s);
9425             return;
9426         }
9427         feature = dc_isar_feature(aa64_rdm, s);
9428         break;
9429     default:
9430         unallocated_encoding(s);
9431         return;
9432     }
9433     if (!feature) {
9434         unallocated_encoding(s);
9435         return;
9436     }
9437     if (!fp_access_check(s)) {
9438         return;
9439     }
9440 
9441     /* Do a single operation on the lowest element in the vector.
9442      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9443      * with no side effects for all these operations.
9444      * OPTME: special-purpose helpers would avoid doing some
9445      * unnecessary work in the helper for the 16 bit cases.
9446      */
9447     ele1 = tcg_temp_new_i32();
9448     ele2 = tcg_temp_new_i32();
9449     ele3 = tcg_temp_new_i32();
9450 
9451     read_vec_element_i32(s, ele1, rn, 0, size);
9452     read_vec_element_i32(s, ele2, rm, 0, size);
9453     read_vec_element_i32(s, ele3, rd, 0, size);
9454 
9455     switch (opcode) {
9456     case 0x0: /* SQRDMLAH */
9457         if (size == 1) {
9458             gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3);
9459         } else {
9460             gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3);
9461         }
9462         break;
9463     case 0x1: /* SQRDMLSH */
9464         if (size == 1) {
9465             gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3);
9466         } else {
9467             gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3);
9468         }
9469         break;
9470     default:
9471         g_assert_not_reached();
9472     }
9473 
9474     res = tcg_temp_new_i64();
9475     tcg_gen_extu_i32_i64(res, ele3);
9476     write_fp_dreg(s, rd, res);
9477 }
9478 
9479 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9480                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9481                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9482 {
9483     /* Handle 64->64 opcodes which are shared between the scalar and
9484      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9485      * is valid in either group and also the double-precision fp ops.
9486      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9487      * requires them.
9488      */
9489     TCGCond cond;
9490 
9491     switch (opcode) {
9492     case 0x4: /* CLS, CLZ */
9493         if (u) {
9494             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9495         } else {
9496             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9497         }
9498         break;
9499     case 0x5: /* NOT */
9500         /* This opcode is shared with CNT and RBIT but we have earlier
9501          * enforced that size == 3 if and only if this is the NOT insn.
9502          */
9503         tcg_gen_not_i64(tcg_rd, tcg_rn);
9504         break;
9505     case 0x7: /* SQABS, SQNEG */
9506         if (u) {
9507             gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
9508         } else {
9509             gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
9510         }
9511         break;
9512     case 0xa: /* CMLT */
9513         cond = TCG_COND_LT;
9514     do_cmop:
9515         /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
9516         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
9517         break;
9518     case 0x8: /* CMGT, CMGE */
9519         cond = u ? TCG_COND_GE : TCG_COND_GT;
9520         goto do_cmop;
9521     case 0x9: /* CMEQ, CMLE */
9522         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9523         goto do_cmop;
9524     case 0xb: /* ABS, NEG */
9525         if (u) {
9526             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9527         } else {
9528             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9529         }
9530         break;
9531     case 0x2f: /* FABS */
9532         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9533         break;
9534     case 0x6f: /* FNEG */
9535         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9536         break;
9537     case 0x7f: /* FSQRT */
9538         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
9539         break;
9540     case 0x1a: /* FCVTNS */
9541     case 0x1b: /* FCVTMS */
9542     case 0x1c: /* FCVTAS */
9543     case 0x3a: /* FCVTPS */
9544     case 0x3b: /* FCVTZS */
9545         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9546         break;
9547     case 0x5a: /* FCVTNU */
9548     case 0x5b: /* FCVTMU */
9549     case 0x5c: /* FCVTAU */
9550     case 0x7a: /* FCVTPU */
9551     case 0x7b: /* FCVTZU */
9552         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9553         break;
9554     case 0x18: /* FRINTN */
9555     case 0x19: /* FRINTM */
9556     case 0x38: /* FRINTP */
9557     case 0x39: /* FRINTZ */
9558     case 0x58: /* FRINTA */
9559     case 0x79: /* FRINTI */
9560         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9561         break;
9562     case 0x59: /* FRINTX */
9563         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9564         break;
9565     case 0x1e: /* FRINT32Z */
9566     case 0x5e: /* FRINT32X */
9567         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9568         break;
9569     case 0x1f: /* FRINT64Z */
9570     case 0x5f: /* FRINT64X */
9571         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9572         break;
9573     default:
9574         g_assert_not_reached();
9575     }
9576 }
9577 
9578 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9579                                    bool is_scalar, bool is_u, bool is_q,
9580                                    int size, int rn, int rd)
9581 {
9582     bool is_double = (size == MO_64);
9583     TCGv_ptr fpst;
9584 
9585     if (!fp_access_check(s)) {
9586         return;
9587     }
9588 
9589     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9590 
9591     if (is_double) {
9592         TCGv_i64 tcg_op = tcg_temp_new_i64();
9593         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9594         TCGv_i64 tcg_res = tcg_temp_new_i64();
9595         NeonGenTwoDoubleOpFn *genfn;
9596         bool swap = false;
9597         int pass;
9598 
9599         switch (opcode) {
9600         case 0x2e: /* FCMLT (zero) */
9601             swap = true;
9602             /* fallthrough */
9603         case 0x2c: /* FCMGT (zero) */
9604             genfn = gen_helper_neon_cgt_f64;
9605             break;
9606         case 0x2d: /* FCMEQ (zero) */
9607             genfn = gen_helper_neon_ceq_f64;
9608             break;
9609         case 0x6d: /* FCMLE (zero) */
9610             swap = true;
9611             /* fall through */
9612         case 0x6c: /* FCMGE (zero) */
9613             genfn = gen_helper_neon_cge_f64;
9614             break;
9615         default:
9616             g_assert_not_reached();
9617         }
9618 
9619         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9620             read_vec_element(s, tcg_op, rn, pass, MO_64);
9621             if (swap) {
9622                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9623             } else {
9624                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9625             }
9626             write_vec_element(s, tcg_res, rd, pass, MO_64);
9627         }
9628 
9629         clear_vec_high(s, !is_scalar, rd);
9630     } else {
9631         TCGv_i32 tcg_op = tcg_temp_new_i32();
9632         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9633         TCGv_i32 tcg_res = tcg_temp_new_i32();
9634         NeonGenTwoSingleOpFn *genfn;
9635         bool swap = false;
9636         int pass, maxpasses;
9637 
9638         if (size == MO_16) {
9639             switch (opcode) {
9640             case 0x2e: /* FCMLT (zero) */
9641                 swap = true;
9642                 /* fall through */
9643             case 0x2c: /* FCMGT (zero) */
9644                 genfn = gen_helper_advsimd_cgt_f16;
9645                 break;
9646             case 0x2d: /* FCMEQ (zero) */
9647                 genfn = gen_helper_advsimd_ceq_f16;
9648                 break;
9649             case 0x6d: /* FCMLE (zero) */
9650                 swap = true;
9651                 /* fall through */
9652             case 0x6c: /* FCMGE (zero) */
9653                 genfn = gen_helper_advsimd_cge_f16;
9654                 break;
9655             default:
9656                 g_assert_not_reached();
9657             }
9658         } else {
9659             switch (opcode) {
9660             case 0x2e: /* FCMLT (zero) */
9661                 swap = true;
9662                 /* fall through */
9663             case 0x2c: /* FCMGT (zero) */
9664                 genfn = gen_helper_neon_cgt_f32;
9665                 break;
9666             case 0x2d: /* FCMEQ (zero) */
9667                 genfn = gen_helper_neon_ceq_f32;
9668                 break;
9669             case 0x6d: /* FCMLE (zero) */
9670                 swap = true;
9671                 /* fall through */
9672             case 0x6c: /* FCMGE (zero) */
9673                 genfn = gen_helper_neon_cge_f32;
9674                 break;
9675             default:
9676                 g_assert_not_reached();
9677             }
9678         }
9679 
9680         if (is_scalar) {
9681             maxpasses = 1;
9682         } else {
9683             int vector_size = 8 << is_q;
9684             maxpasses = vector_size >> size;
9685         }
9686 
9687         for (pass = 0; pass < maxpasses; pass++) {
9688             read_vec_element_i32(s, tcg_op, rn, pass, size);
9689             if (swap) {
9690                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9691             } else {
9692                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9693             }
9694             if (is_scalar) {
9695                 write_fp_sreg(s, rd, tcg_res);
9696             } else {
9697                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9698             }
9699         }
9700 
9701         if (!is_scalar) {
9702             clear_vec_high(s, is_q, rd);
9703         }
9704     }
9705 }
9706 
9707 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9708                                     bool is_scalar, bool is_u, bool is_q,
9709                                     int size, int rn, int rd)
9710 {
9711     bool is_double = (size == 3);
9712     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9713 
9714     if (is_double) {
9715         TCGv_i64 tcg_op = tcg_temp_new_i64();
9716         TCGv_i64 tcg_res = tcg_temp_new_i64();
9717         int pass;
9718 
9719         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9720             read_vec_element(s, tcg_op, rn, pass, MO_64);
9721             switch (opcode) {
9722             case 0x3d: /* FRECPE */
9723                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9724                 break;
9725             case 0x3f: /* FRECPX */
9726                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9727                 break;
9728             case 0x7d: /* FRSQRTE */
9729                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9730                 break;
9731             default:
9732                 g_assert_not_reached();
9733             }
9734             write_vec_element(s, tcg_res, rd, pass, MO_64);
9735         }
9736         clear_vec_high(s, !is_scalar, rd);
9737     } else {
9738         TCGv_i32 tcg_op = tcg_temp_new_i32();
9739         TCGv_i32 tcg_res = tcg_temp_new_i32();
9740         int pass, maxpasses;
9741 
9742         if (is_scalar) {
9743             maxpasses = 1;
9744         } else {
9745             maxpasses = is_q ? 4 : 2;
9746         }
9747 
9748         for (pass = 0; pass < maxpasses; pass++) {
9749             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9750 
9751             switch (opcode) {
9752             case 0x3c: /* URECPE */
9753                 gen_helper_recpe_u32(tcg_res, tcg_op);
9754                 break;
9755             case 0x3d: /* FRECPE */
9756                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9757                 break;
9758             case 0x3f: /* FRECPX */
9759                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9760                 break;
9761             case 0x7d: /* FRSQRTE */
9762                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9763                 break;
9764             default:
9765                 g_assert_not_reached();
9766             }
9767 
9768             if (is_scalar) {
9769                 write_fp_sreg(s, rd, tcg_res);
9770             } else {
9771                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9772             }
9773         }
9774         if (!is_scalar) {
9775             clear_vec_high(s, is_q, rd);
9776         }
9777     }
9778 }
9779 
9780 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9781                                 int opcode, bool u, bool is_q,
9782                                 int size, int rn, int rd)
9783 {
9784     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9785      * in the source becomes a size element in the destination).
9786      */
9787     int pass;
9788     TCGv_i32 tcg_res[2];
9789     int destelt = is_q ? 2 : 0;
9790     int passes = scalar ? 1 : 2;
9791 
9792     if (scalar) {
9793         tcg_res[1] = tcg_constant_i32(0);
9794     }
9795 
9796     for (pass = 0; pass < passes; pass++) {
9797         TCGv_i64 tcg_op = tcg_temp_new_i64();
9798         NeonGenNarrowFn *genfn = NULL;
9799         NeonGenNarrowEnvFn *genenvfn = NULL;
9800 
9801         if (scalar) {
9802             read_vec_element(s, tcg_op, rn, pass, size + 1);
9803         } else {
9804             read_vec_element(s, tcg_op, rn, pass, MO_64);
9805         }
9806         tcg_res[pass] = tcg_temp_new_i32();
9807 
9808         switch (opcode) {
9809         case 0x12: /* XTN, SQXTUN */
9810         {
9811             static NeonGenNarrowFn * const xtnfns[3] = {
9812                 gen_helper_neon_narrow_u8,
9813                 gen_helper_neon_narrow_u16,
9814                 tcg_gen_extrl_i64_i32,
9815             };
9816             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9817                 gen_helper_neon_unarrow_sat8,
9818                 gen_helper_neon_unarrow_sat16,
9819                 gen_helper_neon_unarrow_sat32,
9820             };
9821             if (u) {
9822                 genenvfn = sqxtunfns[size];
9823             } else {
9824                 genfn = xtnfns[size];
9825             }
9826             break;
9827         }
9828         case 0x14: /* SQXTN, UQXTN */
9829         {
9830             static NeonGenNarrowEnvFn * const fns[3][2] = {
9831                 { gen_helper_neon_narrow_sat_s8,
9832                   gen_helper_neon_narrow_sat_u8 },
9833                 { gen_helper_neon_narrow_sat_s16,
9834                   gen_helper_neon_narrow_sat_u16 },
9835                 { gen_helper_neon_narrow_sat_s32,
9836                   gen_helper_neon_narrow_sat_u32 },
9837             };
9838             genenvfn = fns[size][u];
9839             break;
9840         }
9841         case 0x16: /* FCVTN, FCVTN2 */
9842             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9843             if (size == 2) {
9844                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env);
9845             } else {
9846                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9847                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9848                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9849                 TCGv_i32 ahp = get_ahp_flag();
9850 
9851                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9852                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9853                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9854                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9855             }
9856             break;
9857         case 0x36: /* BFCVTN, BFCVTN2 */
9858             {
9859                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9860                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9861             }
9862             break;
9863         case 0x56:  /* FCVTXN, FCVTXN2 */
9864             /* 64 bit to 32 bit float conversion
9865              * with von Neumann rounding (round to odd)
9866              */
9867             assert(size == 2);
9868             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env);
9869             break;
9870         default:
9871             g_assert_not_reached();
9872         }
9873 
9874         if (genfn) {
9875             genfn(tcg_res[pass], tcg_op);
9876         } else if (genenvfn) {
9877             genenvfn(tcg_res[pass], tcg_env, tcg_op);
9878         }
9879     }
9880 
9881     for (pass = 0; pass < 2; pass++) {
9882         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9883     }
9884     clear_vec_high(s, is_q, rd);
9885 }
9886 
9887 /* Remaining saturating accumulating ops */
9888 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9889                                 bool is_q, int size, int rn, int rd)
9890 {
9891     bool is_double = (size == 3);
9892 
9893     if (is_double) {
9894         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9895         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9896         int pass;
9897 
9898         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9899             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9900             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9901 
9902             if (is_u) { /* USQADD */
9903                 gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9904             } else { /* SUQADD */
9905                 gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9906             }
9907             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9908         }
9909         clear_vec_high(s, !is_scalar, rd);
9910     } else {
9911         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9912         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9913         int pass, maxpasses;
9914 
9915         if (is_scalar) {
9916             maxpasses = 1;
9917         } else {
9918             maxpasses = is_q ? 4 : 2;
9919         }
9920 
9921         for (pass = 0; pass < maxpasses; pass++) {
9922             if (is_scalar) {
9923                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9924                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9925             } else {
9926                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9927                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9928             }
9929 
9930             if (is_u) { /* USQADD */
9931                 switch (size) {
9932                 case 0:
9933                     gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9934                     break;
9935                 case 1:
9936                     gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9937                     break;
9938                 case 2:
9939                     gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9940                     break;
9941                 default:
9942                     g_assert_not_reached();
9943                 }
9944             } else { /* SUQADD */
9945                 switch (size) {
9946                 case 0:
9947                     gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9948                     break;
9949                 case 1:
9950                     gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9951                     break;
9952                 case 2:
9953                     gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9954                     break;
9955                 default:
9956                     g_assert_not_reached();
9957                 }
9958             }
9959 
9960             if (is_scalar) {
9961                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9962             }
9963             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9964         }
9965         clear_vec_high(s, is_q, rd);
9966     }
9967 }
9968 
9969 /* AdvSIMD scalar two reg misc
9970  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9971  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9972  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9973  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9974  */
9975 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9976 {
9977     int rd = extract32(insn, 0, 5);
9978     int rn = extract32(insn, 5, 5);
9979     int opcode = extract32(insn, 12, 5);
9980     int size = extract32(insn, 22, 2);
9981     bool u = extract32(insn, 29, 1);
9982     bool is_fcvt = false;
9983     int rmode;
9984     TCGv_i32 tcg_rmode;
9985     TCGv_ptr tcg_fpstatus;
9986 
9987     switch (opcode) {
9988     case 0x3: /* USQADD / SUQADD*/
9989         if (!fp_access_check(s)) {
9990             return;
9991         }
9992         handle_2misc_satacc(s, true, u, false, size, rn, rd);
9993         return;
9994     case 0x7: /* SQABS / SQNEG */
9995         break;
9996     case 0xa: /* CMLT */
9997         if (u) {
9998             unallocated_encoding(s);
9999             return;
10000         }
10001         /* fall through */
10002     case 0x8: /* CMGT, CMGE */
10003     case 0x9: /* CMEQ, CMLE */
10004     case 0xb: /* ABS, NEG */
10005         if (size != 3) {
10006             unallocated_encoding(s);
10007             return;
10008         }
10009         break;
10010     case 0x12: /* SQXTUN */
10011         if (!u) {
10012             unallocated_encoding(s);
10013             return;
10014         }
10015         /* fall through */
10016     case 0x14: /* SQXTN, UQXTN */
10017         if (size == 3) {
10018             unallocated_encoding(s);
10019             return;
10020         }
10021         if (!fp_access_check(s)) {
10022             return;
10023         }
10024         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10025         return;
10026     case 0xc ... 0xf:
10027     case 0x16 ... 0x1d:
10028     case 0x1f:
10029         /* Floating point: U, size[1] and opcode indicate operation;
10030          * size[0] indicates single or double precision.
10031          */
10032         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10033         size = extract32(size, 0, 1) ? 3 : 2;
10034         switch (opcode) {
10035         case 0x2c: /* FCMGT (zero) */
10036         case 0x2d: /* FCMEQ (zero) */
10037         case 0x2e: /* FCMLT (zero) */
10038         case 0x6c: /* FCMGE (zero) */
10039         case 0x6d: /* FCMLE (zero) */
10040             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10041             return;
10042         case 0x1d: /* SCVTF */
10043         case 0x5d: /* UCVTF */
10044         {
10045             bool is_signed = (opcode == 0x1d);
10046             if (!fp_access_check(s)) {
10047                 return;
10048             }
10049             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10050             return;
10051         }
10052         case 0x3d: /* FRECPE */
10053         case 0x3f: /* FRECPX */
10054         case 0x7d: /* FRSQRTE */
10055             if (!fp_access_check(s)) {
10056                 return;
10057             }
10058             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10059             return;
10060         case 0x1a: /* FCVTNS */
10061         case 0x1b: /* FCVTMS */
10062         case 0x3a: /* FCVTPS */
10063         case 0x3b: /* FCVTZS */
10064         case 0x5a: /* FCVTNU */
10065         case 0x5b: /* FCVTMU */
10066         case 0x7a: /* FCVTPU */
10067         case 0x7b: /* FCVTZU */
10068             is_fcvt = true;
10069             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10070             break;
10071         case 0x1c: /* FCVTAS */
10072         case 0x5c: /* FCVTAU */
10073             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10074             is_fcvt = true;
10075             rmode = FPROUNDING_TIEAWAY;
10076             break;
10077         case 0x56: /* FCVTXN, FCVTXN2 */
10078             if (size == 2) {
10079                 unallocated_encoding(s);
10080                 return;
10081             }
10082             if (!fp_access_check(s)) {
10083                 return;
10084             }
10085             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10086             return;
10087         default:
10088             unallocated_encoding(s);
10089             return;
10090         }
10091         break;
10092     default:
10093         unallocated_encoding(s);
10094         return;
10095     }
10096 
10097     if (!fp_access_check(s)) {
10098         return;
10099     }
10100 
10101     if (is_fcvt) {
10102         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10103         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10104     } else {
10105         tcg_fpstatus = NULL;
10106         tcg_rmode = NULL;
10107     }
10108 
10109     if (size == 3) {
10110         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10111         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10112 
10113         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10114         write_fp_dreg(s, rd, tcg_rd);
10115     } else {
10116         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10117         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10118 
10119         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10120 
10121         switch (opcode) {
10122         case 0x7: /* SQABS, SQNEG */
10123         {
10124             NeonGenOneOpEnvFn *genfn;
10125             static NeonGenOneOpEnvFn * const fns[3][2] = {
10126                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10127                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10128                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10129             };
10130             genfn = fns[size][u];
10131             genfn(tcg_rd, tcg_env, tcg_rn);
10132             break;
10133         }
10134         case 0x1a: /* FCVTNS */
10135         case 0x1b: /* FCVTMS */
10136         case 0x1c: /* FCVTAS */
10137         case 0x3a: /* FCVTPS */
10138         case 0x3b: /* FCVTZS */
10139             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10140                                  tcg_fpstatus);
10141             break;
10142         case 0x5a: /* FCVTNU */
10143         case 0x5b: /* FCVTMU */
10144         case 0x5c: /* FCVTAU */
10145         case 0x7a: /* FCVTPU */
10146         case 0x7b: /* FCVTZU */
10147             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10148                                  tcg_fpstatus);
10149             break;
10150         default:
10151             g_assert_not_reached();
10152         }
10153 
10154         write_fp_sreg(s, rd, tcg_rd);
10155     }
10156 
10157     if (is_fcvt) {
10158         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10159     }
10160 }
10161 
10162 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10163 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10164                                  int immh, int immb, int opcode, int rn, int rd)
10165 {
10166     int size = 32 - clz32(immh) - 1;
10167     int immhb = immh << 3 | immb;
10168     int shift = 2 * (8 << size) - immhb;
10169     GVecGen2iFn *gvec_fn;
10170 
10171     if (extract32(immh, 3, 1) && !is_q) {
10172         unallocated_encoding(s);
10173         return;
10174     }
10175     tcg_debug_assert(size <= 3);
10176 
10177     if (!fp_access_check(s)) {
10178         return;
10179     }
10180 
10181     switch (opcode) {
10182     case 0x02: /* SSRA / USRA (accumulate) */
10183         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10184         break;
10185 
10186     case 0x08: /* SRI */
10187         gvec_fn = gen_gvec_sri;
10188         break;
10189 
10190     case 0x00: /* SSHR / USHR */
10191         if (is_u) {
10192             if (shift == 8 << size) {
10193                 /* Shift count the same size as element size produces zero.  */
10194                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10195                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10196                 return;
10197             }
10198             gvec_fn = tcg_gen_gvec_shri;
10199         } else {
10200             /* Shift count the same size as element size produces all sign.  */
10201             if (shift == 8 << size) {
10202                 shift -= 1;
10203             }
10204             gvec_fn = tcg_gen_gvec_sari;
10205         }
10206         break;
10207 
10208     case 0x04: /* SRSHR / URSHR (rounding) */
10209         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10210         break;
10211 
10212     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10213         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10214         break;
10215 
10216     default:
10217         g_assert_not_reached();
10218     }
10219 
10220     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10221 }
10222 
10223 /* SHL/SLI - Vector shift left */
10224 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10225                                  int immh, int immb, int opcode, int rn, int rd)
10226 {
10227     int size = 32 - clz32(immh) - 1;
10228     int immhb = immh << 3 | immb;
10229     int shift = immhb - (8 << size);
10230 
10231     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10232     assert(size >= 0 && size <= 3);
10233 
10234     if (extract32(immh, 3, 1) && !is_q) {
10235         unallocated_encoding(s);
10236         return;
10237     }
10238 
10239     if (!fp_access_check(s)) {
10240         return;
10241     }
10242 
10243     if (insert) {
10244         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10245     } else {
10246         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10247     }
10248 }
10249 
10250 /* USHLL/SHLL - Vector shift left with widening */
10251 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10252                                  int immh, int immb, int opcode, int rn, int rd)
10253 {
10254     int size = 32 - clz32(immh) - 1;
10255     int immhb = immh << 3 | immb;
10256     int shift = immhb - (8 << size);
10257     int dsize = 64;
10258     int esize = 8 << size;
10259     int elements = dsize/esize;
10260     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10261     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10262     int i;
10263 
10264     if (size >= 3) {
10265         unallocated_encoding(s);
10266         return;
10267     }
10268 
10269     if (!fp_access_check(s)) {
10270         return;
10271     }
10272 
10273     /* For the LL variants the store is larger than the load,
10274      * so if rd == rn we would overwrite parts of our input.
10275      * So load everything right now and use shifts in the main loop.
10276      */
10277     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10278 
10279     for (i = 0; i < elements; i++) {
10280         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10281         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10282         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10283         write_vec_element(s, tcg_rd, rd, i, size + 1);
10284     }
10285 }
10286 
10287 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10288 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10289                                  int immh, int immb, int opcode, int rn, int rd)
10290 {
10291     int immhb = immh << 3 | immb;
10292     int size = 32 - clz32(immh) - 1;
10293     int dsize = 64;
10294     int esize = 8 << size;
10295     int elements = dsize/esize;
10296     int shift = (2 * esize) - immhb;
10297     bool round = extract32(opcode, 0, 1);
10298     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10299     TCGv_i64 tcg_round;
10300     int i;
10301 
10302     if (extract32(immh, 3, 1)) {
10303         unallocated_encoding(s);
10304         return;
10305     }
10306 
10307     if (!fp_access_check(s)) {
10308         return;
10309     }
10310 
10311     tcg_rn = tcg_temp_new_i64();
10312     tcg_rd = tcg_temp_new_i64();
10313     tcg_final = tcg_temp_new_i64();
10314     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10315 
10316     if (round) {
10317         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10318     } else {
10319         tcg_round = NULL;
10320     }
10321 
10322     for (i = 0; i < elements; i++) {
10323         read_vec_element(s, tcg_rn, rn, i, size+1);
10324         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10325                                 false, true, size+1, shift);
10326 
10327         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10328     }
10329 
10330     if (!is_q) {
10331         write_vec_element(s, tcg_final, rd, 0, MO_64);
10332     } else {
10333         write_vec_element(s, tcg_final, rd, 1, MO_64);
10334     }
10335 
10336     clear_vec_high(s, is_q, rd);
10337 }
10338 
10339 
10340 /* AdvSIMD shift by immediate
10341  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10342  * +---+---+---+-------------+------+------+--------+---+------+------+
10343  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10344  * +---+---+---+-------------+------+------+--------+---+------+------+
10345  */
10346 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10347 {
10348     int rd = extract32(insn, 0, 5);
10349     int rn = extract32(insn, 5, 5);
10350     int opcode = extract32(insn, 11, 5);
10351     int immb = extract32(insn, 16, 3);
10352     int immh = extract32(insn, 19, 4);
10353     bool is_u = extract32(insn, 29, 1);
10354     bool is_q = extract32(insn, 30, 1);
10355 
10356     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10357     assert(immh != 0);
10358 
10359     switch (opcode) {
10360     case 0x08: /* SRI */
10361         if (!is_u) {
10362             unallocated_encoding(s);
10363             return;
10364         }
10365         /* fall through */
10366     case 0x00: /* SSHR / USHR */
10367     case 0x02: /* SSRA / USRA (accumulate) */
10368     case 0x04: /* SRSHR / URSHR (rounding) */
10369     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10370         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10371         break;
10372     case 0x0a: /* SHL / SLI */
10373         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10374         break;
10375     case 0x10: /* SHRN */
10376     case 0x11: /* RSHRN / SQRSHRUN */
10377         if (is_u) {
10378             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10379                                    opcode, rn, rd);
10380         } else {
10381             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10382         }
10383         break;
10384     case 0x12: /* SQSHRN / UQSHRN */
10385     case 0x13: /* SQRSHRN / UQRSHRN */
10386         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10387                                opcode, rn, rd);
10388         break;
10389     case 0x14: /* SSHLL / USHLL */
10390         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10391         break;
10392     case 0x1c: /* SCVTF / UCVTF */
10393         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10394                                      opcode, rn, rd);
10395         break;
10396     case 0xc: /* SQSHLU */
10397         if (!is_u) {
10398             unallocated_encoding(s);
10399             return;
10400         }
10401         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10402         break;
10403     case 0xe: /* SQSHL, UQSHL */
10404         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10405         break;
10406     case 0x1f: /* FCVTZS/ FCVTZU */
10407         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10408         return;
10409     default:
10410         unallocated_encoding(s);
10411         return;
10412     }
10413 }
10414 
10415 /* Generate code to do a "long" addition or subtraction, ie one done in
10416  * TCGv_i64 on vector lanes twice the width specified by size.
10417  */
10418 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10419                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10420 {
10421     static NeonGenTwo64OpFn * const fns[3][2] = {
10422         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10423         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10424         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10425     };
10426     NeonGenTwo64OpFn *genfn;
10427     assert(size < 3);
10428 
10429     genfn = fns[size][is_sub];
10430     genfn(tcg_res, tcg_op1, tcg_op2);
10431 }
10432 
10433 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10434                                 int opcode, int rd, int rn, int rm)
10435 {
10436     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10437     TCGv_i64 tcg_res[2];
10438     int pass, accop;
10439 
10440     tcg_res[0] = tcg_temp_new_i64();
10441     tcg_res[1] = tcg_temp_new_i64();
10442 
10443     /* Does this op do an adding accumulate, a subtracting accumulate,
10444      * or no accumulate at all?
10445      */
10446     switch (opcode) {
10447     case 5:
10448     case 8:
10449     case 9:
10450         accop = 1;
10451         break;
10452     case 10:
10453     case 11:
10454         accop = -1;
10455         break;
10456     default:
10457         accop = 0;
10458         break;
10459     }
10460 
10461     if (accop != 0) {
10462         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10463         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10464     }
10465 
10466     /* size == 2 means two 32x32->64 operations; this is worth special
10467      * casing because we can generally handle it inline.
10468      */
10469     if (size == 2) {
10470         for (pass = 0; pass < 2; pass++) {
10471             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10472             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10473             TCGv_i64 tcg_passres;
10474             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10475 
10476             int elt = pass + is_q * 2;
10477 
10478             read_vec_element(s, tcg_op1, rn, elt, memop);
10479             read_vec_element(s, tcg_op2, rm, elt, memop);
10480 
10481             if (accop == 0) {
10482                 tcg_passres = tcg_res[pass];
10483             } else {
10484                 tcg_passres = tcg_temp_new_i64();
10485             }
10486 
10487             switch (opcode) {
10488             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10489                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10490                 break;
10491             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10492                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10493                 break;
10494             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10495             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10496             {
10497                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10498                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10499 
10500                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10501                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10502                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10503                                     tcg_passres,
10504                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10505                 break;
10506             }
10507             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10508             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10509             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10510                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10511                 break;
10512             case 9: /* SQDMLAL, SQDMLAL2 */
10513             case 11: /* SQDMLSL, SQDMLSL2 */
10514             case 13: /* SQDMULL, SQDMULL2 */
10515                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10516                 gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
10517                                                   tcg_passres, tcg_passres);
10518                 break;
10519             default:
10520                 g_assert_not_reached();
10521             }
10522 
10523             if (opcode == 9 || opcode == 11) {
10524                 /* saturating accumulate ops */
10525                 if (accop < 0) {
10526                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10527                 }
10528                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
10529                                                   tcg_res[pass], tcg_passres);
10530             } else if (accop > 0) {
10531                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10532             } else if (accop < 0) {
10533                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10534             }
10535         }
10536     } else {
10537         /* size 0 or 1, generally helper functions */
10538         for (pass = 0; pass < 2; pass++) {
10539             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10540             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10541             TCGv_i64 tcg_passres;
10542             int elt = pass + is_q * 2;
10543 
10544             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10545             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10546 
10547             if (accop == 0) {
10548                 tcg_passres = tcg_res[pass];
10549             } else {
10550                 tcg_passres = tcg_temp_new_i64();
10551             }
10552 
10553             switch (opcode) {
10554             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10555             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10556             {
10557                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10558                 static NeonGenWidenFn * const widenfns[2][2] = {
10559                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10560                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10561                 };
10562                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10563 
10564                 widenfn(tcg_op2_64, tcg_op2);
10565                 widenfn(tcg_passres, tcg_op1);
10566                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10567                               tcg_passres, tcg_op2_64);
10568                 break;
10569             }
10570             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10571             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10572                 if (size == 0) {
10573                     if (is_u) {
10574                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10575                     } else {
10576                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10577                     }
10578                 } else {
10579                     if (is_u) {
10580                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10581                     } else {
10582                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10583                     }
10584                 }
10585                 break;
10586             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10587             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10588             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10589                 if (size == 0) {
10590                     if (is_u) {
10591                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10592                     } else {
10593                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10594                     }
10595                 } else {
10596                     if (is_u) {
10597                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10598                     } else {
10599                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10600                     }
10601                 }
10602                 break;
10603             case 9: /* SQDMLAL, SQDMLAL2 */
10604             case 11: /* SQDMLSL, SQDMLSL2 */
10605             case 13: /* SQDMULL, SQDMULL2 */
10606                 assert(size == 1);
10607                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10608                 gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
10609                                                   tcg_passres, tcg_passres);
10610                 break;
10611             default:
10612                 g_assert_not_reached();
10613             }
10614 
10615             if (accop != 0) {
10616                 if (opcode == 9 || opcode == 11) {
10617                     /* saturating accumulate ops */
10618                     if (accop < 0) {
10619                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10620                     }
10621                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
10622                                                       tcg_res[pass],
10623                                                       tcg_passres);
10624                 } else {
10625                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10626                                   tcg_res[pass], tcg_passres);
10627                 }
10628             }
10629         }
10630     }
10631 
10632     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10633     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10634 }
10635 
10636 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10637                             int opcode, int rd, int rn, int rm)
10638 {
10639     TCGv_i64 tcg_res[2];
10640     int part = is_q ? 2 : 0;
10641     int pass;
10642 
10643     for (pass = 0; pass < 2; pass++) {
10644         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10645         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10646         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10647         static NeonGenWidenFn * const widenfns[3][2] = {
10648             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10649             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10650             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10651         };
10652         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10653 
10654         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10655         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10656         widenfn(tcg_op2_wide, tcg_op2);
10657         tcg_res[pass] = tcg_temp_new_i64();
10658         gen_neon_addl(size, (opcode == 3),
10659                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10660     }
10661 
10662     for (pass = 0; pass < 2; pass++) {
10663         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10664     }
10665 }
10666 
10667 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10668 {
10669     tcg_gen_addi_i64(in, in, 1U << 31);
10670     tcg_gen_extrh_i64_i32(res, in);
10671 }
10672 
10673 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10674                                  int opcode, int rd, int rn, int rm)
10675 {
10676     TCGv_i32 tcg_res[2];
10677     int part = is_q ? 2 : 0;
10678     int pass;
10679 
10680     for (pass = 0; pass < 2; pass++) {
10681         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10682         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10683         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10684         static NeonGenNarrowFn * const narrowfns[3][2] = {
10685             { gen_helper_neon_narrow_high_u8,
10686               gen_helper_neon_narrow_round_high_u8 },
10687             { gen_helper_neon_narrow_high_u16,
10688               gen_helper_neon_narrow_round_high_u16 },
10689             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10690         };
10691         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10692 
10693         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10694         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10695 
10696         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10697 
10698         tcg_res[pass] = tcg_temp_new_i32();
10699         gennarrow(tcg_res[pass], tcg_wideres);
10700     }
10701 
10702     for (pass = 0; pass < 2; pass++) {
10703         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10704     }
10705     clear_vec_high(s, is_q, rd);
10706 }
10707 
10708 /* AdvSIMD three different
10709  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10710  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10711  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10712  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10713  */
10714 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10715 {
10716     /* Instructions in this group fall into three basic classes
10717      * (in each case with the operation working on each element in
10718      * the input vectors):
10719      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10720      *     128 bit input)
10721      * (2) wide 64 x 128 -> 128
10722      * (3) narrowing 128 x 128 -> 64
10723      * Here we do initial decode, catch unallocated cases and
10724      * dispatch to separate functions for each class.
10725      */
10726     int is_q = extract32(insn, 30, 1);
10727     int is_u = extract32(insn, 29, 1);
10728     int size = extract32(insn, 22, 2);
10729     int opcode = extract32(insn, 12, 4);
10730     int rm = extract32(insn, 16, 5);
10731     int rn = extract32(insn, 5, 5);
10732     int rd = extract32(insn, 0, 5);
10733 
10734     switch (opcode) {
10735     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10736     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10737         /* 64 x 128 -> 128 */
10738         if (size == 3) {
10739             unallocated_encoding(s);
10740             return;
10741         }
10742         if (!fp_access_check(s)) {
10743             return;
10744         }
10745         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10746         break;
10747     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10748     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10749         /* 128 x 128 -> 64 */
10750         if (size == 3) {
10751             unallocated_encoding(s);
10752             return;
10753         }
10754         if (!fp_access_check(s)) {
10755             return;
10756         }
10757         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10758         break;
10759     case 14: /* PMULL, PMULL2 */
10760         if (is_u) {
10761             unallocated_encoding(s);
10762             return;
10763         }
10764         switch (size) {
10765         case 0: /* PMULL.P8 */
10766             if (!fp_access_check(s)) {
10767                 return;
10768             }
10769             /* The Q field specifies lo/hi half input for this insn.  */
10770             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10771                              gen_helper_neon_pmull_h);
10772             break;
10773 
10774         case 3: /* PMULL.P64 */
10775             if (!dc_isar_feature(aa64_pmull, s)) {
10776                 unallocated_encoding(s);
10777                 return;
10778             }
10779             if (!fp_access_check(s)) {
10780                 return;
10781             }
10782             /* The Q field specifies lo/hi half input for this insn.  */
10783             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10784                              gen_helper_gvec_pmull_q);
10785             break;
10786 
10787         default:
10788             unallocated_encoding(s);
10789             break;
10790         }
10791         return;
10792     case 9: /* SQDMLAL, SQDMLAL2 */
10793     case 11: /* SQDMLSL, SQDMLSL2 */
10794     case 13: /* SQDMULL, SQDMULL2 */
10795         if (is_u || size == 0) {
10796             unallocated_encoding(s);
10797             return;
10798         }
10799         /* fall through */
10800     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10801     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10802     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10803     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10804     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10805     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10806     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10807         /* 64 x 64 -> 128 */
10808         if (size == 3) {
10809             unallocated_encoding(s);
10810             return;
10811         }
10812         if (!fp_access_check(s)) {
10813             return;
10814         }
10815 
10816         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10817         break;
10818     default:
10819         /* opcode 15 not allocated */
10820         unallocated_encoding(s);
10821         break;
10822     }
10823 }
10824 
10825 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10826 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10827 {
10828     int rd = extract32(insn, 0, 5);
10829     int rn = extract32(insn, 5, 5);
10830     int rm = extract32(insn, 16, 5);
10831     int size = extract32(insn, 22, 2);
10832     bool is_u = extract32(insn, 29, 1);
10833     bool is_q = extract32(insn, 30, 1);
10834 
10835     if (!fp_access_check(s)) {
10836         return;
10837     }
10838 
10839     switch (size + 4 * is_u) {
10840     case 0: /* AND */
10841         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10842         return;
10843     case 1: /* BIC */
10844         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10845         return;
10846     case 2: /* ORR */
10847         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10848         return;
10849     case 3: /* ORN */
10850         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10851         return;
10852     case 4: /* EOR */
10853         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10854         return;
10855 
10856     case 5: /* BSL bitwise select */
10857         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10858         return;
10859     case 6: /* BIT, bitwise insert if true */
10860         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10861         return;
10862     case 7: /* BIF, bitwise insert if false */
10863         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10864         return;
10865 
10866     default:
10867         g_assert_not_reached();
10868     }
10869 }
10870 
10871 /* Pairwise op subgroup of C3.6.16.
10872  *
10873  * This is called directly or via the handle_3same_float for float pairwise
10874  * operations where the opcode and size are calculated differently.
10875  */
10876 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10877                                    int size, int rn, int rm, int rd)
10878 {
10879     TCGv_ptr fpst;
10880     int pass;
10881 
10882     /* Floating point operations need fpst */
10883     if (opcode >= 0x58) {
10884         fpst = fpstatus_ptr(FPST_FPCR);
10885     } else {
10886         fpst = NULL;
10887     }
10888 
10889     if (!fp_access_check(s)) {
10890         return;
10891     }
10892 
10893     /* These operations work on the concatenated rm:rn, with each pair of
10894      * adjacent elements being operated on to produce an element in the result.
10895      */
10896     if (size == 3) {
10897         TCGv_i64 tcg_res[2];
10898 
10899         for (pass = 0; pass < 2; pass++) {
10900             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10901             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10902             int passreg = (pass == 0) ? rn : rm;
10903 
10904             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10905             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10906             tcg_res[pass] = tcg_temp_new_i64();
10907 
10908             switch (opcode) {
10909             case 0x17: /* ADDP */
10910                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10911                 break;
10912             case 0x58: /* FMAXNMP */
10913                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10914                 break;
10915             case 0x5a: /* FADDP */
10916                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10917                 break;
10918             case 0x5e: /* FMAXP */
10919                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10920                 break;
10921             case 0x78: /* FMINNMP */
10922                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10923                 break;
10924             case 0x7e: /* FMINP */
10925                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10926                 break;
10927             default:
10928                 g_assert_not_reached();
10929             }
10930         }
10931 
10932         for (pass = 0; pass < 2; pass++) {
10933             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10934         }
10935     } else {
10936         int maxpass = is_q ? 4 : 2;
10937         TCGv_i32 tcg_res[4];
10938 
10939         for (pass = 0; pass < maxpass; pass++) {
10940             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10941             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10942             NeonGenTwoOpFn *genfn = NULL;
10943             int passreg = pass < (maxpass / 2) ? rn : rm;
10944             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10945 
10946             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10947             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10948             tcg_res[pass] = tcg_temp_new_i32();
10949 
10950             switch (opcode) {
10951             case 0x17: /* ADDP */
10952             {
10953                 static NeonGenTwoOpFn * const fns[3] = {
10954                     gen_helper_neon_padd_u8,
10955                     gen_helper_neon_padd_u16,
10956                     tcg_gen_add_i32,
10957                 };
10958                 genfn = fns[size];
10959                 break;
10960             }
10961             case 0x14: /* SMAXP, UMAXP */
10962             {
10963                 static NeonGenTwoOpFn * const fns[3][2] = {
10964                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10965                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10966                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10967                 };
10968                 genfn = fns[size][u];
10969                 break;
10970             }
10971             case 0x15: /* SMINP, UMINP */
10972             {
10973                 static NeonGenTwoOpFn * const fns[3][2] = {
10974                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10975                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10976                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10977                 };
10978                 genfn = fns[size][u];
10979                 break;
10980             }
10981             /* The FP operations are all on single floats (32 bit) */
10982             case 0x58: /* FMAXNMP */
10983                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10984                 break;
10985             case 0x5a: /* FADDP */
10986                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10987                 break;
10988             case 0x5e: /* FMAXP */
10989                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10990                 break;
10991             case 0x78: /* FMINNMP */
10992                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10993                 break;
10994             case 0x7e: /* FMINP */
10995                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10996                 break;
10997             default:
10998                 g_assert_not_reached();
10999             }
11000 
11001             /* FP ops called directly, otherwise call now */
11002             if (genfn) {
11003                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
11004             }
11005         }
11006 
11007         for (pass = 0; pass < maxpass; pass++) {
11008             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11009         }
11010         clear_vec_high(s, is_q, rd);
11011     }
11012 }
11013 
11014 /* Floating point op subgroup of C3.6.16. */
11015 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
11016 {
11017     /* For floating point ops, the U, size[1] and opcode bits
11018      * together indicate the operation. size[0] indicates single
11019      * or double.
11020      */
11021     int fpopcode = extract32(insn, 11, 5)
11022         | (extract32(insn, 23, 1) << 5)
11023         | (extract32(insn, 29, 1) << 6);
11024     int is_q = extract32(insn, 30, 1);
11025     int size = extract32(insn, 22, 1);
11026     int rm = extract32(insn, 16, 5);
11027     int rn = extract32(insn, 5, 5);
11028     int rd = extract32(insn, 0, 5);
11029 
11030     int datasize = is_q ? 128 : 64;
11031     int esize = 32 << size;
11032     int elements = datasize / esize;
11033 
11034     if (size == 1 && !is_q) {
11035         unallocated_encoding(s);
11036         return;
11037     }
11038 
11039     switch (fpopcode) {
11040     case 0x58: /* FMAXNMP */
11041     case 0x5a: /* FADDP */
11042     case 0x5e: /* FMAXP */
11043     case 0x78: /* FMINNMP */
11044     case 0x7e: /* FMINP */
11045         if (size && !is_q) {
11046             unallocated_encoding(s);
11047             return;
11048         }
11049         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11050                                rn, rm, rd);
11051         return;
11052     case 0x1b: /* FMULX */
11053     case 0x1f: /* FRECPS */
11054     case 0x3f: /* FRSQRTS */
11055     case 0x5d: /* FACGE */
11056     case 0x7d: /* FACGT */
11057     case 0x19: /* FMLA */
11058     case 0x39: /* FMLS */
11059     case 0x18: /* FMAXNM */
11060     case 0x1a: /* FADD */
11061     case 0x1c: /* FCMEQ */
11062     case 0x1e: /* FMAX */
11063     case 0x38: /* FMINNM */
11064     case 0x3a: /* FSUB */
11065     case 0x3e: /* FMIN */
11066     case 0x5b: /* FMUL */
11067     case 0x5c: /* FCMGE */
11068     case 0x5f: /* FDIV */
11069     case 0x7a: /* FABD */
11070     case 0x7c: /* FCMGT */
11071         if (!fp_access_check(s)) {
11072             return;
11073         }
11074         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11075         return;
11076 
11077     case 0x1d: /* FMLAL  */
11078     case 0x3d: /* FMLSL  */
11079     case 0x59: /* FMLAL2 */
11080     case 0x79: /* FMLSL2 */
11081         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11082             unallocated_encoding(s);
11083             return;
11084         }
11085         if (fp_access_check(s)) {
11086             int is_s = extract32(insn, 23, 1);
11087             int is_2 = extract32(insn, 29, 1);
11088             int data = (is_2 << 1) | is_s;
11089             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11090                                vec_full_reg_offset(s, rn),
11091                                vec_full_reg_offset(s, rm), tcg_env,
11092                                is_q ? 16 : 8, vec_full_reg_size(s),
11093                                data, gen_helper_gvec_fmlal_a64);
11094         }
11095         return;
11096 
11097     default:
11098         unallocated_encoding(s);
11099         return;
11100     }
11101 }
11102 
11103 /* Integer op subgroup of C3.6.16. */
11104 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11105 {
11106     int is_q = extract32(insn, 30, 1);
11107     int u = extract32(insn, 29, 1);
11108     int size = extract32(insn, 22, 2);
11109     int opcode = extract32(insn, 11, 5);
11110     int rm = extract32(insn, 16, 5);
11111     int rn = extract32(insn, 5, 5);
11112     int rd = extract32(insn, 0, 5);
11113     int pass;
11114     TCGCond cond;
11115 
11116     switch (opcode) {
11117     case 0x13: /* MUL, PMUL */
11118         if (u && size != 0) {
11119             unallocated_encoding(s);
11120             return;
11121         }
11122         /* fall through */
11123     case 0x0: /* SHADD, UHADD */
11124     case 0x2: /* SRHADD, URHADD */
11125     case 0x4: /* SHSUB, UHSUB */
11126     case 0xc: /* SMAX, UMAX */
11127     case 0xd: /* SMIN, UMIN */
11128     case 0xe: /* SABD, UABD */
11129     case 0xf: /* SABA, UABA */
11130     case 0x12: /* MLA, MLS */
11131         if (size == 3) {
11132             unallocated_encoding(s);
11133             return;
11134         }
11135         break;
11136     case 0x16: /* SQDMULH, SQRDMULH */
11137         if (size == 0 || size == 3) {
11138             unallocated_encoding(s);
11139             return;
11140         }
11141         break;
11142     default:
11143         if (size == 3 && !is_q) {
11144             unallocated_encoding(s);
11145             return;
11146         }
11147         break;
11148     }
11149 
11150     if (!fp_access_check(s)) {
11151         return;
11152     }
11153 
11154     switch (opcode) {
11155     case 0x01: /* SQADD, UQADD */
11156         if (u) {
11157             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11158         } else {
11159             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11160         }
11161         return;
11162     case 0x05: /* SQSUB, UQSUB */
11163         if (u) {
11164             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11165         } else {
11166             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11167         }
11168         return;
11169     case 0x08: /* SSHL, USHL */
11170         if (u) {
11171             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11172         } else {
11173             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11174         }
11175         return;
11176     case 0x0c: /* SMAX, UMAX */
11177         if (u) {
11178             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11179         } else {
11180             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11181         }
11182         return;
11183     case 0x0d: /* SMIN, UMIN */
11184         if (u) {
11185             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11186         } else {
11187             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11188         }
11189         return;
11190     case 0xe: /* SABD, UABD */
11191         if (u) {
11192             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11193         } else {
11194             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11195         }
11196         return;
11197     case 0xf: /* SABA, UABA */
11198         if (u) {
11199             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11200         } else {
11201             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11202         }
11203         return;
11204     case 0x10: /* ADD, SUB */
11205         if (u) {
11206             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11207         } else {
11208             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11209         }
11210         return;
11211     case 0x13: /* MUL, PMUL */
11212         if (!u) { /* MUL */
11213             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11214         } else {  /* PMUL */
11215             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11216         }
11217         return;
11218     case 0x12: /* MLA, MLS */
11219         if (u) {
11220             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11221         } else {
11222             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11223         }
11224         return;
11225     case 0x16: /* SQDMULH, SQRDMULH */
11226         {
11227             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11228                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11229                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11230             };
11231             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11232         }
11233         return;
11234     case 0x11:
11235         if (!u) { /* CMTST */
11236             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11237             return;
11238         }
11239         /* else CMEQ */
11240         cond = TCG_COND_EQ;
11241         goto do_gvec_cmp;
11242     case 0x06: /* CMGT, CMHI */
11243         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11244         goto do_gvec_cmp;
11245     case 0x07: /* CMGE, CMHS */
11246         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11247     do_gvec_cmp:
11248         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11249                          vec_full_reg_offset(s, rn),
11250                          vec_full_reg_offset(s, rm),
11251                          is_q ? 16 : 8, vec_full_reg_size(s));
11252         return;
11253     }
11254 
11255     if (size == 3) {
11256         assert(is_q);
11257         for (pass = 0; pass < 2; pass++) {
11258             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11259             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11260             TCGv_i64 tcg_res = tcg_temp_new_i64();
11261 
11262             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11263             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11264 
11265             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11266 
11267             write_vec_element(s, tcg_res, rd, pass, MO_64);
11268         }
11269     } else {
11270         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11271             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11272             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11273             TCGv_i32 tcg_res = tcg_temp_new_i32();
11274             NeonGenTwoOpFn *genfn = NULL;
11275             NeonGenTwoOpEnvFn *genenvfn = NULL;
11276 
11277             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11278             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11279 
11280             switch (opcode) {
11281             case 0x0: /* SHADD, UHADD */
11282             {
11283                 static NeonGenTwoOpFn * const fns[3][2] = {
11284                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11285                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11286                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11287                 };
11288                 genfn = fns[size][u];
11289                 break;
11290             }
11291             case 0x2: /* SRHADD, URHADD */
11292             {
11293                 static NeonGenTwoOpFn * const fns[3][2] = {
11294                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11295                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11296                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11297                 };
11298                 genfn = fns[size][u];
11299                 break;
11300             }
11301             case 0x4: /* SHSUB, UHSUB */
11302             {
11303                 static NeonGenTwoOpFn * const fns[3][2] = {
11304                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11305                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11306                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11307                 };
11308                 genfn = fns[size][u];
11309                 break;
11310             }
11311             case 0x9: /* SQSHL, UQSHL */
11312             {
11313                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11314                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11315                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11316                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11317                 };
11318                 genenvfn = fns[size][u];
11319                 break;
11320             }
11321             case 0xa: /* SRSHL, URSHL */
11322             {
11323                 static NeonGenTwoOpFn * const fns[3][2] = {
11324                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11325                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11326                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11327                 };
11328                 genfn = fns[size][u];
11329                 break;
11330             }
11331             case 0xb: /* SQRSHL, UQRSHL */
11332             {
11333                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11334                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11335                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11336                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11337                 };
11338                 genenvfn = fns[size][u];
11339                 break;
11340             }
11341             default:
11342                 g_assert_not_reached();
11343             }
11344 
11345             if (genenvfn) {
11346                 genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2);
11347             } else {
11348                 genfn(tcg_res, tcg_op1, tcg_op2);
11349             }
11350 
11351             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11352         }
11353     }
11354     clear_vec_high(s, is_q, rd);
11355 }
11356 
11357 /* AdvSIMD three same
11358  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11359  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11360  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11361  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11362  */
11363 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11364 {
11365     int opcode = extract32(insn, 11, 5);
11366 
11367     switch (opcode) {
11368     case 0x3: /* logic ops */
11369         disas_simd_3same_logic(s, insn);
11370         break;
11371     case 0x17: /* ADDP */
11372     case 0x14: /* SMAXP, UMAXP */
11373     case 0x15: /* SMINP, UMINP */
11374     {
11375         /* Pairwise operations */
11376         int is_q = extract32(insn, 30, 1);
11377         int u = extract32(insn, 29, 1);
11378         int size = extract32(insn, 22, 2);
11379         int rm = extract32(insn, 16, 5);
11380         int rn = extract32(insn, 5, 5);
11381         int rd = extract32(insn, 0, 5);
11382         if (opcode == 0x17) {
11383             if (u || (size == 3 && !is_q)) {
11384                 unallocated_encoding(s);
11385                 return;
11386             }
11387         } else {
11388             if (size == 3) {
11389                 unallocated_encoding(s);
11390                 return;
11391             }
11392         }
11393         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11394         break;
11395     }
11396     case 0x18 ... 0x31:
11397         /* floating point ops, sz[1] and U are part of opcode */
11398         disas_simd_3same_float(s, insn);
11399         break;
11400     default:
11401         disas_simd_3same_int(s, insn);
11402         break;
11403     }
11404 }
11405 
11406 /*
11407  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11408  *
11409  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11410  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11411  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11412  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11413  *
11414  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11415  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11416  *
11417  */
11418 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11419 {
11420     int opcode = extract32(insn, 11, 3);
11421     int u = extract32(insn, 29, 1);
11422     int a = extract32(insn, 23, 1);
11423     int is_q = extract32(insn, 30, 1);
11424     int rm = extract32(insn, 16, 5);
11425     int rn = extract32(insn, 5, 5);
11426     int rd = extract32(insn, 0, 5);
11427     /*
11428      * For these floating point ops, the U, a and opcode bits
11429      * together indicate the operation.
11430      */
11431     int fpopcode = opcode | (a << 3) | (u << 4);
11432     int datasize = is_q ? 128 : 64;
11433     int elements = datasize / 16;
11434     bool pairwise;
11435     TCGv_ptr fpst;
11436     int pass;
11437 
11438     switch (fpopcode) {
11439     case 0x0: /* FMAXNM */
11440     case 0x1: /* FMLA */
11441     case 0x2: /* FADD */
11442     case 0x3: /* FMULX */
11443     case 0x4: /* FCMEQ */
11444     case 0x6: /* FMAX */
11445     case 0x7: /* FRECPS */
11446     case 0x8: /* FMINNM */
11447     case 0x9: /* FMLS */
11448     case 0xa: /* FSUB */
11449     case 0xe: /* FMIN */
11450     case 0xf: /* FRSQRTS */
11451     case 0x13: /* FMUL */
11452     case 0x14: /* FCMGE */
11453     case 0x15: /* FACGE */
11454     case 0x17: /* FDIV */
11455     case 0x1a: /* FABD */
11456     case 0x1c: /* FCMGT */
11457     case 0x1d: /* FACGT */
11458         pairwise = false;
11459         break;
11460     case 0x10: /* FMAXNMP */
11461     case 0x12: /* FADDP */
11462     case 0x16: /* FMAXP */
11463     case 0x18: /* FMINNMP */
11464     case 0x1e: /* FMINP */
11465         pairwise = true;
11466         break;
11467     default:
11468         unallocated_encoding(s);
11469         return;
11470     }
11471 
11472     if (!dc_isar_feature(aa64_fp16, s)) {
11473         unallocated_encoding(s);
11474         return;
11475     }
11476 
11477     if (!fp_access_check(s)) {
11478         return;
11479     }
11480 
11481     fpst = fpstatus_ptr(FPST_FPCR_F16);
11482 
11483     if (pairwise) {
11484         int maxpass = is_q ? 8 : 4;
11485         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11486         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11487         TCGv_i32 tcg_res[8];
11488 
11489         for (pass = 0; pass < maxpass; pass++) {
11490             int passreg = pass < (maxpass / 2) ? rn : rm;
11491             int passelt = (pass << 1) & (maxpass - 1);
11492 
11493             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11494             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11495             tcg_res[pass] = tcg_temp_new_i32();
11496 
11497             switch (fpopcode) {
11498             case 0x10: /* FMAXNMP */
11499                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11500                                            fpst);
11501                 break;
11502             case 0x12: /* FADDP */
11503                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11504                 break;
11505             case 0x16: /* FMAXP */
11506                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11507                 break;
11508             case 0x18: /* FMINNMP */
11509                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11510                                            fpst);
11511                 break;
11512             case 0x1e: /* FMINP */
11513                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11514                 break;
11515             default:
11516                 g_assert_not_reached();
11517             }
11518         }
11519 
11520         for (pass = 0; pass < maxpass; pass++) {
11521             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11522         }
11523     } else {
11524         for (pass = 0; pass < elements; pass++) {
11525             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11526             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11527             TCGv_i32 tcg_res = tcg_temp_new_i32();
11528 
11529             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11530             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11531 
11532             switch (fpopcode) {
11533             case 0x0: /* FMAXNM */
11534                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11535                 break;
11536             case 0x1: /* FMLA */
11537                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11538                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11539                                            fpst);
11540                 break;
11541             case 0x2: /* FADD */
11542                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11543                 break;
11544             case 0x3: /* FMULX */
11545                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11546                 break;
11547             case 0x4: /* FCMEQ */
11548                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11549                 break;
11550             case 0x6: /* FMAX */
11551                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11552                 break;
11553             case 0x7: /* FRECPS */
11554                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11555                 break;
11556             case 0x8: /* FMINNM */
11557                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11558                 break;
11559             case 0x9: /* FMLS */
11560                 /* As usual for ARM, separate negation for fused multiply-add */
11561                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11562                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11563                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11564                                            fpst);
11565                 break;
11566             case 0xa: /* FSUB */
11567                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11568                 break;
11569             case 0xe: /* FMIN */
11570                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11571                 break;
11572             case 0xf: /* FRSQRTS */
11573                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11574                 break;
11575             case 0x13: /* FMUL */
11576                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11577                 break;
11578             case 0x14: /* FCMGE */
11579                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11580                 break;
11581             case 0x15: /* FACGE */
11582                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11583                 break;
11584             case 0x17: /* FDIV */
11585                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11586                 break;
11587             case 0x1a: /* FABD */
11588                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11589                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11590                 break;
11591             case 0x1c: /* FCMGT */
11592                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11593                 break;
11594             case 0x1d: /* FACGT */
11595                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11596                 break;
11597             default:
11598                 g_assert_not_reached();
11599             }
11600 
11601             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11602         }
11603     }
11604 
11605     clear_vec_high(s, is_q, rd);
11606 }
11607 
11608 /* AdvSIMD three same extra
11609  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11610  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11611  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11612  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11613  */
11614 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11615 {
11616     int rd = extract32(insn, 0, 5);
11617     int rn = extract32(insn, 5, 5);
11618     int opcode = extract32(insn, 11, 4);
11619     int rm = extract32(insn, 16, 5);
11620     int size = extract32(insn, 22, 2);
11621     bool u = extract32(insn, 29, 1);
11622     bool is_q = extract32(insn, 30, 1);
11623     bool feature;
11624     int rot;
11625 
11626     switch (u * 16 + opcode) {
11627     case 0x10: /* SQRDMLAH (vector) */
11628     case 0x11: /* SQRDMLSH (vector) */
11629         if (size != 1 && size != 2) {
11630             unallocated_encoding(s);
11631             return;
11632         }
11633         feature = dc_isar_feature(aa64_rdm, s);
11634         break;
11635     case 0x02: /* SDOT (vector) */
11636     case 0x12: /* UDOT (vector) */
11637         if (size != MO_32) {
11638             unallocated_encoding(s);
11639             return;
11640         }
11641         feature = dc_isar_feature(aa64_dp, s);
11642         break;
11643     case 0x03: /* USDOT */
11644         if (size != MO_32) {
11645             unallocated_encoding(s);
11646             return;
11647         }
11648         feature = dc_isar_feature(aa64_i8mm, s);
11649         break;
11650     case 0x04: /* SMMLA */
11651     case 0x14: /* UMMLA */
11652     case 0x05: /* USMMLA */
11653         if (!is_q || size != MO_32) {
11654             unallocated_encoding(s);
11655             return;
11656         }
11657         feature = dc_isar_feature(aa64_i8mm, s);
11658         break;
11659     case 0x18: /* FCMLA, #0 */
11660     case 0x19: /* FCMLA, #90 */
11661     case 0x1a: /* FCMLA, #180 */
11662     case 0x1b: /* FCMLA, #270 */
11663     case 0x1c: /* FCADD, #90 */
11664     case 0x1e: /* FCADD, #270 */
11665         if (size == 0
11666             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11667             || (size == 3 && !is_q)) {
11668             unallocated_encoding(s);
11669             return;
11670         }
11671         feature = dc_isar_feature(aa64_fcma, s);
11672         break;
11673     case 0x1d: /* BFMMLA */
11674         if (size != MO_16 || !is_q) {
11675             unallocated_encoding(s);
11676             return;
11677         }
11678         feature = dc_isar_feature(aa64_bf16, s);
11679         break;
11680     case 0x1f:
11681         switch (size) {
11682         case 1: /* BFDOT */
11683         case 3: /* BFMLAL{B,T} */
11684             feature = dc_isar_feature(aa64_bf16, s);
11685             break;
11686         default:
11687             unallocated_encoding(s);
11688             return;
11689         }
11690         break;
11691     default:
11692         unallocated_encoding(s);
11693         return;
11694     }
11695     if (!feature) {
11696         unallocated_encoding(s);
11697         return;
11698     }
11699     if (!fp_access_check(s)) {
11700         return;
11701     }
11702 
11703     switch (opcode) {
11704     case 0x0: /* SQRDMLAH (vector) */
11705         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11706         return;
11707 
11708     case 0x1: /* SQRDMLSH (vector) */
11709         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11710         return;
11711 
11712     case 0x2: /* SDOT / UDOT */
11713         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11714                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11715         return;
11716 
11717     case 0x3: /* USDOT */
11718         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11719         return;
11720 
11721     case 0x04: /* SMMLA, UMMLA */
11722         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11723                          u ? gen_helper_gvec_ummla_b
11724                          : gen_helper_gvec_smmla_b);
11725         return;
11726     case 0x05: /* USMMLA */
11727         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11728         return;
11729 
11730     case 0x8: /* FCMLA, #0 */
11731     case 0x9: /* FCMLA, #90 */
11732     case 0xa: /* FCMLA, #180 */
11733     case 0xb: /* FCMLA, #270 */
11734         rot = extract32(opcode, 0, 2);
11735         switch (size) {
11736         case 1:
11737             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11738                               gen_helper_gvec_fcmlah);
11739             break;
11740         case 2:
11741             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11742                               gen_helper_gvec_fcmlas);
11743             break;
11744         case 3:
11745             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11746                               gen_helper_gvec_fcmlad);
11747             break;
11748         default:
11749             g_assert_not_reached();
11750         }
11751         return;
11752 
11753     case 0xc: /* FCADD, #90 */
11754     case 0xe: /* FCADD, #270 */
11755         rot = extract32(opcode, 1, 1);
11756         switch (size) {
11757         case 1:
11758             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11759                               gen_helper_gvec_fcaddh);
11760             break;
11761         case 2:
11762             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11763                               gen_helper_gvec_fcadds);
11764             break;
11765         case 3:
11766             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11767                               gen_helper_gvec_fcaddd);
11768             break;
11769         default:
11770             g_assert_not_reached();
11771         }
11772         return;
11773 
11774     case 0xd: /* BFMMLA */
11775         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11776         return;
11777     case 0xf:
11778         switch (size) {
11779         case 1: /* BFDOT */
11780             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11781             break;
11782         case 3: /* BFMLAL{B,T} */
11783             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11784                               gen_helper_gvec_bfmlal);
11785             break;
11786         default:
11787             g_assert_not_reached();
11788         }
11789         return;
11790 
11791     default:
11792         g_assert_not_reached();
11793     }
11794 }
11795 
11796 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11797                                   int size, int rn, int rd)
11798 {
11799     /* Handle 2-reg-misc ops which are widening (so each size element
11800      * in the source becomes a 2*size element in the destination.
11801      * The only instruction like this is FCVTL.
11802      */
11803     int pass;
11804 
11805     if (size == 3) {
11806         /* 32 -> 64 bit fp conversion */
11807         TCGv_i64 tcg_res[2];
11808         int srcelt = is_q ? 2 : 0;
11809 
11810         for (pass = 0; pass < 2; pass++) {
11811             TCGv_i32 tcg_op = tcg_temp_new_i32();
11812             tcg_res[pass] = tcg_temp_new_i64();
11813 
11814             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11815             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
11816         }
11817         for (pass = 0; pass < 2; pass++) {
11818             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11819         }
11820     } else {
11821         /* 16 -> 32 bit fp conversion */
11822         int srcelt = is_q ? 4 : 0;
11823         TCGv_i32 tcg_res[4];
11824         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11825         TCGv_i32 ahp = get_ahp_flag();
11826 
11827         for (pass = 0; pass < 4; pass++) {
11828             tcg_res[pass] = tcg_temp_new_i32();
11829 
11830             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11831             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11832                                            fpst, ahp);
11833         }
11834         for (pass = 0; pass < 4; pass++) {
11835             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11836         }
11837     }
11838 }
11839 
11840 static void handle_rev(DisasContext *s, int opcode, bool u,
11841                        bool is_q, int size, int rn, int rd)
11842 {
11843     int op = (opcode << 1) | u;
11844     int opsz = op + size;
11845     int grp_size = 3 - opsz;
11846     int dsize = is_q ? 128 : 64;
11847     int i;
11848 
11849     if (opsz >= 3) {
11850         unallocated_encoding(s);
11851         return;
11852     }
11853 
11854     if (!fp_access_check(s)) {
11855         return;
11856     }
11857 
11858     if (size == 0) {
11859         /* Special case bytes, use bswap op on each group of elements */
11860         int groups = dsize / (8 << grp_size);
11861 
11862         for (i = 0; i < groups; i++) {
11863             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11864 
11865             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11866             switch (grp_size) {
11867             case MO_16:
11868                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11869                 break;
11870             case MO_32:
11871                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11872                 break;
11873             case MO_64:
11874                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11875                 break;
11876             default:
11877                 g_assert_not_reached();
11878             }
11879             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11880         }
11881         clear_vec_high(s, is_q, rd);
11882     } else {
11883         int revmask = (1 << grp_size) - 1;
11884         int esize = 8 << size;
11885         int elements = dsize / esize;
11886         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11887         TCGv_i64 tcg_rd[2];
11888 
11889         for (i = 0; i < 2; i++) {
11890             tcg_rd[i] = tcg_temp_new_i64();
11891             tcg_gen_movi_i64(tcg_rd[i], 0);
11892         }
11893 
11894         for (i = 0; i < elements; i++) {
11895             int e_rev = (i & 0xf) ^ revmask;
11896             int w = (e_rev * esize) / 64;
11897             int o = (e_rev * esize) % 64;
11898 
11899             read_vec_element(s, tcg_rn, rn, i, size);
11900             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11901         }
11902 
11903         for (i = 0; i < 2; i++) {
11904             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11905         }
11906         clear_vec_high(s, true, rd);
11907     }
11908 }
11909 
11910 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11911                                   bool is_q, int size, int rn, int rd)
11912 {
11913     /* Implement the pairwise operations from 2-misc:
11914      * SADDLP, UADDLP, SADALP, UADALP.
11915      * These all add pairs of elements in the input to produce a
11916      * double-width result element in the output (possibly accumulating).
11917      */
11918     bool accum = (opcode == 0x6);
11919     int maxpass = is_q ? 2 : 1;
11920     int pass;
11921     TCGv_i64 tcg_res[2];
11922 
11923     if (size == 2) {
11924         /* 32 + 32 -> 64 op */
11925         MemOp memop = size + (u ? 0 : MO_SIGN);
11926 
11927         for (pass = 0; pass < maxpass; pass++) {
11928             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11929             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11930 
11931             tcg_res[pass] = tcg_temp_new_i64();
11932 
11933             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11934             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11935             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11936             if (accum) {
11937                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11938                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11939             }
11940         }
11941     } else {
11942         for (pass = 0; pass < maxpass; pass++) {
11943             TCGv_i64 tcg_op = tcg_temp_new_i64();
11944             NeonGenOne64OpFn *genfn;
11945             static NeonGenOne64OpFn * const fns[2][2] = {
11946                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11947                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11948             };
11949 
11950             genfn = fns[size][u];
11951 
11952             tcg_res[pass] = tcg_temp_new_i64();
11953 
11954             read_vec_element(s, tcg_op, rn, pass, MO_64);
11955             genfn(tcg_res[pass], tcg_op);
11956 
11957             if (accum) {
11958                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11959                 if (size == 0) {
11960                     gen_helper_neon_addl_u16(tcg_res[pass],
11961                                              tcg_res[pass], tcg_op);
11962                 } else {
11963                     gen_helper_neon_addl_u32(tcg_res[pass],
11964                                              tcg_res[pass], tcg_op);
11965                 }
11966             }
11967         }
11968     }
11969     if (!is_q) {
11970         tcg_res[1] = tcg_constant_i64(0);
11971     }
11972     for (pass = 0; pass < 2; pass++) {
11973         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11974     }
11975 }
11976 
11977 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11978 {
11979     /* Implement SHLL and SHLL2 */
11980     int pass;
11981     int part = is_q ? 2 : 0;
11982     TCGv_i64 tcg_res[2];
11983 
11984     for (pass = 0; pass < 2; pass++) {
11985         static NeonGenWidenFn * const widenfns[3] = {
11986             gen_helper_neon_widen_u8,
11987             gen_helper_neon_widen_u16,
11988             tcg_gen_extu_i32_i64,
11989         };
11990         NeonGenWidenFn *widenfn = widenfns[size];
11991         TCGv_i32 tcg_op = tcg_temp_new_i32();
11992 
11993         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11994         tcg_res[pass] = tcg_temp_new_i64();
11995         widenfn(tcg_res[pass], tcg_op);
11996         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11997     }
11998 
11999     for (pass = 0; pass < 2; pass++) {
12000         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12001     }
12002 }
12003 
12004 /* AdvSIMD two reg misc
12005  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
12006  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12007  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12008  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12009  */
12010 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
12011 {
12012     int size = extract32(insn, 22, 2);
12013     int opcode = extract32(insn, 12, 5);
12014     bool u = extract32(insn, 29, 1);
12015     bool is_q = extract32(insn, 30, 1);
12016     int rn = extract32(insn, 5, 5);
12017     int rd = extract32(insn, 0, 5);
12018     bool need_fpstatus = false;
12019     int rmode = -1;
12020     TCGv_i32 tcg_rmode;
12021     TCGv_ptr tcg_fpstatus;
12022 
12023     switch (opcode) {
12024     case 0x0: /* REV64, REV32 */
12025     case 0x1: /* REV16 */
12026         handle_rev(s, opcode, u, is_q, size, rn, rd);
12027         return;
12028     case 0x5: /* CNT, NOT, RBIT */
12029         if (u && size == 0) {
12030             /* NOT */
12031             break;
12032         } else if (u && size == 1) {
12033             /* RBIT */
12034             break;
12035         } else if (!u && size == 0) {
12036             /* CNT */
12037             break;
12038         }
12039         unallocated_encoding(s);
12040         return;
12041     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12042     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12043         if (size == 3) {
12044             unallocated_encoding(s);
12045             return;
12046         }
12047         if (!fp_access_check(s)) {
12048             return;
12049         }
12050 
12051         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12052         return;
12053     case 0x4: /* CLS, CLZ */
12054         if (size == 3) {
12055             unallocated_encoding(s);
12056             return;
12057         }
12058         break;
12059     case 0x2: /* SADDLP, UADDLP */
12060     case 0x6: /* SADALP, UADALP */
12061         if (size == 3) {
12062             unallocated_encoding(s);
12063             return;
12064         }
12065         if (!fp_access_check(s)) {
12066             return;
12067         }
12068         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12069         return;
12070     case 0x13: /* SHLL, SHLL2 */
12071         if (u == 0 || size == 3) {
12072             unallocated_encoding(s);
12073             return;
12074         }
12075         if (!fp_access_check(s)) {
12076             return;
12077         }
12078         handle_shll(s, is_q, size, rn, rd);
12079         return;
12080     case 0xa: /* CMLT */
12081         if (u == 1) {
12082             unallocated_encoding(s);
12083             return;
12084         }
12085         /* fall through */
12086     case 0x8: /* CMGT, CMGE */
12087     case 0x9: /* CMEQ, CMLE */
12088     case 0xb: /* ABS, NEG */
12089         if (size == 3 && !is_q) {
12090             unallocated_encoding(s);
12091             return;
12092         }
12093         break;
12094     case 0x3: /* SUQADD, USQADD */
12095         if (size == 3 && !is_q) {
12096             unallocated_encoding(s);
12097             return;
12098         }
12099         if (!fp_access_check(s)) {
12100             return;
12101         }
12102         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12103         return;
12104     case 0x7: /* SQABS, SQNEG */
12105         if (size == 3 && !is_q) {
12106             unallocated_encoding(s);
12107             return;
12108         }
12109         break;
12110     case 0xc ... 0xf:
12111     case 0x16 ... 0x1f:
12112     {
12113         /* Floating point: U, size[1] and opcode indicate operation;
12114          * size[0] indicates single or double precision.
12115          */
12116         int is_double = extract32(size, 0, 1);
12117         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12118         size = is_double ? 3 : 2;
12119         switch (opcode) {
12120         case 0x2f: /* FABS */
12121         case 0x6f: /* FNEG */
12122             if (size == 3 && !is_q) {
12123                 unallocated_encoding(s);
12124                 return;
12125             }
12126             break;
12127         case 0x1d: /* SCVTF */
12128         case 0x5d: /* UCVTF */
12129         {
12130             bool is_signed = (opcode == 0x1d) ? true : false;
12131             int elements = is_double ? 2 : is_q ? 4 : 2;
12132             if (is_double && !is_q) {
12133                 unallocated_encoding(s);
12134                 return;
12135             }
12136             if (!fp_access_check(s)) {
12137                 return;
12138             }
12139             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12140             return;
12141         }
12142         case 0x2c: /* FCMGT (zero) */
12143         case 0x2d: /* FCMEQ (zero) */
12144         case 0x2e: /* FCMLT (zero) */
12145         case 0x6c: /* FCMGE (zero) */
12146         case 0x6d: /* FCMLE (zero) */
12147             if (size == 3 && !is_q) {
12148                 unallocated_encoding(s);
12149                 return;
12150             }
12151             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12152             return;
12153         case 0x7f: /* FSQRT */
12154             if (size == 3 && !is_q) {
12155                 unallocated_encoding(s);
12156                 return;
12157             }
12158             break;
12159         case 0x1a: /* FCVTNS */
12160         case 0x1b: /* FCVTMS */
12161         case 0x3a: /* FCVTPS */
12162         case 0x3b: /* FCVTZS */
12163         case 0x5a: /* FCVTNU */
12164         case 0x5b: /* FCVTMU */
12165         case 0x7a: /* FCVTPU */
12166         case 0x7b: /* FCVTZU */
12167             need_fpstatus = true;
12168             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12169             if (size == 3 && !is_q) {
12170                 unallocated_encoding(s);
12171                 return;
12172             }
12173             break;
12174         case 0x5c: /* FCVTAU */
12175         case 0x1c: /* FCVTAS */
12176             need_fpstatus = true;
12177             rmode = FPROUNDING_TIEAWAY;
12178             if (size == 3 && !is_q) {
12179                 unallocated_encoding(s);
12180                 return;
12181             }
12182             break;
12183         case 0x3c: /* URECPE */
12184             if (size == 3) {
12185                 unallocated_encoding(s);
12186                 return;
12187             }
12188             /* fall through */
12189         case 0x3d: /* FRECPE */
12190         case 0x7d: /* FRSQRTE */
12191             if (size == 3 && !is_q) {
12192                 unallocated_encoding(s);
12193                 return;
12194             }
12195             if (!fp_access_check(s)) {
12196                 return;
12197             }
12198             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12199             return;
12200         case 0x56: /* FCVTXN, FCVTXN2 */
12201             if (size == 2) {
12202                 unallocated_encoding(s);
12203                 return;
12204             }
12205             /* fall through */
12206         case 0x16: /* FCVTN, FCVTN2 */
12207             /* handle_2misc_narrow does a 2*size -> size operation, but these
12208              * instructions encode the source size rather than dest size.
12209              */
12210             if (!fp_access_check(s)) {
12211                 return;
12212             }
12213             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12214             return;
12215         case 0x36: /* BFCVTN, BFCVTN2 */
12216             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12217                 unallocated_encoding(s);
12218                 return;
12219             }
12220             if (!fp_access_check(s)) {
12221                 return;
12222             }
12223             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12224             return;
12225         case 0x17: /* FCVTL, FCVTL2 */
12226             if (!fp_access_check(s)) {
12227                 return;
12228             }
12229             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12230             return;
12231         case 0x18: /* FRINTN */
12232         case 0x19: /* FRINTM */
12233         case 0x38: /* FRINTP */
12234         case 0x39: /* FRINTZ */
12235             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12236             /* fall through */
12237         case 0x59: /* FRINTX */
12238         case 0x79: /* FRINTI */
12239             need_fpstatus = true;
12240             if (size == 3 && !is_q) {
12241                 unallocated_encoding(s);
12242                 return;
12243             }
12244             break;
12245         case 0x58: /* FRINTA */
12246             rmode = FPROUNDING_TIEAWAY;
12247             need_fpstatus = true;
12248             if (size == 3 && !is_q) {
12249                 unallocated_encoding(s);
12250                 return;
12251             }
12252             break;
12253         case 0x7c: /* URSQRTE */
12254             if (size == 3) {
12255                 unallocated_encoding(s);
12256                 return;
12257             }
12258             break;
12259         case 0x1e: /* FRINT32Z */
12260         case 0x1f: /* FRINT64Z */
12261             rmode = FPROUNDING_ZERO;
12262             /* fall through */
12263         case 0x5e: /* FRINT32X */
12264         case 0x5f: /* FRINT64X */
12265             need_fpstatus = true;
12266             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12267                 unallocated_encoding(s);
12268                 return;
12269             }
12270             break;
12271         default:
12272             unallocated_encoding(s);
12273             return;
12274         }
12275         break;
12276     }
12277     default:
12278         unallocated_encoding(s);
12279         return;
12280     }
12281 
12282     if (!fp_access_check(s)) {
12283         return;
12284     }
12285 
12286     if (need_fpstatus || rmode >= 0) {
12287         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12288     } else {
12289         tcg_fpstatus = NULL;
12290     }
12291     if (rmode >= 0) {
12292         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12293     } else {
12294         tcg_rmode = NULL;
12295     }
12296 
12297     switch (opcode) {
12298     case 0x5:
12299         if (u && size == 0) { /* NOT */
12300             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12301             return;
12302         }
12303         break;
12304     case 0x8: /* CMGT, CMGE */
12305         if (u) {
12306             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12307         } else {
12308             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12309         }
12310         return;
12311     case 0x9: /* CMEQ, CMLE */
12312         if (u) {
12313             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12314         } else {
12315             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12316         }
12317         return;
12318     case 0xa: /* CMLT */
12319         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12320         return;
12321     case 0xb:
12322         if (u) { /* ABS, NEG */
12323             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12324         } else {
12325             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12326         }
12327         return;
12328     }
12329 
12330     if (size == 3) {
12331         /* All 64-bit element operations can be shared with scalar 2misc */
12332         int pass;
12333 
12334         /* Coverity claims (size == 3 && !is_q) has been eliminated
12335          * from all paths leading to here.
12336          */
12337         tcg_debug_assert(is_q);
12338         for (pass = 0; pass < 2; pass++) {
12339             TCGv_i64 tcg_op = tcg_temp_new_i64();
12340             TCGv_i64 tcg_res = tcg_temp_new_i64();
12341 
12342             read_vec_element(s, tcg_op, rn, pass, MO_64);
12343 
12344             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12345                             tcg_rmode, tcg_fpstatus);
12346 
12347             write_vec_element(s, tcg_res, rd, pass, MO_64);
12348         }
12349     } else {
12350         int pass;
12351 
12352         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12353             TCGv_i32 tcg_op = tcg_temp_new_i32();
12354             TCGv_i32 tcg_res = tcg_temp_new_i32();
12355 
12356             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12357 
12358             if (size == 2) {
12359                 /* Special cases for 32 bit elements */
12360                 switch (opcode) {
12361                 case 0x4: /* CLS */
12362                     if (u) {
12363                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12364                     } else {
12365                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12366                     }
12367                     break;
12368                 case 0x7: /* SQABS, SQNEG */
12369                     if (u) {
12370                         gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
12371                     } else {
12372                         gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
12373                     }
12374                     break;
12375                 case 0x2f: /* FABS */
12376                     gen_helper_vfp_abss(tcg_res, tcg_op);
12377                     break;
12378                 case 0x6f: /* FNEG */
12379                     gen_helper_vfp_negs(tcg_res, tcg_op);
12380                     break;
12381                 case 0x7f: /* FSQRT */
12382                     gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
12383                     break;
12384                 case 0x1a: /* FCVTNS */
12385                 case 0x1b: /* FCVTMS */
12386                 case 0x1c: /* FCVTAS */
12387                 case 0x3a: /* FCVTPS */
12388                 case 0x3b: /* FCVTZS */
12389                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12390                                          tcg_constant_i32(0), tcg_fpstatus);
12391                     break;
12392                 case 0x5a: /* FCVTNU */
12393                 case 0x5b: /* FCVTMU */
12394                 case 0x5c: /* FCVTAU */
12395                 case 0x7a: /* FCVTPU */
12396                 case 0x7b: /* FCVTZU */
12397                     gen_helper_vfp_touls(tcg_res, tcg_op,
12398                                          tcg_constant_i32(0), tcg_fpstatus);
12399                     break;
12400                 case 0x18: /* FRINTN */
12401                 case 0x19: /* FRINTM */
12402                 case 0x38: /* FRINTP */
12403                 case 0x39: /* FRINTZ */
12404                 case 0x58: /* FRINTA */
12405                 case 0x79: /* FRINTI */
12406                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12407                     break;
12408                 case 0x59: /* FRINTX */
12409                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12410                     break;
12411                 case 0x7c: /* URSQRTE */
12412                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12413                     break;
12414                 case 0x1e: /* FRINT32Z */
12415                 case 0x5e: /* FRINT32X */
12416                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12417                     break;
12418                 case 0x1f: /* FRINT64Z */
12419                 case 0x5f: /* FRINT64X */
12420                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12421                     break;
12422                 default:
12423                     g_assert_not_reached();
12424                 }
12425             } else {
12426                 /* Use helpers for 8 and 16 bit elements */
12427                 switch (opcode) {
12428                 case 0x5: /* CNT, RBIT */
12429                     /* For these two insns size is part of the opcode specifier
12430                      * (handled earlier); they always operate on byte elements.
12431                      */
12432                     if (u) {
12433                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12434                     } else {
12435                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12436                     }
12437                     break;
12438                 case 0x7: /* SQABS, SQNEG */
12439                 {
12440                     NeonGenOneOpEnvFn *genfn;
12441                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12442                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12443                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12444                     };
12445                     genfn = fns[size][u];
12446                     genfn(tcg_res, tcg_env, tcg_op);
12447                     break;
12448                 }
12449                 case 0x4: /* CLS, CLZ */
12450                     if (u) {
12451                         if (size == 0) {
12452                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12453                         } else {
12454                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12455                         }
12456                     } else {
12457                         if (size == 0) {
12458                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12459                         } else {
12460                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12461                         }
12462                     }
12463                     break;
12464                 default:
12465                     g_assert_not_reached();
12466                 }
12467             }
12468 
12469             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12470         }
12471     }
12472     clear_vec_high(s, is_q, rd);
12473 
12474     if (tcg_rmode) {
12475         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12476     }
12477 }
12478 
12479 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12480  *
12481  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12482  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12483  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12484  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12485  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12486  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12487  *
12488  * This actually covers two groups where scalar access is governed by
12489  * bit 28. A bunch of the instructions (float to integral) only exist
12490  * in the vector form and are un-allocated for the scalar decode. Also
12491  * in the scalar decode Q is always 1.
12492  */
12493 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12494 {
12495     int fpop, opcode, a, u;
12496     int rn, rd;
12497     bool is_q;
12498     bool is_scalar;
12499     bool only_in_vector = false;
12500 
12501     int pass;
12502     TCGv_i32 tcg_rmode = NULL;
12503     TCGv_ptr tcg_fpstatus = NULL;
12504     bool need_fpst = true;
12505     int rmode = -1;
12506 
12507     if (!dc_isar_feature(aa64_fp16, s)) {
12508         unallocated_encoding(s);
12509         return;
12510     }
12511 
12512     rd = extract32(insn, 0, 5);
12513     rn = extract32(insn, 5, 5);
12514 
12515     a = extract32(insn, 23, 1);
12516     u = extract32(insn, 29, 1);
12517     is_scalar = extract32(insn, 28, 1);
12518     is_q = extract32(insn, 30, 1);
12519 
12520     opcode = extract32(insn, 12, 5);
12521     fpop = deposit32(opcode, 5, 1, a);
12522     fpop = deposit32(fpop, 6, 1, u);
12523 
12524     switch (fpop) {
12525     case 0x1d: /* SCVTF */
12526     case 0x5d: /* UCVTF */
12527     {
12528         int elements;
12529 
12530         if (is_scalar) {
12531             elements = 1;
12532         } else {
12533             elements = (is_q ? 8 : 4);
12534         }
12535 
12536         if (!fp_access_check(s)) {
12537             return;
12538         }
12539         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12540         return;
12541     }
12542     break;
12543     case 0x2c: /* FCMGT (zero) */
12544     case 0x2d: /* FCMEQ (zero) */
12545     case 0x2e: /* FCMLT (zero) */
12546     case 0x6c: /* FCMGE (zero) */
12547     case 0x6d: /* FCMLE (zero) */
12548         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12549         return;
12550     case 0x3d: /* FRECPE */
12551     case 0x3f: /* FRECPX */
12552         break;
12553     case 0x18: /* FRINTN */
12554         only_in_vector = true;
12555         rmode = FPROUNDING_TIEEVEN;
12556         break;
12557     case 0x19: /* FRINTM */
12558         only_in_vector = true;
12559         rmode = FPROUNDING_NEGINF;
12560         break;
12561     case 0x38: /* FRINTP */
12562         only_in_vector = true;
12563         rmode = FPROUNDING_POSINF;
12564         break;
12565     case 0x39: /* FRINTZ */
12566         only_in_vector = true;
12567         rmode = FPROUNDING_ZERO;
12568         break;
12569     case 0x58: /* FRINTA */
12570         only_in_vector = true;
12571         rmode = FPROUNDING_TIEAWAY;
12572         break;
12573     case 0x59: /* FRINTX */
12574     case 0x79: /* FRINTI */
12575         only_in_vector = true;
12576         /* current rounding mode */
12577         break;
12578     case 0x1a: /* FCVTNS */
12579         rmode = FPROUNDING_TIEEVEN;
12580         break;
12581     case 0x1b: /* FCVTMS */
12582         rmode = FPROUNDING_NEGINF;
12583         break;
12584     case 0x1c: /* FCVTAS */
12585         rmode = FPROUNDING_TIEAWAY;
12586         break;
12587     case 0x3a: /* FCVTPS */
12588         rmode = FPROUNDING_POSINF;
12589         break;
12590     case 0x3b: /* FCVTZS */
12591         rmode = FPROUNDING_ZERO;
12592         break;
12593     case 0x5a: /* FCVTNU */
12594         rmode = FPROUNDING_TIEEVEN;
12595         break;
12596     case 0x5b: /* FCVTMU */
12597         rmode = FPROUNDING_NEGINF;
12598         break;
12599     case 0x5c: /* FCVTAU */
12600         rmode = FPROUNDING_TIEAWAY;
12601         break;
12602     case 0x7a: /* FCVTPU */
12603         rmode = FPROUNDING_POSINF;
12604         break;
12605     case 0x7b: /* FCVTZU */
12606         rmode = FPROUNDING_ZERO;
12607         break;
12608     case 0x2f: /* FABS */
12609     case 0x6f: /* FNEG */
12610         need_fpst = false;
12611         break;
12612     case 0x7d: /* FRSQRTE */
12613     case 0x7f: /* FSQRT (vector) */
12614         break;
12615     default:
12616         unallocated_encoding(s);
12617         return;
12618     }
12619 
12620 
12621     /* Check additional constraints for the scalar encoding */
12622     if (is_scalar) {
12623         if (!is_q) {
12624             unallocated_encoding(s);
12625             return;
12626         }
12627         /* FRINTxx is only in the vector form */
12628         if (only_in_vector) {
12629             unallocated_encoding(s);
12630             return;
12631         }
12632     }
12633 
12634     if (!fp_access_check(s)) {
12635         return;
12636     }
12637 
12638     if (rmode >= 0 || need_fpst) {
12639         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12640     }
12641 
12642     if (rmode >= 0) {
12643         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12644     }
12645 
12646     if (is_scalar) {
12647         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12648         TCGv_i32 tcg_res = tcg_temp_new_i32();
12649 
12650         switch (fpop) {
12651         case 0x1a: /* FCVTNS */
12652         case 0x1b: /* FCVTMS */
12653         case 0x1c: /* FCVTAS */
12654         case 0x3a: /* FCVTPS */
12655         case 0x3b: /* FCVTZS */
12656             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12657             break;
12658         case 0x3d: /* FRECPE */
12659             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12660             break;
12661         case 0x3f: /* FRECPX */
12662             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12663             break;
12664         case 0x5a: /* FCVTNU */
12665         case 0x5b: /* FCVTMU */
12666         case 0x5c: /* FCVTAU */
12667         case 0x7a: /* FCVTPU */
12668         case 0x7b: /* FCVTZU */
12669             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12670             break;
12671         case 0x6f: /* FNEG */
12672             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12673             break;
12674         case 0x7d: /* FRSQRTE */
12675             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12676             break;
12677         default:
12678             g_assert_not_reached();
12679         }
12680 
12681         /* limit any sign extension going on */
12682         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12683         write_fp_sreg(s, rd, tcg_res);
12684     } else {
12685         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12686             TCGv_i32 tcg_op = tcg_temp_new_i32();
12687             TCGv_i32 tcg_res = tcg_temp_new_i32();
12688 
12689             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12690 
12691             switch (fpop) {
12692             case 0x1a: /* FCVTNS */
12693             case 0x1b: /* FCVTMS */
12694             case 0x1c: /* FCVTAS */
12695             case 0x3a: /* FCVTPS */
12696             case 0x3b: /* FCVTZS */
12697                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12698                 break;
12699             case 0x3d: /* FRECPE */
12700                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12701                 break;
12702             case 0x5a: /* FCVTNU */
12703             case 0x5b: /* FCVTMU */
12704             case 0x5c: /* FCVTAU */
12705             case 0x7a: /* FCVTPU */
12706             case 0x7b: /* FCVTZU */
12707                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12708                 break;
12709             case 0x18: /* FRINTN */
12710             case 0x19: /* FRINTM */
12711             case 0x38: /* FRINTP */
12712             case 0x39: /* FRINTZ */
12713             case 0x58: /* FRINTA */
12714             case 0x79: /* FRINTI */
12715                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12716                 break;
12717             case 0x59: /* FRINTX */
12718                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12719                 break;
12720             case 0x2f: /* FABS */
12721                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12722                 break;
12723             case 0x6f: /* FNEG */
12724                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12725                 break;
12726             case 0x7d: /* FRSQRTE */
12727                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12728                 break;
12729             case 0x7f: /* FSQRT */
12730                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12731                 break;
12732             default:
12733                 g_assert_not_reached();
12734             }
12735 
12736             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12737         }
12738 
12739         clear_vec_high(s, is_q, rd);
12740     }
12741 
12742     if (tcg_rmode) {
12743         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12744     }
12745 }
12746 
12747 /* AdvSIMD scalar x indexed element
12748  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12749  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12750  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12751  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12752  * AdvSIMD vector x indexed element
12753  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12754  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12755  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12756  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12757  */
12758 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12759 {
12760     /* This encoding has two kinds of instruction:
12761      *  normal, where we perform elt x idxelt => elt for each
12762      *     element in the vector
12763      *  long, where we perform elt x idxelt and generate a result of
12764      *     double the width of the input element
12765      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12766      */
12767     bool is_scalar = extract32(insn, 28, 1);
12768     bool is_q = extract32(insn, 30, 1);
12769     bool u = extract32(insn, 29, 1);
12770     int size = extract32(insn, 22, 2);
12771     int l = extract32(insn, 21, 1);
12772     int m = extract32(insn, 20, 1);
12773     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12774     int rm = extract32(insn, 16, 4);
12775     int opcode = extract32(insn, 12, 4);
12776     int h = extract32(insn, 11, 1);
12777     int rn = extract32(insn, 5, 5);
12778     int rd = extract32(insn, 0, 5);
12779     bool is_long = false;
12780     int is_fp = 0;
12781     bool is_fp16 = false;
12782     int index;
12783     TCGv_ptr fpst;
12784 
12785     switch (16 * u + opcode) {
12786     case 0x08: /* MUL */
12787     case 0x10: /* MLA */
12788     case 0x14: /* MLS */
12789         if (is_scalar) {
12790             unallocated_encoding(s);
12791             return;
12792         }
12793         break;
12794     case 0x02: /* SMLAL, SMLAL2 */
12795     case 0x12: /* UMLAL, UMLAL2 */
12796     case 0x06: /* SMLSL, SMLSL2 */
12797     case 0x16: /* UMLSL, UMLSL2 */
12798     case 0x0a: /* SMULL, SMULL2 */
12799     case 0x1a: /* UMULL, UMULL2 */
12800         if (is_scalar) {
12801             unallocated_encoding(s);
12802             return;
12803         }
12804         is_long = true;
12805         break;
12806     case 0x03: /* SQDMLAL, SQDMLAL2 */
12807     case 0x07: /* SQDMLSL, SQDMLSL2 */
12808     case 0x0b: /* SQDMULL, SQDMULL2 */
12809         is_long = true;
12810         break;
12811     case 0x0c: /* SQDMULH */
12812     case 0x0d: /* SQRDMULH */
12813         break;
12814     case 0x01: /* FMLA */
12815     case 0x05: /* FMLS */
12816     case 0x09: /* FMUL */
12817     case 0x19: /* FMULX */
12818         is_fp = 1;
12819         break;
12820     case 0x1d: /* SQRDMLAH */
12821     case 0x1f: /* SQRDMLSH */
12822         if (!dc_isar_feature(aa64_rdm, s)) {
12823             unallocated_encoding(s);
12824             return;
12825         }
12826         break;
12827     case 0x0e: /* SDOT */
12828     case 0x1e: /* UDOT */
12829         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12830             unallocated_encoding(s);
12831             return;
12832         }
12833         break;
12834     case 0x0f:
12835         switch (size) {
12836         case 0: /* SUDOT */
12837         case 2: /* USDOT */
12838             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12839                 unallocated_encoding(s);
12840                 return;
12841             }
12842             size = MO_32;
12843             break;
12844         case 1: /* BFDOT */
12845             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12846                 unallocated_encoding(s);
12847                 return;
12848             }
12849             size = MO_32;
12850             break;
12851         case 3: /* BFMLAL{B,T} */
12852             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12853                 unallocated_encoding(s);
12854                 return;
12855             }
12856             /* can't set is_fp without other incorrect size checks */
12857             size = MO_16;
12858             break;
12859         default:
12860             unallocated_encoding(s);
12861             return;
12862         }
12863         break;
12864     case 0x11: /* FCMLA #0 */
12865     case 0x13: /* FCMLA #90 */
12866     case 0x15: /* FCMLA #180 */
12867     case 0x17: /* FCMLA #270 */
12868         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12869             unallocated_encoding(s);
12870             return;
12871         }
12872         is_fp = 2;
12873         break;
12874     case 0x00: /* FMLAL */
12875     case 0x04: /* FMLSL */
12876     case 0x18: /* FMLAL2 */
12877     case 0x1c: /* FMLSL2 */
12878         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12879             unallocated_encoding(s);
12880             return;
12881         }
12882         size = MO_16;
12883         /* is_fp, but we pass tcg_env not fp_status.  */
12884         break;
12885     default:
12886         unallocated_encoding(s);
12887         return;
12888     }
12889 
12890     switch (is_fp) {
12891     case 1: /* normal fp */
12892         /* convert insn encoded size to MemOp size */
12893         switch (size) {
12894         case 0: /* half-precision */
12895             size = MO_16;
12896             is_fp16 = true;
12897             break;
12898         case MO_32: /* single precision */
12899         case MO_64: /* double precision */
12900             break;
12901         default:
12902             unallocated_encoding(s);
12903             return;
12904         }
12905         break;
12906 
12907     case 2: /* complex fp */
12908         /* Each indexable element is a complex pair.  */
12909         size += 1;
12910         switch (size) {
12911         case MO_32:
12912             if (h && !is_q) {
12913                 unallocated_encoding(s);
12914                 return;
12915             }
12916             is_fp16 = true;
12917             break;
12918         case MO_64:
12919             break;
12920         default:
12921             unallocated_encoding(s);
12922             return;
12923         }
12924         break;
12925 
12926     default: /* integer */
12927         switch (size) {
12928         case MO_8:
12929         case MO_64:
12930             unallocated_encoding(s);
12931             return;
12932         }
12933         break;
12934     }
12935     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12936         unallocated_encoding(s);
12937         return;
12938     }
12939 
12940     /* Given MemOp size, adjust register and indexing.  */
12941     switch (size) {
12942     case MO_16:
12943         index = h << 2 | l << 1 | m;
12944         break;
12945     case MO_32:
12946         index = h << 1 | l;
12947         rm |= m << 4;
12948         break;
12949     case MO_64:
12950         if (l || !is_q) {
12951             unallocated_encoding(s);
12952             return;
12953         }
12954         index = h;
12955         rm |= m << 4;
12956         break;
12957     default:
12958         g_assert_not_reached();
12959     }
12960 
12961     if (!fp_access_check(s)) {
12962         return;
12963     }
12964 
12965     if (is_fp) {
12966         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12967     } else {
12968         fpst = NULL;
12969     }
12970 
12971     switch (16 * u + opcode) {
12972     case 0x0e: /* SDOT */
12973     case 0x1e: /* UDOT */
12974         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12975                          u ? gen_helper_gvec_udot_idx_b
12976                          : gen_helper_gvec_sdot_idx_b);
12977         return;
12978     case 0x0f:
12979         switch (extract32(insn, 22, 2)) {
12980         case 0: /* SUDOT */
12981             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12982                              gen_helper_gvec_sudot_idx_b);
12983             return;
12984         case 1: /* BFDOT */
12985             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12986                              gen_helper_gvec_bfdot_idx);
12987             return;
12988         case 2: /* USDOT */
12989             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12990                              gen_helper_gvec_usdot_idx_b);
12991             return;
12992         case 3: /* BFMLAL{B,T} */
12993             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12994                               gen_helper_gvec_bfmlal_idx);
12995             return;
12996         }
12997         g_assert_not_reached();
12998     case 0x11: /* FCMLA #0 */
12999     case 0x13: /* FCMLA #90 */
13000     case 0x15: /* FCMLA #180 */
13001     case 0x17: /* FCMLA #270 */
13002         {
13003             int rot = extract32(insn, 13, 2);
13004             int data = (index << 2) | rot;
13005             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
13006                                vec_full_reg_offset(s, rn),
13007                                vec_full_reg_offset(s, rm),
13008                                vec_full_reg_offset(s, rd), fpst,
13009                                is_q ? 16 : 8, vec_full_reg_size(s), data,
13010                                size == MO_64
13011                                ? gen_helper_gvec_fcmlas_idx
13012                                : gen_helper_gvec_fcmlah_idx);
13013         }
13014         return;
13015 
13016     case 0x00: /* FMLAL */
13017     case 0x04: /* FMLSL */
13018     case 0x18: /* FMLAL2 */
13019     case 0x1c: /* FMLSL2 */
13020         {
13021             int is_s = extract32(opcode, 2, 1);
13022             int is_2 = u;
13023             int data = (index << 2) | (is_2 << 1) | is_s;
13024             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13025                                vec_full_reg_offset(s, rn),
13026                                vec_full_reg_offset(s, rm), tcg_env,
13027                                is_q ? 16 : 8, vec_full_reg_size(s),
13028                                data, gen_helper_gvec_fmlal_idx_a64);
13029         }
13030         return;
13031 
13032     case 0x08: /* MUL */
13033         if (!is_long && !is_scalar) {
13034             static gen_helper_gvec_3 * const fns[3] = {
13035                 gen_helper_gvec_mul_idx_h,
13036                 gen_helper_gvec_mul_idx_s,
13037                 gen_helper_gvec_mul_idx_d,
13038             };
13039             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
13040                                vec_full_reg_offset(s, rn),
13041                                vec_full_reg_offset(s, rm),
13042                                is_q ? 16 : 8, vec_full_reg_size(s),
13043                                index, fns[size - 1]);
13044             return;
13045         }
13046         break;
13047 
13048     case 0x10: /* MLA */
13049         if (!is_long && !is_scalar) {
13050             static gen_helper_gvec_4 * const fns[3] = {
13051                 gen_helper_gvec_mla_idx_h,
13052                 gen_helper_gvec_mla_idx_s,
13053                 gen_helper_gvec_mla_idx_d,
13054             };
13055             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13056                                vec_full_reg_offset(s, rn),
13057                                vec_full_reg_offset(s, rm),
13058                                vec_full_reg_offset(s, rd),
13059                                is_q ? 16 : 8, vec_full_reg_size(s),
13060                                index, fns[size - 1]);
13061             return;
13062         }
13063         break;
13064 
13065     case 0x14: /* MLS */
13066         if (!is_long && !is_scalar) {
13067             static gen_helper_gvec_4 * const fns[3] = {
13068                 gen_helper_gvec_mls_idx_h,
13069                 gen_helper_gvec_mls_idx_s,
13070                 gen_helper_gvec_mls_idx_d,
13071             };
13072             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13073                                vec_full_reg_offset(s, rn),
13074                                vec_full_reg_offset(s, rm),
13075                                vec_full_reg_offset(s, rd),
13076                                is_q ? 16 : 8, vec_full_reg_size(s),
13077                                index, fns[size - 1]);
13078             return;
13079         }
13080         break;
13081     }
13082 
13083     if (size == 3) {
13084         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13085         int pass;
13086 
13087         assert(is_fp && is_q && !is_long);
13088 
13089         read_vec_element(s, tcg_idx, rm, index, MO_64);
13090 
13091         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13092             TCGv_i64 tcg_op = tcg_temp_new_i64();
13093             TCGv_i64 tcg_res = tcg_temp_new_i64();
13094 
13095             read_vec_element(s, tcg_op, rn, pass, MO_64);
13096 
13097             switch (16 * u + opcode) {
13098             case 0x05: /* FMLS */
13099                 /* As usual for ARM, separate negation for fused multiply-add */
13100                 gen_helper_vfp_negd(tcg_op, tcg_op);
13101                 /* fall through */
13102             case 0x01: /* FMLA */
13103                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13104                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13105                 break;
13106             case 0x09: /* FMUL */
13107                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13108                 break;
13109             case 0x19: /* FMULX */
13110                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13111                 break;
13112             default:
13113                 g_assert_not_reached();
13114             }
13115 
13116             write_vec_element(s, tcg_res, rd, pass, MO_64);
13117         }
13118 
13119         clear_vec_high(s, !is_scalar, rd);
13120     } else if (!is_long) {
13121         /* 32 bit floating point, or 16 or 32 bit integer.
13122          * For the 16 bit scalar case we use the usual Neon helpers and
13123          * rely on the fact that 0 op 0 == 0 with no side effects.
13124          */
13125         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13126         int pass, maxpasses;
13127 
13128         if (is_scalar) {
13129             maxpasses = 1;
13130         } else {
13131             maxpasses = is_q ? 4 : 2;
13132         }
13133 
13134         read_vec_element_i32(s, tcg_idx, rm, index, size);
13135 
13136         if (size == 1 && !is_scalar) {
13137             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13138              * the index into both halves of the 32 bit tcg_idx and then use
13139              * the usual Neon helpers.
13140              */
13141             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13142         }
13143 
13144         for (pass = 0; pass < maxpasses; pass++) {
13145             TCGv_i32 tcg_op = tcg_temp_new_i32();
13146             TCGv_i32 tcg_res = tcg_temp_new_i32();
13147 
13148             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13149 
13150             switch (16 * u + opcode) {
13151             case 0x08: /* MUL */
13152             case 0x10: /* MLA */
13153             case 0x14: /* MLS */
13154             {
13155                 static NeonGenTwoOpFn * const fns[2][2] = {
13156                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13157                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13158                 };
13159                 NeonGenTwoOpFn *genfn;
13160                 bool is_sub = opcode == 0x4;
13161 
13162                 if (size == 1) {
13163                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13164                 } else {
13165                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13166                 }
13167                 if (opcode == 0x8) {
13168                     break;
13169                 }
13170                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13171                 genfn = fns[size - 1][is_sub];
13172                 genfn(tcg_res, tcg_op, tcg_res);
13173                 break;
13174             }
13175             case 0x05: /* FMLS */
13176             case 0x01: /* FMLA */
13177                 read_vec_element_i32(s, tcg_res, rd, pass,
13178                                      is_scalar ? size : MO_32);
13179                 switch (size) {
13180                 case 1:
13181                     if (opcode == 0x5) {
13182                         /* As usual for ARM, separate negation for fused
13183                          * multiply-add */
13184                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13185                     }
13186                     if (is_scalar) {
13187                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13188                                                    tcg_res, fpst);
13189                     } else {
13190                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13191                                                     tcg_res, fpst);
13192                     }
13193                     break;
13194                 case 2:
13195                     if (opcode == 0x5) {
13196                         /* As usual for ARM, separate negation for
13197                          * fused multiply-add */
13198                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13199                     }
13200                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13201                                            tcg_res, fpst);
13202                     break;
13203                 default:
13204                     g_assert_not_reached();
13205                 }
13206                 break;
13207             case 0x09: /* FMUL */
13208                 switch (size) {
13209                 case 1:
13210                     if (is_scalar) {
13211                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13212                                                 tcg_idx, fpst);
13213                     } else {
13214                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13215                                                  tcg_idx, fpst);
13216                     }
13217                     break;
13218                 case 2:
13219                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13220                     break;
13221                 default:
13222                     g_assert_not_reached();
13223                 }
13224                 break;
13225             case 0x19: /* FMULX */
13226                 switch (size) {
13227                 case 1:
13228                     if (is_scalar) {
13229                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13230                                                  tcg_idx, fpst);
13231                     } else {
13232                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13233                                                   tcg_idx, fpst);
13234                     }
13235                     break;
13236                 case 2:
13237                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13238                     break;
13239                 default:
13240                     g_assert_not_reached();
13241                 }
13242                 break;
13243             case 0x0c: /* SQDMULH */
13244                 if (size == 1) {
13245                     gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
13246                                                tcg_op, tcg_idx);
13247                 } else {
13248                     gen_helper_neon_qdmulh_s32(tcg_res, tcg_env,
13249                                                tcg_op, tcg_idx);
13250                 }
13251                 break;
13252             case 0x0d: /* SQRDMULH */
13253                 if (size == 1) {
13254                     gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env,
13255                                                 tcg_op, tcg_idx);
13256                 } else {
13257                     gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env,
13258                                                 tcg_op, tcg_idx);
13259                 }
13260                 break;
13261             case 0x1d: /* SQRDMLAH */
13262                 read_vec_element_i32(s, tcg_res, rd, pass,
13263                                      is_scalar ? size : MO_32);
13264                 if (size == 1) {
13265                     gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env,
13266                                                 tcg_op, tcg_idx, tcg_res);
13267                 } else {
13268                     gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env,
13269                                                 tcg_op, tcg_idx, tcg_res);
13270                 }
13271                 break;
13272             case 0x1f: /* SQRDMLSH */
13273                 read_vec_element_i32(s, tcg_res, rd, pass,
13274                                      is_scalar ? size : MO_32);
13275                 if (size == 1) {
13276                     gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env,
13277                                                 tcg_op, tcg_idx, tcg_res);
13278                 } else {
13279                     gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env,
13280                                                 tcg_op, tcg_idx, tcg_res);
13281                 }
13282                 break;
13283             default:
13284                 g_assert_not_reached();
13285             }
13286 
13287             if (is_scalar) {
13288                 write_fp_sreg(s, rd, tcg_res);
13289             } else {
13290                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13291             }
13292         }
13293 
13294         clear_vec_high(s, is_q, rd);
13295     } else {
13296         /* long ops: 16x16->32 or 32x32->64 */
13297         TCGv_i64 tcg_res[2];
13298         int pass;
13299         bool satop = extract32(opcode, 0, 1);
13300         MemOp memop = MO_32;
13301 
13302         if (satop || !u) {
13303             memop |= MO_SIGN;
13304         }
13305 
13306         if (size == 2) {
13307             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13308 
13309             read_vec_element(s, tcg_idx, rm, index, memop);
13310 
13311             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13312                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13313                 TCGv_i64 tcg_passres;
13314                 int passelt;
13315 
13316                 if (is_scalar) {
13317                     passelt = 0;
13318                 } else {
13319                     passelt = pass + (is_q * 2);
13320                 }
13321 
13322                 read_vec_element(s, tcg_op, rn, passelt, memop);
13323 
13324                 tcg_res[pass] = tcg_temp_new_i64();
13325 
13326                 if (opcode == 0xa || opcode == 0xb) {
13327                     /* Non-accumulating ops */
13328                     tcg_passres = tcg_res[pass];
13329                 } else {
13330                     tcg_passres = tcg_temp_new_i64();
13331                 }
13332 
13333                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13334 
13335                 if (satop) {
13336                     /* saturating, doubling */
13337                     gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
13338                                                       tcg_passres, tcg_passres);
13339                 }
13340 
13341                 if (opcode == 0xa || opcode == 0xb) {
13342                     continue;
13343                 }
13344 
13345                 /* Accumulating op: handle accumulate step */
13346                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13347 
13348                 switch (opcode) {
13349                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13350                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13351                     break;
13352                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13353                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13354                     break;
13355                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13356                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13357                     /* fall through */
13358                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13359                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
13360                                                       tcg_res[pass],
13361                                                       tcg_passres);
13362                     break;
13363                 default:
13364                     g_assert_not_reached();
13365                 }
13366             }
13367 
13368             clear_vec_high(s, !is_scalar, rd);
13369         } else {
13370             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13371 
13372             assert(size == 1);
13373             read_vec_element_i32(s, tcg_idx, rm, index, size);
13374 
13375             if (!is_scalar) {
13376                 /* The simplest way to handle the 16x16 indexed ops is to
13377                  * duplicate the index into both halves of the 32 bit tcg_idx
13378                  * and then use the usual Neon helpers.
13379                  */
13380                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13381             }
13382 
13383             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13384                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13385                 TCGv_i64 tcg_passres;
13386 
13387                 if (is_scalar) {
13388                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13389                 } else {
13390                     read_vec_element_i32(s, tcg_op, rn,
13391                                          pass + (is_q * 2), MO_32);
13392                 }
13393 
13394                 tcg_res[pass] = tcg_temp_new_i64();
13395 
13396                 if (opcode == 0xa || opcode == 0xb) {
13397                     /* Non-accumulating ops */
13398                     tcg_passres = tcg_res[pass];
13399                 } else {
13400                     tcg_passres = tcg_temp_new_i64();
13401                 }
13402 
13403                 if (memop & MO_SIGN) {
13404                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13405                 } else {
13406                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13407                 }
13408                 if (satop) {
13409                     gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
13410                                                       tcg_passres, tcg_passres);
13411                 }
13412 
13413                 if (opcode == 0xa || opcode == 0xb) {
13414                     continue;
13415                 }
13416 
13417                 /* Accumulating op: handle accumulate step */
13418                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13419 
13420                 switch (opcode) {
13421                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13422                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13423                                              tcg_passres);
13424                     break;
13425                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13426                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13427                                              tcg_passres);
13428                     break;
13429                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13430                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13431                     /* fall through */
13432                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13433                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
13434                                                       tcg_res[pass],
13435                                                       tcg_passres);
13436                     break;
13437                 default:
13438                     g_assert_not_reached();
13439                 }
13440             }
13441 
13442             if (is_scalar) {
13443                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13444             }
13445         }
13446 
13447         if (is_scalar) {
13448             tcg_res[1] = tcg_constant_i64(0);
13449         }
13450 
13451         for (pass = 0; pass < 2; pass++) {
13452             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13453         }
13454     }
13455 }
13456 
13457 /* Crypto AES
13458  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13459  * +-----------------+------+-----------+--------+-----+------+------+
13460  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13461  * +-----------------+------+-----------+--------+-----+------+------+
13462  */
13463 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13464 {
13465     int size = extract32(insn, 22, 2);
13466     int opcode = extract32(insn, 12, 5);
13467     int rn = extract32(insn, 5, 5);
13468     int rd = extract32(insn, 0, 5);
13469     gen_helper_gvec_2 *genfn2 = NULL;
13470     gen_helper_gvec_3 *genfn3 = NULL;
13471 
13472     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13473         unallocated_encoding(s);
13474         return;
13475     }
13476 
13477     switch (opcode) {
13478     case 0x4: /* AESE */
13479         genfn3 = gen_helper_crypto_aese;
13480         break;
13481     case 0x6: /* AESMC */
13482         genfn2 = gen_helper_crypto_aesmc;
13483         break;
13484     case 0x5: /* AESD */
13485         genfn3 = gen_helper_crypto_aesd;
13486         break;
13487     case 0x7: /* AESIMC */
13488         genfn2 = gen_helper_crypto_aesimc;
13489         break;
13490     default:
13491         unallocated_encoding(s);
13492         return;
13493     }
13494 
13495     if (!fp_access_check(s)) {
13496         return;
13497     }
13498     if (genfn2) {
13499         gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2);
13500     } else {
13501         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3);
13502     }
13503 }
13504 
13505 /* Crypto three-reg SHA
13506  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13507  * +-----------------+------+---+------+---+--------+-----+------+------+
13508  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13509  * +-----------------+------+---+------+---+--------+-----+------+------+
13510  */
13511 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13512 {
13513     int size = extract32(insn, 22, 2);
13514     int opcode = extract32(insn, 12, 3);
13515     int rm = extract32(insn, 16, 5);
13516     int rn = extract32(insn, 5, 5);
13517     int rd = extract32(insn, 0, 5);
13518     gen_helper_gvec_3 *genfn;
13519     bool feature;
13520 
13521     if (size != 0) {
13522         unallocated_encoding(s);
13523         return;
13524     }
13525 
13526     switch (opcode) {
13527     case 0: /* SHA1C */
13528         genfn = gen_helper_crypto_sha1c;
13529         feature = dc_isar_feature(aa64_sha1, s);
13530         break;
13531     case 1: /* SHA1P */
13532         genfn = gen_helper_crypto_sha1p;
13533         feature = dc_isar_feature(aa64_sha1, s);
13534         break;
13535     case 2: /* SHA1M */
13536         genfn = gen_helper_crypto_sha1m;
13537         feature = dc_isar_feature(aa64_sha1, s);
13538         break;
13539     case 3: /* SHA1SU0 */
13540         genfn = gen_helper_crypto_sha1su0;
13541         feature = dc_isar_feature(aa64_sha1, s);
13542         break;
13543     case 4: /* SHA256H */
13544         genfn = gen_helper_crypto_sha256h;
13545         feature = dc_isar_feature(aa64_sha256, s);
13546         break;
13547     case 5: /* SHA256H2 */
13548         genfn = gen_helper_crypto_sha256h2;
13549         feature = dc_isar_feature(aa64_sha256, s);
13550         break;
13551     case 6: /* SHA256SU1 */
13552         genfn = gen_helper_crypto_sha256su1;
13553         feature = dc_isar_feature(aa64_sha256, s);
13554         break;
13555     default:
13556         unallocated_encoding(s);
13557         return;
13558     }
13559 
13560     if (!feature) {
13561         unallocated_encoding(s);
13562         return;
13563     }
13564 
13565     if (!fp_access_check(s)) {
13566         return;
13567     }
13568     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13569 }
13570 
13571 /* Crypto two-reg SHA
13572  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13573  * +-----------------+------+-----------+--------+-----+------+------+
13574  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13575  * +-----------------+------+-----------+--------+-----+------+------+
13576  */
13577 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13578 {
13579     int size = extract32(insn, 22, 2);
13580     int opcode = extract32(insn, 12, 5);
13581     int rn = extract32(insn, 5, 5);
13582     int rd = extract32(insn, 0, 5);
13583     gen_helper_gvec_2 *genfn;
13584     bool feature;
13585 
13586     if (size != 0) {
13587         unallocated_encoding(s);
13588         return;
13589     }
13590 
13591     switch (opcode) {
13592     case 0: /* SHA1H */
13593         feature = dc_isar_feature(aa64_sha1, s);
13594         genfn = gen_helper_crypto_sha1h;
13595         break;
13596     case 1: /* SHA1SU1 */
13597         feature = dc_isar_feature(aa64_sha1, s);
13598         genfn = gen_helper_crypto_sha1su1;
13599         break;
13600     case 2: /* SHA256SU0 */
13601         feature = dc_isar_feature(aa64_sha256, s);
13602         genfn = gen_helper_crypto_sha256su0;
13603         break;
13604     default:
13605         unallocated_encoding(s);
13606         return;
13607     }
13608 
13609     if (!feature) {
13610         unallocated_encoding(s);
13611         return;
13612     }
13613 
13614     if (!fp_access_check(s)) {
13615         return;
13616     }
13617     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13618 }
13619 
/*
 * Emit 64-bit TCG ops computing d = n ^ rotate_left(m, 1).
 *
 * Used as the .fni8 expander of gen_gvec_rax1().
 * NOTE(review): d is written by the rotate before n is read by the xor,
 * so this assumes d and n are distinct temporaries -- confirm that the
 * gvec expander guarantees this.
 */
static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    /* d = m rotated left by one bit */
    tcg_gen_rotli_i64(d, m, 1);
    /* d ^= n */
    tcg_gen_xor_i64(d, d, n);
}
13625 
/*
 * Emit host-vector TCG ops computing, per element of size @vece,
 * d = n ^ rotate_left(m, 1).
 *
 * Used as the .fniv expander of gen_gvec_rax1().
 * NOTE(review): d is written by the rotate before n is read by the xor,
 * so this assumes d and n are distinct temporaries -- confirm that the
 * gvec expander guarantees this.
 */
static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    /* d = m with each element rotated left by one bit */
    tcg_gen_rotli_vec(vece, d, m, 1);
    /* d ^= n */
    tcg_gen_xor_vec(vece, d, d, n);
}
13631 
13632 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13633                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13634 {
13635     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13636     static const GVecGen3 op = {
13637         .fni8 = gen_rax1_i64,
13638         .fniv = gen_rax1_vec,
13639         .opt_opc = vecop_list,
13640         .fno = gen_helper_crypto_rax1,
13641         .vece = MO_64,
13642     };
13643     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13644 }
13645 
13646 /* Crypto three-reg SHA512
13647  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13648  * +-----------------------+------+---+---+-----+--------+------+------+
13649  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13650  * +-----------------------+------+---+---+-----+--------+------+------+
13651  */
13652 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13653 {
13654     int opcode = extract32(insn, 10, 2);
13655     int o =  extract32(insn, 14, 1);
13656     int rm = extract32(insn, 16, 5);
13657     int rn = extract32(insn, 5, 5);
13658     int rd = extract32(insn, 0, 5);
13659     bool feature;
13660     gen_helper_gvec_3 *oolfn = NULL;
13661     GVecGen3Fn *gvecfn = NULL;
13662 
13663     if (o == 0) {
13664         switch (opcode) {
13665         case 0: /* SHA512H */
13666             feature = dc_isar_feature(aa64_sha512, s);
13667             oolfn = gen_helper_crypto_sha512h;
13668             break;
13669         case 1: /* SHA512H2 */
13670             feature = dc_isar_feature(aa64_sha512, s);
13671             oolfn = gen_helper_crypto_sha512h2;
13672             break;
13673         case 2: /* SHA512SU1 */
13674             feature = dc_isar_feature(aa64_sha512, s);
13675             oolfn = gen_helper_crypto_sha512su1;
13676             break;
13677         case 3: /* RAX1 */
13678             feature = dc_isar_feature(aa64_sha3, s);
13679             gvecfn = gen_gvec_rax1;
13680             break;
13681         default:
13682             g_assert_not_reached();
13683         }
13684     } else {
13685         switch (opcode) {
13686         case 0: /* SM3PARTW1 */
13687             feature = dc_isar_feature(aa64_sm3, s);
13688             oolfn = gen_helper_crypto_sm3partw1;
13689             break;
13690         case 1: /* SM3PARTW2 */
13691             feature = dc_isar_feature(aa64_sm3, s);
13692             oolfn = gen_helper_crypto_sm3partw2;
13693             break;
13694         case 2: /* SM4EKEY */
13695             feature = dc_isar_feature(aa64_sm4, s);
13696             oolfn = gen_helper_crypto_sm4ekey;
13697             break;
13698         default:
13699             unallocated_encoding(s);
13700             return;
13701         }
13702     }
13703 
13704     if (!feature) {
13705         unallocated_encoding(s);
13706         return;
13707     }
13708 
13709     if (!fp_access_check(s)) {
13710         return;
13711     }
13712 
13713     if (oolfn) {
13714         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13715     } else {
13716         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13717     }
13718 }
13719 
13720 /* Crypto two-reg SHA512
13721  *  31                                     12  11  10  9    5 4    0
13722  * +-----------------------------------------+--------+------+------+
13723  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13724  * +-----------------------------------------+--------+------+------+
13725  */
13726 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13727 {
13728     int opcode = extract32(insn, 10, 2);
13729     int rn = extract32(insn, 5, 5);
13730     int rd = extract32(insn, 0, 5);
13731     bool feature;
13732 
13733     switch (opcode) {
13734     case 0: /* SHA512SU0 */
13735         feature = dc_isar_feature(aa64_sha512, s);
13736         break;
13737     case 1: /* SM4E */
13738         feature = dc_isar_feature(aa64_sm4, s);
13739         break;
13740     default:
13741         unallocated_encoding(s);
13742         return;
13743     }
13744 
13745     if (!feature) {
13746         unallocated_encoding(s);
13747         return;
13748     }
13749 
13750     if (!fp_access_check(s)) {
13751         return;
13752     }
13753 
13754     switch (opcode) {
13755     case 0: /* SHA512SU0 */
13756         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13757         break;
13758     case 1: /* SM4E */
13759         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13760         break;
13761     default:
13762         g_assert_not_reached();
13763     }
13764 }
13765 
13766 /* Crypto four-register
13767  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13768  * +-------------------+-----+------+---+------+------+------+
13769  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13770  * +-------------------+-----+------+---+------+------+------+
13771  */
13772 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13773 {
13774     int op0 = extract32(insn, 21, 2);
13775     int rm = extract32(insn, 16, 5);
13776     int ra = extract32(insn, 10, 5);
13777     int rn = extract32(insn, 5, 5);
13778     int rd = extract32(insn, 0, 5);
13779     bool feature;
13780 
13781     switch (op0) {
13782     case 0: /* EOR3 */
13783     case 1: /* BCAX */
13784         feature = dc_isar_feature(aa64_sha3, s);
13785         break;
13786     case 2: /* SM3SS1 */
13787         feature = dc_isar_feature(aa64_sm3, s);
13788         break;
13789     default:
13790         unallocated_encoding(s);
13791         return;
13792     }
13793 
13794     if (!feature) {
13795         unallocated_encoding(s);
13796         return;
13797     }
13798 
13799     if (!fp_access_check(s)) {
13800         return;
13801     }
13802 
13803     if (op0 < 2) {
13804         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13805         int pass;
13806 
13807         tcg_op1 = tcg_temp_new_i64();
13808         tcg_op2 = tcg_temp_new_i64();
13809         tcg_op3 = tcg_temp_new_i64();
13810         tcg_res[0] = tcg_temp_new_i64();
13811         tcg_res[1] = tcg_temp_new_i64();
13812 
13813         for (pass = 0; pass < 2; pass++) {
13814             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13815             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13816             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13817 
13818             if (op0 == 0) {
13819                 /* EOR3 */
13820                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13821             } else {
13822                 /* BCAX */
13823                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13824             }
13825             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13826         }
13827         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13828         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13829     } else {
13830         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13831 
13832         tcg_op1 = tcg_temp_new_i32();
13833         tcg_op2 = tcg_temp_new_i32();
13834         tcg_op3 = tcg_temp_new_i32();
13835         tcg_res = tcg_temp_new_i32();
13836         tcg_zero = tcg_constant_i32(0);
13837 
13838         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13839         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13840         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13841 
13842         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13843         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13844         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13845         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13846 
13847         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13848         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13849         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13850         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13851     }
13852 }
13853 
13854 /* Crypto XAR
13855  *  31                   21 20  16 15    10 9    5 4    0
13856  * +-----------------------+------+--------+------+------+
13857  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13858  * +-----------------------+------+--------+------+------+
13859  */
13860 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13861 {
13862     int rm = extract32(insn, 16, 5);
13863     int imm6 = extract32(insn, 10, 6);
13864     int rn = extract32(insn, 5, 5);
13865     int rd = extract32(insn, 0, 5);
13866 
13867     if (!dc_isar_feature(aa64_sha3, s)) {
13868         unallocated_encoding(s);
13869         return;
13870     }
13871 
13872     if (!fp_access_check(s)) {
13873         return;
13874     }
13875 
13876     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13877                  vec_full_reg_offset(s, rn),
13878                  vec_full_reg_offset(s, rm), imm6, 16,
13879                  vec_full_reg_size(s));
13880 }
13881 
13882 /* Crypto three-reg imm2
13883  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13884  * +-----------------------+------+-----+------+--------+------+------+
13885  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13886  * +-----------------------+------+-----+------+--------+------+------+
13887  */
13888 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13889 {
13890     static gen_helper_gvec_3 * const fns[4] = {
13891         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13892         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13893     };
13894     int opcode = extract32(insn, 10, 2);
13895     int imm2 = extract32(insn, 12, 2);
13896     int rm = extract32(insn, 16, 5);
13897     int rn = extract32(insn, 5, 5);
13898     int rd = extract32(insn, 0, 5);
13899 
13900     if (!dc_isar_feature(aa64_sm3, s)) {
13901         unallocated_encoding(s);
13902         return;
13903     }
13904 
13905     if (!fp_access_check(s)) {
13906         return;
13907     }
13908 
13909     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13910 }
13911 
/*
 * C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each row is { pattern, mask, fn }.  The table is searched by
 * lookup_disas_fn() from disas_data_proc_simd().
 * NOTE(review): presumably a row matches when (insn & mask) == pattern
 * and the first matching row wins -- confirm against lookup_disas_fn().
 * The ordering requirement noted on disas_simd_mod_imm below depends on
 * that, so do not reorder rows without checking.
 * The final all-zero row with a NULL fn is presumably the end-of-table
 * sentinel.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    /* Crypto extensions */
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    /* Half-precision (fp16) variants */
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    { 0x00000000, 0x00000000, NULL }
};
13953 
13954 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13955 {
13956     /* Note that this is called with all non-FP cases from
13957      * table C3-6 so it must UNDEF for entries not specifically
13958      * allocated to instructions in that table.
13959      */
13960     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13961     if (fn) {
13962         fn(s, insn);
13963     } else {
13964         unallocated_encoding(s);
13965     }
13966 }
13967 
13968 /* C3.6 Data processing - SIMD and floating point */
13969 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13970 {
13971     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13972         disas_data_proc_fp(s, insn);
13973     } else {
13974         /* SIMD, including crypto */
13975         disas_data_proc_simd(s, insn);
13976     }
13977 }
13978 
13979 static bool trans_OK(DisasContext *s, arg_OK *a)
13980 {
13981     return true;
13982 }
13983 
13984 static bool trans_FAIL(DisasContext *s, arg_OK *a)
13985 {
13986     s->is_nonstreaming = true;
13987     return true;
13988 }
13989 
13990 /**
13991  * is_guarded_page:
13992  * @env: The cpu environment
13993  * @s: The DisasContext
13994  *
13995  * Return true if the page is guarded.
13996  */
13997 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13998 {
13999     uint64_t addr = s->base.pc_first;
14000 #ifdef CONFIG_USER_ONLY
14001     return page_get_flags(addr) & PAGE_BTI;
14002 #else
14003     CPUTLBEntryFull *full;
14004     void *host;
14005     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
14006     int flags;
14007 
14008     /*
14009      * We test this immediately after reading an insn, which means
14010      * that the TLB entry must be present and valid, and thus this
14011      * access will never raise an exception.
14012      */
14013     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
14014                               false, &host, &full, 0);
14015     assert(!(flags & TLB_INVALID_MASK));
14016 
14017     return full->extra.arm.guarded;
14018 #endif
14019 }
14020 
/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * Only a small set of insns may sit at a branch target on a
 * guarded page:
 *   - branch target identifiers (BTI),
 *   - PACIASP and PACIBSP,
 *   - BRK,
 *   - HLT.
 * Any other insn causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) != 0xd503201fu) {
        /*
         * Outside the HINT space only BRK and HLT are accepted, which
         * gives priority to the breakpoint exception.
         */
        uint32_t opc = insn & 0xffe0001fu;

        return opc == 0xd4200000u      /* BRK */
            || opc == 0xd4400000u;     /* HLT */
    }

    /* HINT space: dispatch on the 7-bit field at insn[11:5]. */
    switch (extract32(insn, 5, 7)) {
    case 0b0011001: /* PACIASP */
    case 0b0011011: /* PACIBSP */
        /*
         * With SCTLR_ELx.BT set, PACI*SP are not compatible with
         * btype == 3; with it clear every btype is accepted.
         */
        return !(bt && btype == 3);
    case 0b0100000: /* BTI */
        /* Never compatible, whatever the btype. */
        return false;
    case 0b0100010: /* BTI c */
        /* Everything except btype == 3. */
        return btype != 3;
    case 0b0100100: /* BTI j */
        /* Everything except btype == 2. */
        return btype != 2;
    case 0b0100110: /* BTI jc */
        /* Accepts every btype. */
        return true;
    default:
        /* Any other hint is not a valid landing pad. */
        return false;
    }
}
14072 
14073 /* C3.1 A64 instruction index by encoding */
14074 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14075 {
14076     switch (extract32(insn, 25, 4)) {
14077     case 0x5:
14078     case 0xd:      /* Data processing - register */
14079         disas_data_proc_reg(s, insn);
14080         break;
14081     case 0x7:
14082     case 0xf:      /* Data processing - SIMD and floating point */
14083         disas_data_proc_simd_fp(s, insn);
14084         break;
14085     default:
14086         unallocated_encoding(s);
14087         break;
14088     }
14089 }
14090 
14091 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14092                                           CPUState *cpu)
14093 {
14094     DisasContext *dc = container_of(dcbase, DisasContext, base);
14095     CPUARMState *env = cpu_env(cpu);
14096     ARMCPU *arm_cpu = env_archcpu(env);
14097     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
14098     int bound, core_mmu_idx;
14099 
14100     dc->isar = &arm_cpu->isar;
14101     dc->condjmp = 0;
14102     dc->pc_save = dc->base.pc_first;
14103     dc->aarch64 = true;
14104     dc->thumb = false;
14105     dc->sctlr_b = 0;
14106     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
14107     dc->condexec_mask = 0;
14108     dc->condexec_cond = 0;
14109     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
14110     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14111     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
14112     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
14113     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
14114     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14115 #if !defined(CONFIG_USER_ONLY)
14116     dc->user = (dc->current_el == 0);
14117 #endif
14118     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
14119     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
14120     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
14121     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
14122     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
14123     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
14124     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
14125     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
14126     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
14127     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
14128     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
14129     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
14130     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
14131     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
14132     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
14133     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
14134     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
14135     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
14136     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
14137     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
14138     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
14139     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
14140     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
14141     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
14142     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
14143     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
14144     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
14145     dc->vec_len = 0;
14146     dc->vec_stride = 0;
14147     dc->cp_regs = arm_cpu->cp_regs;
14148     dc->features = env->features;
14149     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14150     dc->gm_blocksize = arm_cpu->gm_blocksize;
14151 
14152 #ifdef CONFIG_USER_ONLY
14153     /* In sve_probe_page, we assume TBI is enabled. */
14154     tcg_debug_assert(dc->tbid & 1);
14155 #endif
14156 
14157     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
14158 
14159     /* Single step state. The code-generation logic here is:
14160      *  SS_ACTIVE == 0:
14161      *   generate code with no special handling for single-stepping (except
14162      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14163      *   this happens anyway because those changes are all system register or
14164      *   PSTATE writes).
14165      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14166      *   emit code for one insn
14167      *   emit code to clear PSTATE.SS
14168      *   emit code to generate software step exception for completed step
14169      *   end TB (as usual for having generated an exception)
14170      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14171      *   emit code to generate a software step exception
14172      *   end the TB
14173      */
14174     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14175     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14176     dc->is_ldex = false;
14177 
14178     /* Bound the number of insns to execute to those left on the page.  */
14179     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
14180 
14181     /* If architectural single step active, limit to 1.  */
14182     if (dc->ss_active) {
14183         bound = 1;
14184     }
14185     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14186 }
14187 
14188 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14189 {
14190 }
14191 
14192 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14193 {
14194     DisasContext *dc = container_of(dcbase, DisasContext, base);
14195     target_ulong pc_arg = dc->base.pc_next;
14196 
14197     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14198         pc_arg &= ~TARGET_PAGE_MASK;
14199     }
14200     tcg_gen_insn_start(pc_arg, 0, 0);
14201     dc->insn_start_updated = false;
14202 }
14203 
14204 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14205 {
14206     DisasContext *s = container_of(dcbase, DisasContext, base);
14207     CPUARMState *env = cpu_env(cpu);
14208     uint64_t pc = s->base.pc_next;
14209     uint32_t insn;
14210 
14211     /* Singlestep exceptions have the highest priority. */
14212     if (s->ss_active && !s->pstate_ss) {
14213         /* Singlestep state is Active-pending.
14214          * If we're in this state at the start of a TB then either
14215          *  a) we just took an exception to an EL which is being debugged
14216          *     and this is the first insn in the exception handler
14217          *  b) debug exceptions were masked and we just unmasked them
14218          *     without changing EL (eg by clearing PSTATE.D)
14219          * In either case we're going to take a swstep exception in the
14220          * "did not step an insn" case, and so the syndrome ISV and EX
14221          * bits should be zero.
14222          */
14223         assert(s->base.num_insns == 1);
14224         gen_swstep_exception(s, 0, 0);
14225         s->base.is_jmp = DISAS_NORETURN;
14226         s->base.pc_next = pc + 4;
14227         return;
14228     }
14229 
14230     if (pc & 3) {
14231         /*
14232          * PC alignment fault.  This has priority over the instruction abort
14233          * that we would receive from a translation fault via arm_ldl_code.
14234          * This should only be possible after an indirect branch, at the
14235          * start of the TB.
14236          */
14237         assert(s->base.num_insns == 1);
14238         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
14239         s->base.is_jmp = DISAS_NORETURN;
14240         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14241         return;
14242     }
14243 
14244     s->pc_curr = pc;
14245     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14246     s->insn = insn;
14247     s->base.pc_next = pc + 4;
14248 
14249     s->fp_access_checked = false;
14250     s->sve_access_checked = false;
14251 
14252     if (s->pstate_il) {
14253         /*
14254          * Illegal execution state. This has priority over BTI
14255          * exceptions, but comes after instruction abort exceptions.
14256          */
14257         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14258         return;
14259     }
14260 
14261     if (dc_isar_feature(aa64_bti, s)) {
14262         if (s->base.num_insns == 1) {
14263             /*
14264              * At the first insn of the TB, compute s->guarded_page.
14265              * We delayed computing this until successfully reading
14266              * the first insn of the TB, above.  This (mostly) ensures
14267              * that the softmmu tlb entry has been populated, and the
14268              * page table GP bit is available.
14269              *
14270              * Note that we need to compute this even if btype == 0,
14271              * because this value is used for BR instructions later
14272              * where ENV is not available.
14273              */
14274             s->guarded_page = is_guarded_page(env, s);
14275 
14276             /* First insn can have btype set to non-zero.  */
14277             tcg_debug_assert(s->btype >= 0);
14278 
14279             /*
14280              * Note that the Branch Target Exception has fairly high
14281              * priority -- below debugging exceptions but above most
14282              * everything else.  This allows us to handle this now
14283              * instead of waiting until the insn is otherwise decoded.
14284              */
14285             if (s->btype != 0
14286                 && s->guarded_page
14287                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14288                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14289                 return;
14290             }
14291         } else {
14292             /* Not the first insn: btype must be 0.  */
14293             tcg_debug_assert(s->btype == 0);
14294         }
14295     }
14296 
14297     s->is_nonstreaming = false;
14298     if (s->sme_trap_nonstreaming) {
14299         disas_sme_fa64(s, insn);
14300     }
14301 
14302     if (!disas_a64(s, insn) &&
14303         !disas_sme(s, insn) &&
14304         !disas_sve(s, insn)) {
14305         disas_a64_legacy(s, insn);
14306     }
14307 
14308     /*
14309      * After execution of most insns, btype is reset to 0.
14310      * Note that we set btype == -1 when the insn sets btype.
14311      */
14312     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14313         reset_btype(s);
14314     }
14315 }
14316 
14317 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14318 {
14319     DisasContext *dc = container_of(dcbase, DisasContext, base);
14320 
14321     if (unlikely(dc->ss_active)) {
14322         /* Note that this means single stepping WFI doesn't halt the CPU.
14323          * For conditional branch insns this is harmless unreachable code as
14324          * gen_goto_tb() has already handled emitting the debug exception
14325          * (and thus a tb-jump is not possible when singlestepping).
14326          */
14327         switch (dc->base.is_jmp) {
14328         default:
14329             gen_a64_update_pc(dc, 4);
14330             /* fall through */
14331         case DISAS_EXIT:
14332         case DISAS_JUMP:
14333             gen_step_complete_exception(dc);
14334             break;
14335         case DISAS_NORETURN:
14336             break;
14337         }
14338     } else {
14339         switch (dc->base.is_jmp) {
14340         case DISAS_NEXT:
14341         case DISAS_TOO_MANY:
14342             gen_goto_tb(dc, 1, 4);
14343             break;
14344         default:
14345         case DISAS_UPDATE_EXIT:
14346             gen_a64_update_pc(dc, 4);
14347             /* fall through */
14348         case DISAS_EXIT:
14349             tcg_gen_exit_tb(NULL, 0);
14350             break;
14351         case DISAS_UPDATE_NOCHAIN:
14352             gen_a64_update_pc(dc, 4);
14353             /* fall through */
14354         case DISAS_JUMP:
14355             tcg_gen_lookup_and_goto_ptr();
14356             break;
14357         case DISAS_NORETURN:
14358         case DISAS_SWI:
14359             break;
14360         case DISAS_WFE:
14361             gen_a64_update_pc(dc, 4);
14362             gen_helper_wfe(tcg_env);
14363             break;
14364         case DISAS_YIELD:
14365             gen_a64_update_pc(dc, 4);
14366             gen_helper_yield(tcg_env);
14367             break;
14368         case DISAS_WFI:
14369             /*
14370              * This is a special case because we don't want to just halt
14371              * the CPU if trying to debug across a WFI.
14372              */
14373             gen_a64_update_pc(dc, 4);
14374             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
14375             /*
14376              * The helper doesn't necessarily throw an exception, but we
14377              * must go back to the main loop to check for interrupts anyway.
14378              */
14379             tcg_gen_exit_tb(NULL, 0);
14380             break;
14381         }
14382     }
14383 }
14384 
14385 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14386                                  CPUState *cpu, FILE *logfile)
14387 {
14388     DisasContext *dc = container_of(dcbase, DisasContext, base);
14389 
14390     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14391     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14392 }
14393 
14394 const TranslatorOps aarch64_translator_ops = {
14395     .init_disas_context = aarch64_tr_init_disas_context,
14396     .tb_start           = aarch64_tr_tb_start,
14397     .insn_start         = aarch64_tr_insn_start,
14398     .translate_insn     = aarch64_tr_translate_insn,
14399     .tb_stop            = aarch64_tr_tb_stop,
14400     .disas_log          = aarch64_tr_disas_log,
14401 };
14402